Home | History | Annotate | Download | only in common_lib
      1 """
      2 Module with abstraction layers to revision control systems.
      3 
      4 With this library, autotest developers can handle source code checkouts and
      5 updates on both client as well as server code.
      6 """
      7 
      8 import os, warnings, logging
      9 import error, utils
     10 from autotest_lib.client.bin import os_dep
     11 
     12 
     13 class RevisionControlError(Exception):
     14     """Local exception to be raised by code in this file."""
     15 
     16 
     17 class GitError(RevisionControlError):
     18     """Exceptions raised for general git errors."""
     19 
     20 
     21 class GitCloneError(GitError):
     22     """Exceptions raised for git clone errors."""
     23 
     24 
     25 class GitFetchError(GitError):
     26     """Exception raised for git fetch errors."""
     27 
     28 
     29 class GitPullError(GitError):
     30     """Exception raised for git pull errors."""
     31 
     32 
     33 class GitResetError(GitError):
     34     """Exception raised for git reset errors."""
     35 
     36 
     37 class GitCommitError(GitError):
     38     """Exception raised for git commit errors."""
     39 
     40 
     41 class GitRepo(object):
     42     """
     43     This class represents a git repo.
     44 
     45     It is used to pull down a local copy of a git repo, check if the local
     46     repo is up-to-date, if not update.  It delegates the install to
     47     implementation classes.
     48     """
     49 
     50     def __init__(self, repodir, giturl=None, weburl=None, abs_work_tree=None):
     51         """
     52         Initialized reposotory.
     53 
     54         @param repodir: destination repo directory.
     55         @param giturl: master repo git url.
     56         @param weburl: a web url for the master repo.
     57         @param abs_work_tree: work tree of the git repo. In the
     58             absence of a work tree git manipulations will occur
     59             in the current working directory for non bare repos.
     60             In such repos the -git-dir option should point to
     61             the .git directory and -work-tree should point to
     62             the repos working tree.
     63         Note: a bare reposotory is one which contains all the
     64         working files (the tree) and the other wise hidden files
     65         (.git) in the same directory. This class assumes non-bare
     66         reposotories.
     67         """
     68         if repodir is None:
     69             raise ValueError('You must provide a path that will hold the'
     70                              'git repository')
     71         self.repodir = utils.sh_escape(repodir)
     72         self._giturl = giturl
     73         if weburl is not None:
     74             warnings.warn("Param weburl: You are no longer required to provide "
     75                           "a web URL for your git repos", DeprecationWarning)
     76 
     77         # path to .git dir
     78         self.gitpath = utils.sh_escape(os.path.join(self.repodir,'.git'))
     79 
     80         # Find git base command. If not found, this will throw an exception
     81         self.git_base_cmd = os_dep.command('git')
     82         self.work_tree = abs_work_tree
     83 
     84         # default to same remote path as local
     85         self._build = os.path.dirname(self.repodir)
     86 
     87 
     88     @property
     89     def giturl(self):
     90         """
     91         A giturl is necessary to perform certain actions (clone, pull, fetch)
     92         but not others (like diff).
     93         """
     94         if self._giturl is None:
     95             raise ValueError('Unsupported operation -- this object was not'
     96                              'constructed with a git URL.')
     97         return self._giturl
     98 
     99 
    100     def gen_git_cmd_base(self):
    101         """
    102         The command we use to run git cannot be set. It is reconstructed
    103         on each access from it's component variables. This is it's getter.
    104         """
    105         # base git command , pointing to gitpath git dir
    106         gitcmdbase = '%s --git-dir=%s' % (self.git_base_cmd,
    107                                           self.gitpath)
    108         if self.work_tree:
    109             gitcmdbase += ' --work-tree=%s' % self.work_tree
    110         return gitcmdbase
    111 
    112 
    def _run(self, command, timeout=None, ignore_status=False):
        """
        Run a command through utils.run after passing it to utils.sh_escape.

        @param command: The full command line to execute.
        @param timeout: Timeout to run the command.
        @param ignore_status: Whether error.CmdError exceptions should be
                suppressed when the command exits non-zero (True), or
                allowed to propagate (False).

        @return: whatever utils.run returns for the command.
        """
        escaped_command = '%s' % utils.sh_escape(command)
        return utils.run(escaped_command, timeout, ignore_status)
    124 
    125 
    126     def gitcmd(self, cmd, ignore_status=False, error_class=None,
    127                error_msg=None):
    128         """
    129         Wrapper for a git command.
    130 
    131         @param cmd: Git subcommand (ex 'clone').
    132         @param ignore_status: If True, ignore the CmdError raised by the
    133                 underlying command runner. NB: Passing in an error_class
    134                 impiles ignore_status=True.
    135         @param error_class: When ignore_status is False, optional error
    136                 error class to log and raise in case of errors. Must be a
    137                 (sub)type of GitError.
    138         @param error_msg: When passed with error_class, used as a friendly
    139                 error message.
    140         """
    141         # TODO(pprabhu) Get rid of the ignore_status argument.
    142         # Now that we support raising custom errors, we always want to get a
    143         # return code from the command execution, instead of an exception.
    144         ignore_status = ignore_status or error_class is not None
    145         cmd = '%s %s' % (self.gen_git_cmd_base(), cmd)
    146         rv = self._run(cmd, ignore_status=ignore_status)
    147         if rv.exit_status != 0 and error_class is not None:
    148             logging.error('git command failed: %s: %s',
    149                           cmd, error_msg if error_msg is not None else '')
    150             logging.error(rv.stderr)
    151             raise error_class(error_msg if error_msg is not None
    152                               else rv.stderr)
    153 
    154         return rv
    155 
    156 
    157     def clone(self):
    158         """
    159         Clones a repo using giturl and repodir.
    160 
    161         Since we're cloning the master repo we don't have a work tree yet,
    162         make sure the getter of the gitcmd doesn't think we do by setting
    163         work_tree to None.
    164 
    165         @raises GitCloneError: if cloning the master repo fails.
    166         """
    167         logging.info('Cloning git repo %s', self.giturl)
    168         cmd = 'clone %s %s ' % (self.giturl, self.repodir)
    169         abs_work_tree = self.work_tree
    170         self.work_tree = None
    171         try:
    172             rv = self.gitcmd(cmd, True)
    173             if rv.exit_status != 0:
    174                 logging.error(rv.stderr)
    175                 raise GitCloneError('Failed to clone git url', rv)
    176             else:
    177                 logging.info(rv.stdout)
    178         finally:
    179             self.work_tree = abs_work_tree
    180 
    181 
    182     def pull(self, rebase=False):
    183         """
    184         Pulls into repodir using giturl.
    185 
    186         @param rebase: If true forces git pull to perform a rebase instead of a
    187                         merge.
    188         @raises GitPullError: if pulling from giturl fails.
    189         """
    190         logging.info('Updating git repo %s', self.giturl)
    191         cmd = 'pull '
    192         if rebase:
    193             cmd += '--rebase '
    194         cmd += self.giturl
    195 
    196         rv = self.gitcmd(cmd, True)
    197         if rv.exit_status != 0:
    198             logging.error(rv.stderr)
    199             e_msg = 'Failed to pull git repo data'
    200             raise GitPullError(e_msg, rv)
    201 
    202 
    203     def commit(self, msg='default'):
    204         """
    205         Commit changes to repo with the supplied commit msg.
    206 
    207         @param msg: A message that goes with the commit.
    208         """
    209         rv = self.gitcmd('commit -a -m %s' % msg)
    210         if rv.exit_status != 0:
    211             logging.error(rv.stderr)
    212             raise revision_control.GitCommitError('Unable to commit', rv)
    213 
    214 
    def reset(self, branch_or_sha):
        """
        Hard-reset the repo to the given branch or git sha.

        @param branch_or_sha: Name of a local or remote branch or git sha.

        @raises GitResetError if operation fails.
        """
        reset_cmd = 'reset --hard %s' % branch_or_sha
        failure_msg = 'Failed to reset to %s' % branch_or_sha
        self.gitcmd(reset_cmd, error_class=GitResetError,
                    error_msg=failure_msg)
    226 
    227 
    228     def reset_head(self):
    229         """
    230         Reset repo to HEAD@{0} by running git reset --hard HEAD.
    231 
    232         TODO(pprabhu): cleanup. Use reset.
    233 
    234         @raises GitResetError: if we fails to reset HEAD.
    235         """
    236         logging.info('Resetting head on repo %s', self.repodir)
    237         rv = self.gitcmd('reset --hard HEAD')
    238         if rv.exit_status != 0:
    239             logging.error(rv.stderr)
    240             e_msg = 'Failed to reset HEAD'
    241             raise GitResetError(e_msg, rv)
    242 
    243 
    244     def fetch_remote(self):
    245         """
    246         Fetches all files from the remote but doesn't reset head.
    247 
    248         @raises GitFetchError: if we fail to fetch all files from giturl.
    249         """
    250         logging.info('fetching from repo %s', self.giturl)
    251         rv = self.gitcmd('fetch --all')
    252         if rv.exit_status != 0:
    253             logging.error(rv.stderr)
    254             e_msg = 'Failed to fetch from %s' % self.giturl
    255             raise GitFetchError(e_msg, rv)
    256 
    257 
    def reinit_repo_at(self, remote_branch):
        """
        Does all it can to ensure that the repo is at remote_branch.

        This will try to be nice and detect any local changes and bail early.
        OTOH, if it finishes successfully, it'll blow away anything and
        everything so that local repo reflects the upstream branch requested.

        @param remote_branch: Branch name on the 'origin' remote; the repo is
                reset to 'origin/<remote_branch>'.

        @raises GitError: if any of the git steps fails or local changes are
                detected (subclasses such as GitCloneError, GitFetchError and
                GitResetError come from the helpers called here).
        """
        if not self.is_repo_initialized():
            self.clone()

        # Play nice. Detect any local changes and bail.
        # Re-stat all files before comparing index. This is needed for
        # diff-index to work properly in cases when the stat info on files is
        # stale. (e.g., you just untarred the whole git folder that you got from
        # Alice)
        self.gitcmd('update-index --refresh -q',
                    error_class=GitError,
                    error_msg='Failed to refresh index.')
        # NOTE(review): with --quiet, diff-index presumably reports differences
        # through its exit status, in which case a dirty checkout surfaces as
        # the GitError raised by gitcmd here rather than via stdout -- confirm.
        rv = self.gitcmd(
                'diff-index --quiet HEAD --',
                error_class=GitError,
                error_msg='Failed to check for local changes.')
        if rv.stdout:
            logging.error(rv.stdout)
            e_msg = 'Local checkout dirty. (%s)'
            raise GitError(e_msg % rv.stdout)

        # Play the bad cop. Destroy everything in your path.
        # Don't trust the existing repo setup at all (so don't trust the current
        # config, current branches / remotes etc).
        self.gitcmd('config remote.origin.url %s' % self.giturl,
                    error_class=GitError,
                    error_msg='Failed to set origin.')
        self.gitcmd('checkout -f',
                    error_class=GitError,
                    error_msg='Failed to checkout.')
        self.gitcmd('clean -qxdf',
                    error_class=GitError,
                    error_msg='Failed to clean.')
        self.fetch_remote()
        self.reset('origin/%s' % remote_branch)
    300 
    301 
    302     def get(self, **kwargs):
    303         """
    304         This method overrides baseclass get so we can do proper git
    305         clone/pulls, and check for updated versions.  The result of
    306         this method will leave an up-to-date version of git repo at
    307         'giturl' in 'repodir' directory to be used by build/install
    308         methods.
    309 
    310         @param kwargs: Dictionary of parameters to the method get.
    311         """
    312         if not self.is_repo_initialized():
    313             # this is your first time ...
    314             self.clone()
    315         elif self.is_out_of_date():
    316             # exiting repo, check if we're up-to-date
    317             self.pull()
    318         else:
    319             logging.info('repo up-to-date')
    320 
    321         # remember where the source is
    322         self.source_material = self.repodir
    323 
    324 
    325     def get_local_head(self):
    326         """
    327         Get the top commit hash of the current local git branch.
    328 
    329         @return: Top commit hash of local git branch
    330         """
    331         cmd = 'log --pretty=format:"%H" -1'
    332         l_head_cmd = self.gitcmd(cmd)
    333         return l_head_cmd.stdout.strip()
    334 
    335 
    336     def get_remote_head(self):
    337         """
    338         Get the top commit hash of the current remote git branch.
    339 
    340         @return: Top commit hash of remote git branch
    341         """
    342         cmd1 = 'remote show'
    343         origin_name_cmd = self.gitcmd(cmd1)
    344         cmd2 = 'log --pretty=format:"%H" -1 ' + origin_name_cmd.stdout.strip()
    345         r_head_cmd = self.gitcmd(cmd2)
    346         return r_head_cmd.stdout.strip()
    347 
    348 
    349     def is_out_of_date(self):
    350         """
    351         Return whether this branch is out of date with regards to remote branch.
    352 
    353         @return: False, if the branch is outdated, True if it is current.
    354         """
    355         local_head = self.get_local_head()
    356         remote_head = self.get_remote_head()
    357 
    358         # local is out-of-date, pull
    359         if local_head != remote_head:
    360             return True
    361 
    362         return False
    363 
    364 
    365     def is_repo_initialized(self):
    366         """
    367         Return whether the git repo was already initialized.
    368 
    369         Counts objects in .git directory, since these will exist even if the
    370         repo is empty. Assumes non-bare reposotories like the rest of this file.
    371 
    372         @return: True if the repo is initialized.
    373         """
    374         cmd = 'count-objects'
    375         rv = self.gitcmd(cmd, True)
    376         if rv.exit_status == 0:
    377             return True
    378 
    379         return False
    380 
    381 
    382     def get_latest_commit_hash(self):
    383         """
    384         Get the commit hash of the latest commit in the repo.
    385 
    386         We don't raise an exception if no commit hash was found as
    387         this could be an empty repository. The caller should notice this
    388         methods return value and raise one appropriately.
    389 
    390         @return: The first commit hash if anything has been committed.
    391         """
    392         cmd = 'rev-list -n 1 --all'
    393         rv = self.gitcmd(cmd, True)
    394         if rv.exit_status == 0:
    395             return rv.stdout
    396         return None
    397 
    398 
    399     def is_repo_empty(self):
    400         """
    401         Checks for empty but initialized repos.
    402 
    403         eg: we clone an empty master repo, then don't pull
    404         after the master commits.
    405 
    406         @return True if the repo has no commits.
    407         """
    408         if self.get_latest_commit_hash():
    409             return False
    410         return True
    411 
    412 
    413     def get_revision(self):
    414         """
    415         Return current HEAD commit id
    416         """
    417         if not self.is_repo_initialized():
    418             self.get()
    419 
    420         cmd = 'rev-parse --verify HEAD'
    421         gitlog = self.gitcmd(cmd, True)
    422         if gitlog.exit_status != 0:
    423             logging.error(gitlog.stderr)
    424             raise error.CmdError('Failed to find git sha1 revision', gitlog)
    425         else:
    426             return gitlog.stdout.strip('\n')
    427 
    428 
    429     def checkout(self, remote, local=None):
    430         """
    431         Check out the git commit id, branch, or tag given by remote.
    432 
    433         Optional give the local branch name as local.
    434 
    435         @param remote: Remote commit hash
    436         @param local: Local commit hash
    437         @note: For git checkout tag git version >= 1.5.0 is required
    438         """
    439         if not self.is_repo_initialized():
    440             self.get()
    441 
    442         assert(isinstance(remote, basestring))
    443         if local:
    444             cmd = 'checkout -b %s %s' % (local, remote)
    445         else:
    446             cmd = 'checkout %s' % (remote)
    447         gitlog = self.gitcmd(cmd, True)
    448         if gitlog.exit_status != 0:
    449             logging.error(gitlog.stderr)
    450             raise error.CmdError('Failed to checkout git branch', gitlog)
    451         else:
    452             logging.info(gitlog.stdout)
    453 
    454 
    455     def get_branch(self, all=False, remote_tracking=False):
    456         """
    457         Show the branches.
    458 
    459         @param all: List both remote-tracking branches and local branches (True)
    460                 or only the local ones (False).
    461         @param remote_tracking: Lists the remote-tracking branches.
    462         """
    463         if not self.is_repo_initialized():
    464             self.get()
    465 
    466         cmd = 'branch --no-color'
    467         if all:
    468             cmd = " ".join([cmd, "-a"])
    469         if remote_tracking:
    470             cmd = " ".join([cmd, "-r"])
    471 
    472         gitlog = self.gitcmd(cmd, True)
    473         if gitlog.exit_status != 0:
    474             logging.error(gitlog.stderr)
    475             raise error.CmdError('Failed to get git branch', gitlog)
    476         elif all or remote_tracking:
    477             return gitlog.stdout.strip('\n')
    478         else:
    479             branch = [b[2:] for b in gitlog.stdout.split('\n')
    480                       if b.startswith('*')][0]
    481             return branch
    482