Home | History | Annotate | Download | only in common_lib
      1 """
      2 Module with abstraction layers to revision control systems.
      3 
With this library, autotest developers can handle source code checkouts and
updates for both client and server code.
      6 """
      7 
      8 import os, warnings, logging
      9 import error, utils
     10 from autotest_lib.client.bin import os_dep
     11 
     12 
     13 class RevisionControlError(Exception):
     14     """Local exception to be raised by code in this file."""
     15 
     16 
     17 class GitError(RevisionControlError):
     18     """Exceptions raised for general git errors."""
     19 
     20 
     21 class GitCloneError(GitError):
     22     """Exceptions raised for git clone errors."""
     23 
     24 
     25 class GitFetchError(GitError):
     26     """Exception raised for git fetch errors."""
     27 
     28 
     29 class GitPullError(GitError):
     30     """Exception raised for git pull errors."""
     31 
     32 
     33 class GitResetError(GitError):
     34     """Exception raised for git reset errors."""
     35 
     36 
     37 class GitCommitError(GitError):
     38     """Exception raised for git commit errors."""
     39 
     40 
     41 class GitRepo(object):
     42     """
     43     This class represents a git repo.
     44 
     45     It is used to pull down a local copy of a git repo, check if the local
     46     repo is up-to-date, if not update.  It delegates the install to
     47     implementation classes.
     48     """
     49 
     50     def __init__(self, repodir, giturl=None, weburl=None, abs_work_tree=None):
     51         """
     52         Initialized reposotory.
     53 
     54         @param repodir: destination repo directory.
     55         @param giturl: master repo git url.
     56         @param weburl: a web url for the master repo.
     57         @param abs_work_tree: work tree of the git repo. In the
     58             absence of a work tree git manipulations will occur
     59             in the current working directory for non bare repos.
     60             In such repos the -git-dir option should point to
     61             the .git directory and -work-tree should point to
     62             the repos working tree.
     63         Note: a bare reposotory is one which contains all the
     64         working files (the tree) and the other wise hidden files
     65         (.git) in the same directory. This class assumes non-bare
     66         reposotories.
     67         """
     68         if repodir is None:
     69             raise ValueError('You must provide a path that will hold the'
     70                              'git repository')
     71         self.repodir = utils.sh_escape(repodir)
     72         self._giturl = giturl
     73         if weburl is not None:
     74             warnings.warn("Param weburl: You are no longer required to provide "
     75                           "a web URL for your git repos", DeprecationWarning)
     76 
     77         # path to .git dir
     78         self.gitpath = utils.sh_escape(os.path.join(self.repodir,'.git'))
     79 
     80         # Find git base command. If not found, this will throw an exception
     81         self.git_base_cmd = os_dep.command('git')
     82         self.work_tree = abs_work_tree
     83 
     84         # default to same remote path as local
     85         self._build = os.path.dirname(self.repodir)
     86 
     87 
     88     @property
     89     def giturl(self):
     90         """
     91         A giturl is necessary to perform certain actions (clone, pull, fetch)
     92         but not others (like diff).
     93         """
     94         if self._giturl is None:
     95             raise ValueError('Unsupported operation -- this object was not'
     96                              'constructed with a git URL.')
     97         return self._giturl
     98 
     99 
    100     def gen_git_cmd_base(self):
    101         """
    102         The command we use to run git cannot be set. It is reconstructed
    103         on each access from it's component variables. This is it's getter.
    104         """
    105         # base git command , pointing to gitpath git dir
    106         gitcmdbase = '%s --git-dir=%s' % (self.git_base_cmd,
    107                                           self.gitpath)
    108         if self.work_tree:
    109             gitcmdbase += ' --work-tree=%s' % self.work_tree
    110         return gitcmdbase
    111 
    112 
    113     def _run(self, command, timeout=None, ignore_status=False):
    114         """
    115         Auxiliary function to run a command, with proper shell escaping.
    116 
    117         @param timeout: Timeout to run the command.
    118         @param ignore_status: Whether we should supress error.CmdError
    119                 exceptions if the command did return exit code !=0 (True), or
    120                 not supress them (False).
    121         """
    122         return utils.run(r'%s' % (utils.sh_escape(command)),
    123                          timeout, ignore_status)
    124 
    125 
    126     def gitcmd(self, cmd, ignore_status=False, error_class=None,
    127                error_msg=None):
    128         """
    129         Wrapper for a git command.
    130 
    131         @param cmd: Git subcommand (ex 'clone').
    132         @param ignore_status: If True, ignore the CmdError raised by the
    133                 underlying command runner. NB: Passing in an error_class
    134                 impiles ignore_status=True.
    135         @param error_class: When ignore_status is False, optional error
    136                 error class to log and raise in case of errors. Must be a
    137                 (sub)type of GitError.
    138         @param error_msg: When passed with error_class, used as a friendly
    139                 error message.
    140         """
    141         # TODO(pprabhu) Get rid of the ignore_status argument.
    142         # Now that we support raising custom errors, we always want to get a
    143         # return code from the command execution, instead of an exception.
    144         ignore_status = ignore_status or error_class is not None
    145         cmd = '%s %s' % (self.gen_git_cmd_base(), cmd)
    146         rv = self._run(cmd, ignore_status=ignore_status)
    147         if rv.exit_status != 0 and error_class is not None:
    148             logging.error('git command failed: %s: %s',
    149                           cmd, error_msg if error_msg is not None else '')
    150             logging.error(rv.stderr)
    151             raise error_class(error_msg if error_msg is not None
    152                               else rv.stderr)
    153 
    154         return rv
    155 
    156 
    157     def clone(self, remote_branch=None):
    158         """
    159         Clones a repo using giturl and repodir.
    160 
    161         Since we're cloning the master repo we don't have a work tree yet,
    162         make sure the getter of the gitcmd doesn't think we do by setting
    163         work_tree to None.
    164 
    165         @param remote_branch: Specify the remote branch to clone. None if to
    166                               clone master branch.
    167 
    168         @raises GitCloneError: if cloning the master repo fails.
    169         """
    170         logging.info('Cloning git repo %s', self.giturl)
    171         cmd = 'clone %s %s ' % (self.giturl, self.repodir)
    172         if remote_branch:
    173             cmd += '-b %s' % remote_branch
    174         abs_work_tree = self.work_tree
    175         self.work_tree = None
    176         try:
    177             rv = self.gitcmd(cmd, True)
    178             if rv.exit_status != 0:
    179                 logging.error(rv.stderr)
    180                 raise GitCloneError('Failed to clone git url', rv)
    181             else:
    182                 logging.info(rv.stdout)
    183         finally:
    184             self.work_tree = abs_work_tree
    185 
    186 
    187     def pull(self, rebase=False):
    188         """
    189         Pulls into repodir using giturl.
    190 
    191         @param rebase: If true forces git pull to perform a rebase instead of a
    192                         merge.
    193         @raises GitPullError: if pulling from giturl fails.
    194         """
    195         logging.info('Updating git repo %s', self.giturl)
    196         cmd = 'pull '
    197         if rebase:
    198             cmd += '--rebase '
    199         cmd += self.giturl
    200 
    201         rv = self.gitcmd(cmd, True)
    202         if rv.exit_status != 0:
    203             logging.error(rv.stderr)
    204             e_msg = 'Failed to pull git repo data'
    205             raise GitPullError(e_msg, rv)
    206 
    207 
    208     def commit(self, msg='default'):
    209         """
    210         Commit changes to repo with the supplied commit msg.
    211 
    212         @param msg: A message that goes with the commit.
    213         """
    214         rv = self.gitcmd('commit -a -m %s' % msg)
    215         if rv.exit_status != 0:
    216             logging.error(rv.stderr)
    217             raise GitCommitError('Unable to commit', rv)
    218 
    219 
    220     def reset(self, branch_or_sha):
    221         """
    222         Reset repo to the given branch or git sha.
    223 
    224         @param branch_or_sha: Name of a local or remote branch or git sha.
    225 
    226         @raises GitResetError if operation fails.
    227         """
    228         self.gitcmd('reset --hard %s' % branch_or_sha,
    229                     error_class=GitResetError,
    230                     error_msg='Failed to reset to %s' % branch_or_sha)
    231 
    232 
    233     def reset_head(self):
    234         """
    235         Reset repo to HEAD@{0} by running git reset --hard HEAD.
    236 
    237         TODO(pprabhu): cleanup. Use reset.
    238 
    239         @raises GitResetError: if we fails to reset HEAD.
    240         """
    241         logging.info('Resetting head on repo %s', self.repodir)
    242         rv = self.gitcmd('reset --hard HEAD')
    243         if rv.exit_status != 0:
    244             logging.error(rv.stderr)
    245             e_msg = 'Failed to reset HEAD'
    246             raise GitResetError(e_msg, rv)
    247 
    248 
    249     def fetch_remote(self):
    250         """
    251         Fetches all files from the remote but doesn't reset head.
    252 
    253         @raises GitFetchError: if we fail to fetch all files from giturl.
    254         """
    255         logging.info('fetching from repo %s', self.giturl)
    256         rv = self.gitcmd('fetch --all')
    257         if rv.exit_status != 0:
    258             logging.error(rv.stderr)
    259             e_msg = 'Failed to fetch from %s' % self.giturl
    260             raise GitFetchError(e_msg, rv)
    261 
    262 
    263     def reinit_repo_at(self, remote_branch):
    264         """
    265         Does all it can to ensure that the repo is at remote_branch.
    266 
    267         This will try to be nice and detect any local changes and bail early.
    268         OTOH, if it finishes successfully, it'll blow away anything and
    269         everything so that local repo reflects the upstream branch requested.
    270 
    271         @param remote_branch: branch to check out.
    272         """
    273         if not self.is_repo_initialized():
    274             self.clone()
    275 
    276         # Play nice. Detect any local changes and bail.
    277         # Re-stat all files before comparing index. This is needed for
    278         # diff-index to work properly in cases when the stat info on files is
    279         # stale. (e.g., you just untarred the whole git folder that you got from
    280         # Alice)
    281         rv = self.gitcmd('update-index --refresh -q',
    282                          error_class=GitError,
    283                          error_msg='Failed to refresh index.')
    284         rv = self.gitcmd(
    285                 'diff-index --quiet HEAD --',
    286                 error_class=GitError,
    287                 error_msg='Failed to check for local changes.')
    288         if rv.stdout:
    289             logging.error(rv.stdout)
    290             e_msg = 'Local checkout dirty. (%s)'
    291             raise GitError(e_msg % rv.stdout)
    292 
    293         # Play the bad cop. Destroy everything in your path.
    294         # Don't trust the existing repo setup at all (so don't trust the current
    295         # config, current branches / remotes etc).
    296         self.gitcmd('config remote.origin.url %s' % self.giturl,
    297                     error_class=GitError,
    298                     error_msg='Failed to set origin.')
    299         self.gitcmd('checkout -f',
    300                     error_class=GitError,
    301                     error_msg='Failed to checkout.')
    302         self.gitcmd('clean -qxdf',
    303                     error_class=GitError,
    304                     error_msg='Failed to clean.')
    305         self.fetch_remote()
    306         self.reset('origin/%s' % remote_branch)
    307 
    308 
    309     def get(self, **kwargs):
    310         """
    311         This method overrides baseclass get so we can do proper git
    312         clone/pulls, and check for updated versions.  The result of
    313         this method will leave an up-to-date version of git repo at
    314         'giturl' in 'repodir' directory to be used by build/install
    315         methods.
    316 
    317         @param kwargs: Dictionary of parameters to the method get.
    318         """
    319         if not self.is_repo_initialized():
    320             # this is your first time ...
    321             self.clone()
    322         elif self.is_out_of_date():
    323             # exiting repo, check if we're up-to-date
    324             self.pull()
    325         else:
    326             logging.info('repo up-to-date')
    327 
    328         # remember where the source is
    329         self.source_material = self.repodir
    330 
    331 
    332     def get_local_head(self):
    333         """
    334         Get the top commit hash of the current local git branch.
    335 
    336         @return: Top commit hash of local git branch
    337         """
    338         cmd = 'log --pretty=format:"%H" -1'
    339         l_head_cmd = self.gitcmd(cmd)
    340         return l_head_cmd.stdout.strip()
    341 
    342 
    343     def get_remote_head(self):
    344         """
    345         Get the top commit hash of the current remote git branch.
    346 
    347         @return: Top commit hash of remote git branch
    348         """
    349         cmd1 = 'remote show'
    350         origin_name_cmd = self.gitcmd(cmd1)
    351         cmd2 = 'log --pretty=format:"%H" -1 ' + origin_name_cmd.stdout.strip()
    352         r_head_cmd = self.gitcmd(cmd2)
    353         return r_head_cmd.stdout.strip()
    354 
    355 
    356     def is_out_of_date(self):
    357         """
    358         Return whether this branch is out of date with regards to remote branch.
    359 
    360         @return: False, if the branch is outdated, True if it is current.
    361         """
    362         local_head = self.get_local_head()
    363         remote_head = self.get_remote_head()
    364 
    365         # local is out-of-date, pull
    366         if local_head != remote_head:
    367             return True
    368 
    369         return False
    370 
    371 
    372     def is_repo_initialized(self):
    373         """
    374         Return whether the git repo was already initialized.
    375 
    376         Counts objects in .git directory, since these will exist even if the
    377         repo is empty. Assumes non-bare reposotories like the rest of this file.
    378 
    379         @return: True if the repo is initialized.
    380         """
    381         cmd = 'count-objects'
    382         rv = self.gitcmd(cmd, True)
    383         if rv.exit_status == 0:
    384             return True
    385 
    386         return False
    387 
    388 
    389     def get_latest_commit_hash(self):
    390         """
    391         Get the commit hash of the latest commit in the repo.
    392 
    393         We don't raise an exception if no commit hash was found as
    394         this could be an empty repository. The caller should notice this
    395         methods return value and raise one appropriately.
    396 
    397         @return: The first commit hash if anything has been committed.
    398         """
    399         cmd = 'rev-list -n 1 --all'
    400         rv = self.gitcmd(cmd, True)
    401         if rv.exit_status == 0:
    402             return rv.stdout
    403         return None
    404 
    405 
    406     def is_repo_empty(self):
    407         """
    408         Checks for empty but initialized repos.
    409 
    410         eg: we clone an empty master repo, then don't pull
    411         after the master commits.
    412 
    413         @return True if the repo has no commits.
    414         """
    415         if self.get_latest_commit_hash():
    416             return False
    417         return True
    418 
    419 
    420     def get_revision(self):
    421         """
    422         Return current HEAD commit id
    423         """
    424         if not self.is_repo_initialized():
    425             self.get()
    426 
    427         cmd = 'rev-parse --verify HEAD'
    428         gitlog = self.gitcmd(cmd, True)
    429         if gitlog.exit_status != 0:
    430             logging.error(gitlog.stderr)
    431             raise error.CmdError('Failed to find git sha1 revision', gitlog)
    432         else:
    433             return gitlog.stdout.strip('\n')
    434 
    435 
    436     def checkout(self, remote, local=None):
    437         """
    438         Check out the git commit id, branch, or tag given by remote.
    439 
    440         Optional give the local branch name as local.
    441 
    442         @param remote: Remote commit hash
    443         @param local: Local commit hash
    444         @note: For git checkout tag git version >= 1.5.0 is required
    445         """
    446         if not self.is_repo_initialized():
    447             self.get()
    448 
    449         assert(isinstance(remote, basestring))
    450         if local:
    451             cmd = 'checkout -b %s %s' % (local, remote)
    452         else:
    453             cmd = 'checkout %s' % (remote)
    454         gitlog = self.gitcmd(cmd, True)
    455         if gitlog.exit_status != 0:
    456             logging.error(gitlog.stderr)
    457             raise error.CmdError('Failed to checkout git branch', gitlog)
    458         else:
    459             logging.info(gitlog.stdout)
    460 
    461 
    462     def get_branch(self, all=False, remote_tracking=False):
    463         """
    464         Show the branches.
    465 
    466         @param all: List both remote-tracking branches and local branches (True)
    467                 or only the local ones (False).
    468         @param remote_tracking: Lists the remote-tracking branches.
    469         """
    470         if not self.is_repo_initialized():
    471             self.get()
    472 
    473         cmd = 'branch --no-color'
    474         if all:
    475             cmd = " ".join([cmd, "-a"])
    476         if remote_tracking:
    477             cmd = " ".join([cmd, "-r"])
    478 
    479         gitlog = self.gitcmd(cmd, True)
    480         if gitlog.exit_status != 0:
    481             logging.error(gitlog.stderr)
    482             raise error.CmdError('Failed to get git branch', gitlog)
    483         elif all or remote_tracking:
    484             return gitlog.stdout.strip('\n')
    485         else:
    486             branch = [b[2:] for b in gitlog.stdout.split('\n')
    487                       if b.startswith('*')][0]
    488             return branch
    489