Home | History | Annotate | Download | only in patman
      1 # SPDX-License-Identifier: GPL-2.0+
      2 # Copyright (c) 2011 The Chromium OS Authors.
      3 #
      4 
      5 import math
      6 import os
      7 import re
      8 import shutil
      9 import tempfile
     10 
     11 import command
     12 import commit
     13 import gitutil
     14 from series import Series
     15 
     16 # Tags that we detect and remove
     17 re_remove = re.compile('^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
     18     '|Reviewed-on:|Commit-\w*:')
     19 
     20 # Lines which are allowed after a TEST= line
     21 re_allowed_after_test = re.compile('^Signed-off-by:')
     22 
     23 # Signoffs
     24 re_signoff = re.compile('^Signed-off-by: *(.*)')
     25 
     26 # The start of the cover letter
     27 re_cover = re.compile('^Cover-letter:')
     28 
     29 # A cover letter Cc
     30 re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')
     31 
     32 # Patch series tag
     33 re_series_tag = re.compile('^Series-([a-z-]*): *(.*)')
     34 
     35 # Commit series tag
     36 re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)')
     37 
     38 # Commit tags that we want to collect and keep
     39 re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')
     40 
     41 # The start of a new commit in the git log
     42 re_commit = re.compile('^commit ([0-9a-f]*)$')
     43 
     44 # We detect these since checkpatch doesn't always do it
     45 re_space_before_tab = re.compile('^[+].* \t')
     46 
     47 # States we can be in - can we use range() and still have comments?
     48 STATE_MSG_HEADER = 0        # Still in the message header
     49 STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
     50 STATE_PATCH_HEADER = 2      # In patch header (after the subject)
     51 STATE_DIFFS = 3             # In the diff part (past --- line)
     52 
     53 class PatchStream:
     54     """Class for detecting/injecting tags in a patch or series of patches
     55 
     56     We support processing the output of 'git log' to read out the tags we
     57     are interested in. We can also process a patch file in order to remove
     58     unwanted tags or inject additional ones. These correspond to the two
     59     phases of processing.
     60     """
     61     def __init__(self, series, name=None, is_log=False):
     62         self.skip_blank = False          # True to skip a single blank line
     63         self.found_test = False          # Found a TEST= line
     64         self.lines_after_test = 0        # MNumber of lines found after TEST=
     65         self.warn = []                   # List of warnings we have collected
     66         self.linenum = 1                 # Output line number we are up to
     67         self.in_section = None           # Name of start...END section we are in
     68         self.notes = []                  # Series notes
     69         self.section = []                # The current section...END section
     70         self.series = series             # Info about the patch series
     71         self.is_log = is_log             # True if indent like git log
     72         self.in_change = 0               # Non-zero if we are in a change list
     73         self.blank_count = 0             # Number of blank lines stored up
     74         self.state = STATE_MSG_HEADER    # What state are we in?
     75         self.signoff = []                # Contents of signoff line
     76         self.commit = None               # Current commit
     77 
     78     def AddToSeries(self, line, name, value):
     79         """Add a new Series-xxx tag.
     80 
     81         When a Series-xxx tag is detected, we come here to record it, if we
     82         are scanning a 'git log'.
     83 
     84         Args:
     85             line: Source line containing tag (useful for debug/error messages)
     86             name: Tag name (part after 'Series-')
     87             value: Tag value (part after 'Series-xxx: ')
     88         """
     89         if name == 'notes':
     90             self.in_section = name
     91             self.skip_blank = False
     92         if self.is_log:
     93             self.series.AddTag(self.commit, line, name, value)
     94 
     95     def AddToCommit(self, line, name, value):
     96         """Add a new Commit-xxx tag.
     97 
     98         When a Commit-xxx tag is detected, we come here to record it.
     99 
    100         Args:
    101             line: Source line containing tag (useful for debug/error messages)
    102             name: Tag name (part after 'Commit-')
    103             value: Tag value (part after 'Commit-xxx: ')
    104         """
    105         if name == 'notes':
    106             self.in_section = 'commit-' + name
    107             self.skip_blank = False
    108 
    109     def CloseCommit(self):
    110         """Save the current commit into our commit list, and reset our state"""
    111         if self.commit and self.is_log:
    112             self.series.AddCommit(self.commit)
    113             self.commit = None
    114         # If 'END' is missing in a 'Cover-letter' section, and that section
    115         # happens to show up at the very end of the commit message, this is
    116         # the chance for us to fix it up.
    117         if self.in_section == 'cover' and self.is_log:
    118             self.series.cover = self.section
    119             self.in_section = None
    120             self.skip_blank = True
    121             self.section = []
    122 
    123     def ProcessLine(self, line):
    124         """Process a single line of a patch file or commit log
    125 
    126         This process a line and returns a list of lines to output. The list
    127         may be empty or may contain multiple output lines.
    128 
    129         This is where all the complicated logic is located. The class's
    130         state is used to move between different states and detect things
    131         properly.
    132 
    133         We can be in one of two modes:
    134             self.is_log == True: This is 'git log' mode, where most output is
    135                 indented by 4 characters and we are scanning for tags
    136 
    137             self.is_log == False: This is 'patch' mode, where we already have
    138                 all the tags, and are processing patches to remove junk we
    139                 don't want, and add things we think are required.
    140 
    141         Args:
    142             line: text line to process
    143 
    144         Returns:
    145             list of output lines, or [] if nothing should be output
    146         """
    147         # Initially we have no output. Prepare the input line string
    148         out = []
    149         line = line.rstrip('\n')
    150 
    151         commit_match = re_commit.match(line) if self.is_log else None
    152 
    153         if self.is_log:
    154             if line[:4] == '    ':
    155                 line = line[4:]
    156 
    157         # Handle state transition and skipping blank lines
    158         series_tag_match = re_series_tag.match(line)
    159         commit_tag_match = re_commit_tag.match(line)
    160         cover_match = re_cover.match(line)
    161         cover_cc_match = re_cover_cc.match(line)
    162         signoff_match = re_signoff.match(line)
    163         tag_match = None
    164         if self.state == STATE_PATCH_HEADER:
    165             tag_match = re_tag.match(line)
    166         is_blank = not line.strip()
    167         if is_blank:
    168             if (self.state == STATE_MSG_HEADER
    169                     or self.state == STATE_PATCH_SUBJECT):
    170                 self.state += 1
    171 
    172             # We don't have a subject in the text stream of patch files
    173             # It has its own line with a Subject: tag
    174             if not self.is_log and self.state == STATE_PATCH_SUBJECT:
    175                 self.state += 1
    176         elif commit_match:
    177             self.state = STATE_MSG_HEADER
    178 
    179         # If a tag is detected, or a new commit starts
    180         if series_tag_match or commit_tag_match or \
    181            cover_match or cover_cc_match or signoff_match or \
    182            self.state == STATE_MSG_HEADER:
    183             # but we are already in a section, this means 'END' is missing
    184             # for that section, fix it up.
    185             if self.in_section:
    186                 self.warn.append("Missing 'END' in section '%s'" % self.in_section)
    187                 if self.in_section == 'cover':
    188                     self.series.cover = self.section
    189                 elif self.in_section == 'notes':
    190                     if self.is_log:
    191                         self.series.notes += self.section
    192                 elif self.in_section == 'commit-notes':
    193                     if self.is_log:
    194                         self.commit.notes += self.section
    195                 else:
    196                     self.warn.append("Unknown section '%s'" % self.in_section)
    197                 self.in_section = None
    198                 self.skip_blank = True
    199                 self.section = []
    200             # but we are already in a change list, that means a blank line
    201             # is missing, fix it up.
    202             if self.in_change:
    203                 self.warn.append("Missing 'blank line' in section 'Series-changes'")
    204                 self.in_change = 0
    205 
    206         # If we are in a section, keep collecting lines until we see END
    207         if self.in_section:
    208             if line == 'END':
    209                 if self.in_section == 'cover':
    210                     self.series.cover = self.section
    211                 elif self.in_section == 'notes':
    212                     if self.is_log:
    213                         self.series.notes += self.section
    214                 elif self.in_section == 'commit-notes':
    215                     if self.is_log:
    216                         self.commit.notes += self.section
    217                 else:
    218                     self.warn.append("Unknown section '%s'" % self.in_section)
    219                 self.in_section = None
    220                 self.skip_blank = True
    221                 self.section = []
    222             else:
    223                 self.section.append(line)
    224 
    225         # Detect the commit subject
    226         elif not is_blank and self.state == STATE_PATCH_SUBJECT:
    227             self.commit.subject = line
    228 
    229         # Detect the tags we want to remove, and skip blank lines
    230         elif re_remove.match(line) and not commit_tag_match:
    231             self.skip_blank = True
    232 
    233             # TEST= should be the last thing in the commit, so remove
    234             # everything after it
    235             if line.startswith('TEST='):
    236                 self.found_test = True
    237         elif self.skip_blank and is_blank:
    238             self.skip_blank = False
    239 
    240         # Detect the start of a cover letter section
    241         elif cover_match:
    242             self.in_section = 'cover'
    243             self.skip_blank = False
    244 
    245         elif cover_cc_match:
    246             value = cover_cc_match.group(1)
    247             self.AddToSeries(line, 'cover-cc', value)
    248 
    249         # If we are in a change list, key collected lines until a blank one
    250         elif self.in_change:
    251             if is_blank:
    252                 # Blank line ends this change list
    253                 self.in_change = 0
    254             elif line == '---':
    255                 self.in_change = 0
    256                 out = self.ProcessLine(line)
    257             else:
    258                 if self.is_log:
    259                     self.series.AddChange(self.in_change, self.commit, line)
    260             self.skip_blank = False
    261 
    262         # Detect Series-xxx tags
    263         elif series_tag_match:
    264             name = series_tag_match.group(1)
    265             value = series_tag_match.group(2)
    266             if name == 'changes':
    267                 # value is the version number: e.g. 1, or 2
    268                 try:
    269                     value = int(value)
    270                 except ValueError as str:
    271                     raise ValueError("%s: Cannot decode version info '%s'" %
    272                         (self.commit.hash, line))
    273                 self.in_change = int(value)
    274             else:
    275                 self.AddToSeries(line, name, value)
    276                 self.skip_blank = True
    277 
    278         # Detect Commit-xxx tags
    279         elif commit_tag_match:
    280             name = commit_tag_match.group(1)
    281             value = commit_tag_match.group(2)
    282             if name == 'notes':
    283                 self.AddToCommit(line, name, value)
    284                 self.skip_blank = True
    285 
    286         # Detect the start of a new commit
    287         elif commit_match:
    288             self.CloseCommit()
    289             self.commit = commit.Commit(commit_match.group(1))
    290 
    291         # Detect tags in the commit message
    292         elif tag_match:
    293             # Remove Tested-by self, since few will take much notice
    294             if (tag_match.group(1) == 'Tested-by' and
    295                     tag_match.group(2).find(os.getenv('USER') + '@') != -1):
    296                 self.warn.append("Ignoring %s" % line)
    297             elif tag_match.group(1) == 'Patch-cc':
    298                 self.commit.AddCc(tag_match.group(2).split(','))
    299             else:
    300                 out = [line]
    301 
    302         # Suppress duplicate signoffs
    303         elif signoff_match:
    304             if (self.is_log or not self.commit or
    305                 self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
    306                 out = [line]
    307 
    308         # Well that means this is an ordinary line
    309         else:
    310             # Look for space before tab
    311             m = re_space_before_tab.match(line)
    312             if m:
    313                 self.warn.append('Line %d/%d has space before tab' %
    314                     (self.linenum, m.start()))
    315 
    316             # OK, we have a valid non-blank line
    317             out = [line]
    318             self.linenum += 1
    319             self.skip_blank = False
    320             if self.state == STATE_DIFFS:
    321                 pass
    322 
    323             # If this is the start of the diffs section, emit our tags and
    324             # change log
    325             elif line == '---':
    326                 self.state = STATE_DIFFS
    327 
    328                 # Output the tags (signeoff first), then change list
    329                 out = []
    330                 log = self.series.MakeChangeLog(self.commit)
    331                 out += [line]
    332                 if self.commit:
    333                     out += self.commit.notes
    334                 out += [''] + log
    335             elif self.found_test:
    336                 if not re_allowed_after_test.match(line):
    337                     self.lines_after_test += 1
    338 
    339         return out
    340 
    341     def Finalize(self):
    342         """Close out processing of this patch stream"""
    343         self.CloseCommit()
    344         if self.lines_after_test:
    345             self.warn.append('Found %d lines after TEST=' %
    346                     self.lines_after_test)
    347 
    348     def ProcessStream(self, infd, outfd):
    349         """Copy a stream from infd to outfd, filtering out unwanting things.
    350 
    351         This is used to process patch files one at a time.
    352 
    353         Args:
    354             infd: Input stream file object
    355             outfd: Output stream file object
    356         """
    357         # Extract the filename from each diff, for nice warnings
    358         fname = None
    359         last_fname = None
    360         re_fname = re.compile('diff --git a/(.*) b/.*')
    361         while True:
    362             line = infd.readline()
    363             if not line:
    364                 break
    365             out = self.ProcessLine(line)
    366 
    367             # Try to detect blank lines at EOF
    368             for line in out:
    369                 match = re_fname.match(line)
    370                 if match:
    371                     last_fname = fname
    372                     fname = match.group(1)
    373                 if line == '+':
    374                     self.blank_count += 1
    375                 else:
    376                     if self.blank_count and (line == '-- ' or match):
    377                         self.warn.append("Found possible blank line(s) at "
    378                                 "end of file '%s'" % last_fname)
    379                     outfd.write('+\n' * self.blank_count)
    380                     outfd.write(line + '\n')
    381                     self.blank_count = 0
    382         self.Finalize()
    383 
    384 
    385 def GetMetaDataForList(commit_range, git_dir=None, count=None,
    386                        series = None, allow_overwrite=False):
    387     """Reads out patch series metadata from the commits
    388 
    389     This does a 'git log' on the relevant commits and pulls out the tags we
    390     are interested in.
    391 
    392     Args:
    393         commit_range: Range of commits to count (e.g. 'HEAD..base')
    394         git_dir: Path to git repositiory (None to use default)
    395         count: Number of commits to list, or None for no limit
    396         series: Series object to add information into. By default a new series
    397             is started.
    398         allow_overwrite: Allow tags to overwrite an existing tag
    399     Returns:
    400         A Series object containing information about the commits.
    401     """
    402     if not series:
    403         series = Series()
    404     series.allow_overwrite = allow_overwrite
    405     params = gitutil.LogCmd(commit_range, reverse=True, count=count,
    406                             git_dir=git_dir)
    407     stdout = command.RunPipe([params], capture=True).stdout
    408     ps = PatchStream(series, is_log=True)
    409     for line in stdout.splitlines():
    410         ps.ProcessLine(line)
    411     ps.Finalize()
    412     return series
    413 
    414 def GetMetaData(start, count):
    415     """Reads out patch series metadata from the commits
    416 
    417     This does a 'git log' on the relevant commits and pulls out the tags we
    418     are interested in.
    419 
    420     Args:
    421         start: Commit to start from: 0=HEAD, 1=next one, etc.
    422         count: Number of commits to list
    423     """
    424     return GetMetaDataForList('HEAD~%d' % start, None, count)
    425 
    426 def GetMetaDataForTest(text):
    427     """Process metadata from a file containing a git log. Used for tests
    428 
    429     Args:
    430         text:
    431     """
    432     series = Series()
    433     ps = PatchStream(series, is_log=True)
    434     for line in text.splitlines():
    435         ps.ProcessLine(line)
    436     ps.Finalize()
    437     return series
    438 
    439 def FixPatch(backup_dir, fname, series, commit):
    440     """Fix up a patch file, by adding/removing as required.
    441 
    442     We remove our tags from the patch file, insert changes lists, etc.
    443     The patch file is processed in place, and overwritten.
    444 
    445     A backup file is put into backup_dir (if not None).
    446 
    447     Args:
    448         fname: Filename to patch file to process
    449         series: Series information about this patch set
    450         commit: Commit object for this patch file
    451     Return:
    452         A list of errors, or [] if all ok.
    453     """
    454     handle, tmpname = tempfile.mkstemp()
    455     outfd = os.fdopen(handle, 'w')
    456     infd = open(fname, 'r')
    457     ps = PatchStream(series)
    458     ps.commit = commit
    459     ps.ProcessStream(infd, outfd)
    460     infd.close()
    461     outfd.close()
    462 
    463     # Create a backup file if required
    464     if backup_dir:
    465         shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
    466     shutil.move(tmpname, fname)
    467     return ps.warn
    468 
    469 def FixPatches(series, fnames):
    470     """Fix up a list of patches identified by filenames
    471 
    472     The patch files are processed in place, and overwritten.
    473 
    474     Args:
    475         series: The series object
    476         fnames: List of patch files to process
    477     """
    478     # Current workflow creates patches, so we shouldn't need a backup
    479     backup_dir = None  #tempfile.mkdtemp('clean-patch')
    480     count = 0
    481     for fname in fnames:
    482         commit = series.commits[count]
    483         commit.patch = fname
    484         result = FixPatch(backup_dir, fname, series, commit)
    485         if result:
    486             print('%d warnings for %s:' % (len(result), fname))
    487             for warn in result:
    488                 print('\t', warn)
    489             print
    490         count += 1
    491     print('Cleaned %d patches' % count)
    492 
    493 def InsertCoverLetter(fname, series, count):
    494     """Inserts a cover letter with the required info into patch 0
    495 
    496     Args:
    497         fname: Input / output filename of the cover letter file
    498         series: Series object
    499         count: Number of patches in the series
    500     """
    501     fd = open(fname, 'r')
    502     lines = fd.readlines()
    503     fd.close()
    504 
    505     fd = open(fname, 'w')
    506     text = series.cover
    507     prefix = series.GetPatchPrefix()
    508     for line in lines:
    509         if line.startswith('Subject:'):
    510             # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
    511             zero_repeat = int(math.log10(count)) + 1
    512             zero = '0' * zero_repeat
    513             line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])
    514 
    515         # Insert our cover letter
    516         elif line.startswith('*** BLURB HERE ***'):
    517             # First the blurb test
    518             line = '\n'.join(text[1:]) + '\n'
    519             if series.get('notes'):
    520                 line += '\n'.join(series.notes) + '\n'
    521 
    522             # Now the change list
    523             out = series.MakeChangeLog(None)
    524             line += '\n' + '\n'.join(out)
    525         fd.write(line)
    526     fd.close()
    527