Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python2
      2 # SPDX-License-Identifier: GPL-2.0+
      3 #
      4 # Copyright (C) 2014, Masahiro Yamada <yamada.m (at] jp.panasonic.com>
      5 
      6 '''
      7 A tool to create/update the mailmap file
      8 
      9 The command 'git shortlog' summarizes git log output in a format suitable
     10 for inclusion in release announcements. Each commit will be grouped by
     11 author and title.
     12 
     13 One problem is that the authors' name and/or email address is sometimes
     14 spelled differently. The .mailmap feature can be used to coalesce together
     15 commits by the same person.
     16 (See 'man git-shortlog' for further information on this feature.)
     17 
     18 This tool helps to create/update the mailmap file.
     19 
     20 It runs 'git shortlog' internally and searches for differently spelled author
     21 names which share the same email address. The author name with the most
     22 commits is assumed to be a canonical real name. If the number of commits
     23 from the canonical name is equal to or greater than 'MIN_COMMITS',
     24 the entry for the canonical name will be output. ('MIN_COMMITS' is used
     25 here because we do not want to create a fat mailmap by adding every author
     26 with only a few commits.)
     27 
     28 If there exists a mailmap file specified by the mailmap.file configuration
     29 option or '.mailmap' at the toplevel of the repository, it is used as
     30 a base file. (The mailmap.file configuration takes precedence over the
     31 '.mailmap' file if both exist.)
     32 
     33 The base file and the newly added entries are merged together and sorted
     34 alphabetically (but the comment block is kept untouched), and then printed
     35 to standard output.
     36 
     37 Usage
     38 -----
     39 
     40   scripts/mailmapper
     41 
     42 prints the mailmapping to standard output.
     43 
     44   scripts/mailmapper > tmp; mv tmp .mailmap
     45 
     46 will be useful for updating '.mailmap' file.
     47 '''
     48 
     49 import sys
     50 import os
     51 import subprocess
     52 
     53 # The entries only for the canonical names with MIN_COMMITS or more commits.
     54 # This limitation is used so as not to create a too big mailmap file.
     55 MIN_COMMITS = 50
     56 
     57 try:
     58     toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
     59 except subprocess.CalledProcessError:
     60     sys.exit('Please run in a git repository.')
     61 
     62 # strip '\n'
     63 toplevel = toplevel.rstrip()
     64 
     65 # Change the current working directory to the toplevel of the respository
     66 # for our easier life.
     67 os.chdir(toplevel)
     68 
     69 # First, create 'auther name' vs 'number of commits' database.
     70 # We assume the name with the most commits as the canonical real name.
     71 shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n'])
     72 
     73 commits_per_name = {}
     74 
     75 for line in shortlog.splitlines():
     76     try:
     77         commits, name = line.split(None, 1)
     78     except ValueError:
     79         # ignore lines with an empty author name
     80         pass
     81     commits_per_name[name] = int(commits)
     82 
     83 # Next, coalesce the auther names with the same email address
     84 shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e'])
     85 
     86 mail_vs_name = {}
     87 output = {}
     88 
     89 for line in shortlog.splitlines():
     90     # tmp, mail = line.rsplit(None, 1) is not safe
     91     # because weird email addresses might include whitespaces
     92     tmp, mail = line.split('<')
     93     mail = '<' + mail.rstrip()
     94     try:
     95         _, name = tmp.rstrip().split(None, 1)
     96     except ValueError:
     97         # author name is empty
     98         name = ''
     99     if mail in mail_vs_name:
    100         # another name for the same email address
    101         prev_name = mail_vs_name[mail]
    102         # Take the name with more commits
    103         major_name = sorted([prev_name, name],
    104                             key=lambda x: commits_per_name[x] if x else 0)[1]
    105         mail_vs_name[mail] = major_name
    106         if commits_per_name[major_name] > MIN_COMMITS:
    107             output[mail] = major_name
    108     else:
    109         mail_vs_name[mail] = name
    110 
    111 # [1] If there exists a mailmap file at the location pointed to
    112 #     by the mailmap.file configuration option, update it.
    113 # [2] If the file .mailmap exists at the toplevel of the repository, update it.
    114 # [3] Otherwise, create a new mailmap file.
    115 mailmap_files = []
    116 
    117 try:
    118     config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
    119 except subprocess.CalledProcessError:
    120     config_mailmap = ''
    121 
    122 config_mailmap = config_mailmap.rstrip()
    123 if config_mailmap:
    124     mailmap_files.append(config_mailmap)
    125 
    126 mailmap_files.append('.mailmap')
    127 
    128 infile = None
    129 
    130 for map_file in mailmap_files:
    131     try:
    132         infile = open(map_file)
    133     except:
    134         # Failed to open. Try next.
    135         continue
    136     break
    137 
    138 comment_block = []
    139 output_lines = []
    140 
    141 if infile:
    142     for line in infile:
    143         if line[0] == '#' or line[0] == '\n':
    144             comment_block.append(line)
    145         else:
    146             output_lines.append(line)
    147             break
    148     for line in infile:
    149         output_lines.append(line)
    150     infile.close()
    151 
    152 for mail, name in output.items():
    153     output_lines.append(name + ' ' + mail + '\n')
    154 
    155 output_lines.sort()
    156 
    157 sys.stdout.write(''.join(comment_block + output_lines))
    158