Home | History | Annotate | Download | only in scripts
      1 #! /usr/bin/env python3
      2 
      3 # Perform massive identifier substitution on C source files.
      4 # This actually tokenizes the files (to some extent) so it can
      5 # avoid making substitutions inside strings or comments.
      6 # Inside strings, substitutions are never made; inside comments,
      7 # it is a user option (off by default).
      8 #
      9 # The substitutions are read from one or more files whose lines,
     10 # when not empty, after stripping comments starting with #,
     11 # must contain exactly two words separated by whitespace: the
     12 # old identifier and its replacement.
     13 #
     14 # The option -r reverses the sense of the substitutions (this may be
     15 # useful to undo a particular substitution).
     16 #
     17 # If the old identifier is prefixed with a '*' (with no intervening
     18 # whitespace), then it will not be substituted inside comments.
     19 #
     20 # Command line arguments are files or directories to be processed.
     21 # Directories are searched recursively for files whose name looks
     22 # like a C file (ends in .h or .c).  The special filename '-' means
     23 # operate in filter mode: read stdin, write stdout.
     24 #
     25 # Symbolic links are always ignored (except as explicit directory
     26 # arguments).
     27 #
     28 # The original files are kept as back-up with a "~" suffix.
     29 #
     30 # Changes made are reported to stdout in a diff-like format.
     31 #
     32 # NB: by changing only the function fixline() you can turn this
     33 # into a program for different changes to C source files; by
     34 # changing the function wanted() you can make a different selection of
     35 # files.
     36 
     37 import sys
     38 import re
     39 import os
     40 from stat import *
     41 import getopt
     42 
     43 err = sys.stderr.write
     44 dbg = err
     45 rep = sys.stdout.write
     46 
     47 def usage():
     48     progname = sys.argv[0]
     49     err('Usage: ' + progname +
     50               ' [-c] [-r] [-s file] ... file-or-directory ...\n')
     51     err('\n')
     52     err('-c           : substitute inside comments\n')
     53     err('-r           : reverse direction for following -s options\n')
     54     err('-s substfile : add a file of substitutions\n')
     55     err('\n')
     56     err('Each non-empty non-comment line in a substitution file must\n')
     57     err('contain exactly two words: an identifier and its replacement.\n')
     58     err('Comments start with a # character and end at end of line.\n')
     59     err('If an identifier is preceded with a *, it is not substituted\n')
     60     err('inside a comment even when -c is specified.\n')
     61 
     62 def main():
     63     try:
     64         opts, args = getopt.getopt(sys.argv[1:], 'crs:')
     65     except getopt.error as msg:
     66         err('Options error: ' + str(msg) + '\n')
     67         usage()
     68         sys.exit(2)
     69     bad = 0
     70     if not args: # No arguments
     71         usage()
     72         sys.exit(2)
     73     for opt, arg in opts:
     74         if opt == '-c':
     75             setdocomments()
     76         if opt == '-r':
     77             setreverse()
     78         if opt == '-s':
     79             addsubst(arg)
     80     for arg in args:
     81         if os.path.isdir(arg):
     82             if recursedown(arg): bad = 1
     83         elif os.path.islink(arg):
     84             err(arg + ': will not process symbolic links\n')
     85             bad = 1
     86         else:
     87             if fix(arg): bad = 1
     88     sys.exit(bad)
     89 
     90 # Change this regular expression to select a different set of files
     91 Wanted = r'^[a-zA-Z0-9_]+\.[ch]$'
     92 def wanted(name):
     93     return re.match(Wanted, name)
     94 
     95 def recursedown(dirname):
     96     dbg('recursedown(%r)\n' % (dirname,))
     97     bad = 0
     98     try:
     99         names = os.listdir(dirname)
    100     except OSError as msg:
    101         err(dirname + ': cannot list directory: ' + str(msg) + '\n')
    102         return 1
    103     names.sort()
    104     subdirs = []
    105     for name in names:
    106         if name in (os.curdir, os.pardir): continue
    107         fullname = os.path.join(dirname, name)
    108         if os.path.islink(fullname): pass
    109         elif os.path.isdir(fullname):
    110             subdirs.append(fullname)
    111         elif wanted(name):
    112             if fix(fullname): bad = 1
    113     for fullname in subdirs:
    114         if recursedown(fullname): bad = 1
    115     return bad
    116 
    117 def fix(filename):
    118 ##  dbg('fix(%r)\n' % (filename,))
    119     if filename == '-':
    120         # Filter mode
    121         f = sys.stdin
    122         g = sys.stdout
    123     else:
    124         # File replacement mode
    125         try:
    126             f = open(filename, 'r')
    127         except IOError as msg:
    128             err(filename + ': cannot open: ' + str(msg) + '\n')
    129             return 1
    130         head, tail = os.path.split(filename)
    131         tempname = os.path.join(head, '@' + tail)
    132         g = None
    133     # If we find a match, we rewind the file and start over but
    134     # now copy everything to a temp file.
    135     lineno = 0
    136     initfixline()
    137     while 1:
    138         line = f.readline()
    139         if not line: break
    140         lineno = lineno + 1
    141         while line[-2:] == '\\\n':
    142             nextline = f.readline()
    143             if not nextline: break
    144             line = line + nextline
    145             lineno = lineno + 1
    146         newline = fixline(line)
    147         if newline != line:
    148             if g is None:
    149                 try:
    150                     g = open(tempname, 'w')
    151                 except IOError as msg:
    152                     f.close()
    153                     err(tempname+': cannot create: '+
    154                         str(msg)+'\n')
    155                     return 1
    156                 f.seek(0)
    157                 lineno = 0
    158                 initfixline()
    159                 rep(filename + ':\n')
    160                 continue # restart from the beginning
    161             rep(repr(lineno) + '\n')
    162             rep('< ' + line)
    163             rep('> ' + newline)
    164         if g is not None:
    165             g.write(newline)
    166 
    167     # End of file
    168     if filename == '-': return 0 # Done in filter mode
    169     f.close()
    170     if not g: return 0 # No changes
    171     g.close()
    172 
    173     # Finishing touch -- move files
    174 
    175     # First copy the file's mode to the temp file
    176     try:
    177         statbuf = os.stat(filename)
    178         os.chmod(tempname, statbuf[ST_MODE] & 0o7777)
    179     except OSError as msg:
    180         err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    181     # Then make a backup of the original file as filename~
    182     try:
    183         os.rename(filename, filename + '~')
    184     except OSError as msg:
    185         err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    186     # Now move the temp file to the original file
    187     try:
    188         os.rename(tempname, filename)
    189     except OSError as msg:
    190         err(filename + ': rename failed (' + str(msg) + ')\n')
    191         return 1
    192     # Return success
    193     return 0
    194 
    195 # Tokenizing ANSI C (partly)
    196 
    197 Identifier = '(struct )?[a-zA-Z_][a-zA-Z0-9_]+'
    198 String = r'"([^\n\\"]|\\.)*"'
    199 Char = r"'([^\n\\']|\\.)*'"
    200 CommentStart = r'/\*'
    201 CommentEnd = r'\*/'
    202 
    203 Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
    204 Octnumber = '0[0-7]*[uUlL]*'
    205 Decnumber = '[1-9][0-9]*[uUlL]*'
    206 Intnumber = Hexnumber + '|' + Octnumber + '|' + Decnumber
    207 Exponent = '[eE][-+]?[0-9]+'
    208 Pointfloat = r'([0-9]+\.[0-9]*|\.[0-9]+)(' + Exponent + r')?'
    209 Expfloat = '[0-9]+' + Exponent
    210 Floatnumber = Pointfloat + '|' + Expfloat
    211 Number = Floatnumber + '|' + Intnumber
    212 
    213 # Anything else is an operator -- don't list this explicitly because of '/*'
    214 
    215 OutsideComment = (Identifier, Number, String, Char, CommentStart)
    216 OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')'
    217 OutsideCommentProgram = re.compile(OutsideCommentPattern)
    218 
    219 InsideComment = (Identifier, Number, CommentEnd)
    220 InsideCommentPattern = '(' + '|'.join(InsideComment) + ')'
    221 InsideCommentProgram = re.compile(InsideCommentPattern)
    222 
    223 def initfixline():
    224     global Program
    225     Program = OutsideCommentProgram
    226 
    227 def fixline(line):
    228     global Program
    229 ##  print('-->', repr(line))
    230     i = 0
    231     while i < len(line):
    232         match = Program.search(line, i)
    233         if match is None: break
    234         i = match.start()
    235         found = match.group(0)
    236 ##      if Program is InsideCommentProgram: print(end='... ')
    237 ##      else: print(end='    ')
    238 ##      print(found)
    239         if len(found) == 2:
    240             if found == '/*':
    241                 Program = InsideCommentProgram
    242             elif found == '*/':
    243                 Program = OutsideCommentProgram
    244         n = len(found)
    245         if found in Dict:
    246             subst = Dict[found]
    247             if Program is InsideCommentProgram:
    248                 if not Docomments:
    249                     print('Found in comment:', found)
    250                     i = i + n
    251                     continue
    252                 if found in NotInComment:
    253 ##                  print(end='Ignored in comment: ')
    254 ##                  print(found, '-->', subst)
    255 ##                  print('Line:', line, end='')
    256                     subst = found
    257 ##              else:
    258 ##                  print(end='Substituting in comment: ')
    259 ##                  print(found, '-->', subst)
    260 ##                  print('Line:', line, end='')
    261             line = line[:i] + subst + line[i+n:]
    262             n = len(subst)
    263         i = i + n
    264     return line
    265 
    266 Docomments = 0
    267 def setdocomments():
    268     global Docomments
    269     Docomments = 1
    270 
    271 Reverse = 0
    272 def setreverse():
    273     global Reverse
    274     Reverse = (not Reverse)
    275 
    276 Dict = {}
    277 NotInComment = {}
    278 def addsubst(substfile):
    279     try:
    280         fp = open(substfile, 'r')
    281     except IOError as msg:
    282         err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
    283         sys.exit(1)
    284     lineno = 0
    285     while 1:
    286         line = fp.readline()
    287         if not line: break
    288         lineno = lineno + 1
    289         try:
    290             i = line.index('#')
    291         except ValueError:
    292             i = -1          # Happens to delete trailing \n
    293         words = line[:i].split()
    294         if not words: continue
    295         if len(words) == 3 and words[0] == 'struct':
    296             words[:2] = [words[0] + ' ' + words[1]]
    297         elif len(words) != 2:
    298             err(substfile + '%s:%r: warning: bad line: %r' % (substfile, lineno, line))
    299             continue
    300         if Reverse:
    301             [value, key] = words
    302         else:
    303             [key, value] = words
    304         if value[0] == '*':
    305             value = value[1:]
    306         if key[0] == '*':
    307             key = key[1:]
    308             NotInComment[key] = value
    309         if key in Dict:
    310             err('%s:%r: warning: overriding: %r %r\n' % (substfile, lineno, key, value))
    311             err('%s:%r: warning: previous: %r\n' % (substfile, lineno, Dict[key]))
    312         Dict[key] = value
    313     fp.close()
    314 
    315 if __name__ == '__main__':
    316     main()
    317