Home | History | Annotate | Download | only in tools
      1 #! /usr/bin/python
      2 
      3 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
      4 # All rights reserved.
      5 
      6 #
      7 #  Script to check and fix svn property settings for ICU source files.
      8 #  Also check for the correct line endings on files with svn:eol-style = native
      9 #
     10 #  THIS SCRIPT DOES NOT WORK ON WINDOWS
     11 #     It only works correctly on platforms where the native line ending is a plain \n
     12 #
     13 #  usage:
     14 #     icu-svnprops-check.py  [options]
     15 #
     16 #  options:
     17 #     -f | --fix     Fix any problems that are found
     18 #     -h | --help    Print a usage line and exit.
     19 #
     20 #  The tool operates recursively on the directory from which it is run.
     21 #  Only files from the svn repository are checked.
     22 #  No changes are made to the repository; only the working copy will be altered.
     23 
     24 import sys
     25 import os
     26 import os.path
     27 import re
     28 import getopt
     29 
#
#  svn autoprops definitions.
#      Copy and paste here the ICU recommended auto-props from
#      http://icu-project.org/docs/subversion_howto/index.html
#
#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
#
#  Lines inside the string that are blank or start with '#', and the
#  "[auto-props]" section header, are skipped by parse_auto_props().
#
#  NOTE(review): the *.rtf and *.pdf entries name the property "mime-type",
#  without the "svn:" prefix used by every other mime-type entry.  The name is
#  passed through verbatim to "svn propget" / "svn propset", so this looks like
#  a typo for "svn:mime-type" - confirm against the ICU howto before changing.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
     87 
     88 
     89 # file_types:  The parsed form of the svn auto-props specification.
     90 #              A list of file types - .cc, .cpp, .txt, etc.
     91 #              each element is a [type, proplist]
     92 #              "type" is a regular expression string that will match a file name
     93 #              prop list is another list, one element per property.
     94 #              Each property item is a two element list, [prop name, prop value]
     95 file_types = list()
     96 
     97 def parse_auto_props():
     98     aprops = svn_auto_props.splitlines()
     99     for propline in aprops:
    100         if re.match("\s*(#.*)?$", propline):         # Match comment and blank lines
    101             continue
    102         if re.match("\s*\[auto-props\]", propline):  # Match the [auto-props] line.
    103             continue
    104         if not re.match("\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
    105             print "Bad line from autoprops definitions: " + propline
    106             continue
    107         file_type, string_proplist = propline.split("=", 1)
    108 
    109         #transform the file type expression from autoprops into a normal regular expression.
    110         #  e.g.  "*.cpp"  ==>  ".*\.cpp$"
    111         file_type = file_type.strip()
    112         file_type = file_type.replace(".", "\.")
    113         file_type = file_type.replace("*", ".*")
    114         file_type = file_type + "$"
    115 
    116         # example string_proplist at this point: " svn:eol-style=native;svn:executable"
    117         # split on ';' into a list of properties.  The negative lookahead and lookbehind
    118         # in the split regexp are to prevent matching on ';;', which is an escaped ';'
    119         # within a property value.
    120         string_proplist = re.split("(?<!;);(?!;)", string_proplist)
    121         proplist = list()
    122         for prop in string_proplist:
    123             if prop.find("=") >= 0:
    124                 prop_name, prop_val = prop.split("=", 1)
    125             else:
    126                 # properties with no explicit value, e.g. svn:executable
    127                 prop_name, prop_val = prop, ""
    128             prop_name = prop_name.strip()
    129             prop_val = prop_val.strip()
    130             # unescape any ";;" in a property value, e.g. the mime-type from
    131             #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
    132             prop_val = prop_val.replace(";;", ";");
    133             proplist.append((prop_name, prop_val))
    134 
    135         file_types.append((file_type, proplist))
    136     # print file_types
    137 
    138         
    139 def runCommand(cmd):
    140     output_file = os.popen(cmd);
    141     output_text = output_file.read();
    142     exit_status = output_file.close();
    143     if exit_status:
    144         print >>sys.stderr, '"', cmd, '" failed.  Exiting.'
    145         sys.exit(exit_status)
    146     return output_text
    147 
    148 
    149 def usage():
    150     print "usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]"
    151 
    152     
    153 #
    154 #  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
    155 #    file_name:        name of a text file.
    156 #    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
    157 #    actual_mime_type: existing svn:mime-type property value for the file.
    158 #    return:           svn:mime-type property value, with charset added when appropriate.
    159 #
    160 def check_utf8(file_name, base_mime_type, actual_mime_type):
    161 
    162     # If the file already has a charset in its mime-type, don't make any change.
    163 
    164     if actual_mime_type.find("charset=") > 0:
    165         return actual_mime_type;
    166 
    167     f = open(file_name, 'r')
    168     bytes = f.read()
    169     f.close()
    170 
    171     if all(ord(byte) < 128 for byte in bytes):
    172         # pure ASCII.
    173         # print "Pure ASCII " + file_name
    174         return base_mime_type
    175 
    176     try:
    177         bytes.decode("UTF-8")
    178     except UnicodeDecodeError:
    179         print "warning: %s: not ASCII, not UTF-8" % file_name
    180         return base_mime_type
    181 
    182     if ord(bytes[0]) != 0xef:
    183       print "UTF-8 file with no BOM: " + file_name
    184 
    185     # Append charset=utf-8.
    186     return base_mime_type + ';charset=utf-8'
    187 
    188 
    189 def main(argv):
    190     fix_problems = False;
    191     try:
    192         opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    193     except getopt.GetoptError:
    194         print "unrecognized option: " + argv[0]
    195         usage()
    196         sys.exit(2)
    197     for opt, arg in opts:
    198         if opt in ("-h", "--help"):
    199             usage()
    200             sys.exit()
    201         if opt in ("-f", "--fix"):
    202             fix_problems = True
    203     if args:
    204         print "unexpected command line argument"
    205         usage()
    206         sys.exit()
    207 
    208     parse_auto_props()
    209     output = runCommand("svn ls -R ");
    210     file_list = output.splitlines()
    211 
    212     for f in file_list:
    213         if os.path.isdir(f):
    214             # print "Skipping dir " + f
    215             continue
    216         if not os.path.isfile(f):
    217             print "Repository file not in working copy: " + f
    218             continue;
    219 
    220         for file_pattern, props in file_types:
    221             if re.match(file_pattern, f):
    222                 # print "doing " + f
    223                 for propname, propval in props:
    224                     actual_propval = runCommand("svn propget --strict " + propname + " " + f)
    225                     #print propname + ": " + actual_propval
    226                     if propname == "svn:mime-type" and propval.find("text/") == 0:
    227                         # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
    228                         propval = check_utf8(f, propval, actual_propval)
    229                     if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
    230                         print "svn propset %s '%s' %s" % (propname, propval, f)
    231                         if fix_problems:
    232                             os.system("svn propset %s '%s' %s" % (propname, propval, f))
    233                     if propname == "svn:eol-style" and propval == "native":
    234                         if os.system("grep -q -v \r " + f):
    235                             if fix_problems:
    236                                 print f + ": Removing DOS CR characters."
    237                                 os.system("sed -i s/\r// " + f);
    238                             else:
    239                                 print f + " contains DOS CR characters."
    240 
    241 
# Run the check only when this file is executed as a script.  The program
# name is dropped from the argument list, which main() hands to getopt.
if __name__ == "__main__":
    main(sys.argv[1:])
    244