Home | History | Annotate | Download | only in stubdata
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2010 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 # Generates icudtXXl-default.dat from icudtXXl-all.dat and icu-data-default.txt.
     18 #
# Usage:
#    icu_dat_generator.py [-v|--verbose] [--very-verbose] [-h|--help]
     21 #
     22 # Sample usage:
     23 #   $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose
     24 
     25 import getopt
     26 import glob
     27 import os
     28 import os.path
     29 import re
     30 import shutil
     31 import subprocess
     32 import sys
     33 
     34 
     35 def PrintHelpAndExit():
     36   print "Usage:"
     37   print "  icu_dat_generator.py [-v|--verbose] [-h|--help]"
     38   print "Example:"
     39   print "  $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py"
     40   sys.exit(1)
     41 
     42 
     43 def InvokeIcuTool(tool, working_dir, args):
     44   command_list = [os.path.join(ICU_PREBUILT_DIR, tool)]
     45   command_list.extend(args)
     46 
     47   if VERBOSE:
     48     command = "[%s] %s" % (working_dir, " ".join(command_list))
     49     print command
     50 
     51   ret = subprocess.call(command_list, cwd=working_dir)
     52   if ret != 0:
     53     sys.exit(command_list[0:])
     54 
     55 
     56 def ExtractAllResourceFilesToTmpDir():
     57   # copy icudtXXl-all.dat to icudtXXl.dat
     58   src_dat = os.path.join(ICU4C_DIR, "stubdata", ICU_DATA + "-all.dat")
     59   dst_dat = os.path.join(ICU4C_DIR, "stubdata", ICU_DATA + ".dat")
     60   shutil.copyfile(src_dat, dst_dat)
     61   InvokeIcuTool("icupkg", None, [dst_dat, "-x", "*", "-d", TMP_DAT_PATH])
     62 
     63 
     64 def MakeDat(input_file, stubdata_dir):
     65   print "------ Processing '%s'..." % (input_file)
     66   if not os.path.isfile(input_file):
     67     print "%s not a file!" % input_file
     68     sys.exit(1)
     69   GenResIndex(input_file)
     70   CopyAndroidCnvFiles(stubdata_dir)
     71   # Run "icupkg -tl -s icudtXXl -a icu-data-default.txt new icudtXXl.dat".
     72   args = ["-tl", "-s", TMP_DAT_PATH, "-a", "add_list.txt", "new", ICU_DATA + ".dat"]
     73   InvokeIcuTool("icupkg", TMP_DAT_PATH, args)
     74 
     75 
     76 def ResFilesToLocales(res_files):
     77   locales = []
     78   for res_file in res_files:
     79     # res_file is something like 'coll/en_US.res'.
     80     if not '/' in res_file:
     81       locales.append(res_file)
     82     else:
     83       locales.append(res_file.split('/')[1].replace('.res', ''))
     84   return locales
     85 
     86 
     87 def WriteIndex(path, locales):
     88   empty_value = " {\"\"}\n"  # key-value pair for all locale entries
     89 
     90   f = open(path, "w")
     91   f.write("res_index:table(nofallback) {\n")
     92   f.write("  InstalledLocales {\n")
     93   for locale in sorted(locales):
     94     f.write(locale + empty_value)
     95 
     96   f.write("  }\n")
     97   f.write("}\n")
     98   f.close()
     99 
    100 
    101 def AddResFile(collection, path):
    102   # There are two consumers of the the input .txt file: this script and
    103   # icupkg. We only care about .res files, but icupkg needs files they depend
    104   # on too, so it's not an error to have to ignore non-.res files here.
    105   end = path.find(".res")
    106   if end > 0:
    107     collection.add(path[path.find("/")+1:end])
    108   return
    109 
    110 
    111 def AddAllResFiles(collection, dir_name, language):
    112   pattern1 = '%s/data/%s/%s.txt' % (ICU4C_DIR, dir_name, language)
    113   pattern2 = '%s/data/%s/%s_*.txt' % (ICU4C_DIR, dir_name, language)
    114   for path in glob.glob(pattern1) + glob.glob(pattern2):
    115     if 'TRADITIONAL' in path or 'PHONEBOOK' in path:
    116       continue
    117     parts = path.split('/')
    118     if dir_name == 'locales':
    119       path = parts[-1].replace('.txt', '')
    120     else:
    121       path = parts[-2] + '/' + parts[-1].replace('.txt', '.res')
    122     collection.add(path)
    123 
    124 
    125 def DumpFile(filename):
    126   print ' ----------------- %s' % filename
    127   os.system("cat %s" % filename)
    128   print ' ----------------- END'
    129 
    130 
    131 # Open input file (such as icu-data-default.txt).
    132 # Go through the list and generate res_index.res for locales, brkitr,
    133 # coll, et cetera.
    134 def GenResIndex(input_file):
    135   brkitrs = set()
    136   colls = set()
    137   currs = set()
    138   langs = set()
    139   locales = set()
    140   regions = set()
    141   zones = set()
    142 
    143   languages = [
    144     # Group 0.
    145     'en',
    146 
    147     # Group 1.
    148     'ar',
    149     'zh',
    150     'nl',
    151     'fr',
    152     'de',
    153     'it',
    154     'ja',
    155     'ko',
    156     'pl',
    157     'pt',
    158     'ru',
    159     'es',
    160     'th',
    161     'tr',
    162 
    163     # Group 2.
    164     'bg',
    165     'ca',
    166     'hr',
    167     'cs',
    168     'da',
    169     'fil','tl',
    170     'fi',
    171     'el',
    172     'iw','he',
    173     'hi',
    174     'hu',
    175     'id','in',
    176     'lv',
    177     'lt',
    178     'nb',
    179     'ro',
    180     'sr',
    181     'sk',
    182     'sl',
    183     'sv',
    184     'uk',
    185     'vi',
    186     'fa',
    187 
    188     # Group 3.
    189     'af',
    190     'am',
    191     'bn',
    192     'et',
    193     'is',
    194     'ms',
    195     'mr',
    196     'sw',
    197     'ta',
    198     'zu',
    199 
    200     # Group 4.
    201     'eu',
    202     'gl',
    203     'gu',
    204     'kn',
    205     'ml',
    206     'te',
    207     'ur',
    208 
    209     # Group 5.
    210     'km',
    211     'lo',
    212     'ne',
    213     'si',
    214     'ka',
    215     'hy',
    216     'mn',
    217     'cy',
    218 
    219     # Others.
    220     'az',
    221     'be',
    222     'rm',
    223   ]
    224 
    225   for language in languages:
    226     AddAllResFiles(brkitrs, 'brkitr', language)
    227     AddAllResFiles(colls, 'coll', language)
    228     AddAllResFiles(currs, 'curr', language)
    229     AddAllResFiles(langs, 'lang', language)
    230     AddAllResFiles(regions, 'region', language)
    231     AddAllResFiles(zones, 'zone', language)
    232     AddAllResFiles(locales, 'locales', language)
    233 
    234   # We need to merge the human-edited icu-data-default.txt with the
    235   # machine-generated list of files needed to support the various languages.
    236   new_add_list = []
    237 
    238   for line in open(input_file, "r"):
    239     new_add_list.append(line)
    240     if "root." in line or "res_index" in line or "_.res" in line:
    241       continue
    242     if "brkitr/" in line:
    243       AddResFile(brkitrs, line)
    244     elif "coll/" in line:
    245       AddResFile(colls, line)
    246     elif "curr/" in line:
    247       AddResFile(currs, line)
    248     elif "lang/" in line:
    249       AddResFile(langs, line)
    250     elif "region/" in line:
    251       AddResFile(regions, line)
    252     elif "zone/" in line:
    253       AddResFile(zones, line)
    254     elif ".res" in line:
    255       # TODO: these should all now be misc resources!
    256       # We need to determine the resource is locale resource or misc resource.
    257       # To determine the locale resource, we assume max script length is 3.
    258       end = line.find(".res")
    259       if end <= 3 or (line.find("_") <= 3 and line.find("_") > 0):
    260         locales.add(line[:end])
    261 
    262   kind_to_res_files = {
    263       "brkitr": brkitrs,
    264       "coll": colls,
    265       "curr": currs,
    266       "lang": langs,
    267       "locales": locales,
    268       "region": regions,
    269       "zone": zones
    270   }
    271 
    272   # Merge the machine-generated list into the human-generated list.
    273   for kind, res_files in kind_to_res_files.items():
    274     for res_file in sorted(res_files):
    275       if '.' not in res_file:
    276         res_file = res_file + '.res'
    277       new_add_list.append(res_file)
    278 
    279   if VERBOSE:
    280     for kind, res_files in kind_to_res_files.items():
    281       print "%s=%s" % (kind, sorted(res_files))
    282 
    283   # Write the genrb input files.
    284 
    285   # First add_list.txt, the argument to icupkg -a...
    286   f = open(os.path.join(TMP_DAT_PATH, "add_list.txt"), "w")
    287   for line in new_add_list:
    288     if line.startswith('#'):
    289       continue
    290     f.write("%s\n" % line)
    291   f.close()
    292 
    293   # Second res_index.txt, used below by genrb.
    294   res_index = "res_index.txt"
    295   WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales)
    296   for kind, res_files in kind_to_res_files.items():
    297     if kind == "locales":
    298       continue
    299     res_index_filename = os.path.join(TMP_DAT_PATH, kind, res_index)
    300     WriteIndex(res_index_filename, ResFilesToLocales(res_files))
    301     if VERY_VERBOSE:
    302       DumpFile(res_index_filename)
    303 
    304   # Useful if you need to see the temporary input files we generated.
    305   if VERY_VERBOSE:
    306     DumpFile('%s/add_list.txt' % TMP_DAT_PATH)
    307     DumpFile('%s/res_index.txt' % TMP_DAT_PATH)
    308 
    309   # Call genrb to generate new res_index.res.
    310   InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
    311   for kind, res_files in kind_to_res_files.items():
    312     if kind == "locales":
    313       continue
    314     InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])
    315 
    316 
    317 def CopyAndroidCnvFiles(stubdata_dir):
    318   android_specific_cnv = ["gsm-03.38-2000.cnv",
    319                           "iso-8859_16-2001.cnv",
    320                           "docomo-shift_jis-2012.cnv",
    321                           "kddi-jisx-208-2007.cnv",
    322                           "kddi-shift_jis-2012.cnv",
    323                           "softbank-jisx-208-2007.cnv",
    324                           "softbank-shift_jis-2012.cnv"]
    325   for cnv_file in android_specific_cnv:
    326     src_path = os.path.join(stubdata_dir, "cnv", cnv_file)
    327     dst_path = os.path.join(TMP_DAT_PATH, cnv_file)
    328     shutil.copyfile(src_path, dst_path)
    329     if VERBOSE:
    330       print "copy " + src_path + " " + dst_path
    331 
    332 
    333 def main():
    334   global ANDROID_BUILD_TOP  # $ANDROID_BUILD_TOP
    335   global ICU4C_DIR          # $ANDROID_BUILD_TOP/external/icu4c
    336   global ICU_PREBUILT_DIR   # Directory containing pre-built ICU tools.
    337   global ICU_DATA           # e.g. "icudt50l"
    338   global TMP_DAT_PATH       # Temporary directory to store all resource files and
    339                             # intermediate dat files.
    340   global VERBOSE, VERY_VERBOSE
    341 
    342   VERBOSE = VERY_VERBOSE = False
    343 
    344   show_help = False
    345   try:
    346     opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose", "very-verbose"])
    347   except getopt.error:
    348     PrintHelpAndExit()
    349   for opt, _ in opts:
    350     if opt in ("-h", "--help"):
    351       show_help = True
    352     elif opt in ("-v", "--verbose"):
    353       VERBOSE = True
    354     elif opt in ("--very-verbose"):
    355       VERY_VERBOSE = VERBOSE = True
    356   if args:
    357     show_help = True
    358 
    359   if show_help:
    360     PrintHelpAndExit()
    361 
    362   ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
    363   if not ANDROID_BUILD_TOP:
    364     print "$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'."
    365     sys.exit(1)
    366   ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
    367   stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")
    368 
    369   # Work out the ICU version from the source .dat filename, so we can find the
    370   # appropriate pre-built ICU tools.
    371   source_dat = os.path.basename(glob.glob(os.path.join(stubdata_dir, "icudt*.dat"))[0])
    372   icu_version = re.sub(r"([^0-9])", "", source_dat)
    373   ICU_PREBUILT_DIR = os.path.join(os.environ.get("ANDROID_BUILD_TOP"),
    374       "prebuilts", "misc", "linux-x86_64", "icu-%s%s" % (icu_version[0], icu_version[1]))
    375   if not os.path.exists(ICU_PREBUILT_DIR):
    376     print "%s does not exist!" % ICU_PREBUILT_DIR
    377 
    378   ICU_DATA = "icudt" + icu_version + "l"
    379 
    380   # Check that icudtXXl-all.dat exists (since we build the other .dat files from that).
    381   full_data_filename = os.path.join(stubdata_dir, ICU_DATA + "-all.dat")
    382   if not os.path.isfile(full_data_filename):
    383     print "%s not present." % full_data_filename
    384     sys.exit(1)
    385 
    386   # Create a temporary working directory.
    387   TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
    388   if os.path.exists(TMP_DAT_PATH):
    389     shutil.rmtree(TMP_DAT_PATH)
    390   os.mkdir(TMP_DAT_PATH)
    391 
    392   # Extract resource files from icudtXXl-all.dat to TMP_DAT_PATH.
    393   ExtractAllResourceFilesToTmpDir()
    394 
    395   input_file = os.path.join(stubdata_dir, "icu-data-default.txt")
    396   output_file = os.path.join(stubdata_dir, ICU_DATA + "-default.dat")
    397   MakeDat(input_file, stubdata_dir)
    398   shutil.copyfile(os.path.join(TMP_DAT_PATH, ICU_DATA + ".dat"), output_file)
    399   print "Generated ICU data: %s" % output_file
    400 
    401   # Cleanup temporary working directory and icudtXXl.dat
    402   shutil.rmtree(TMP_DAT_PATH)
    403   os.remove(os.path.join(stubdata_dir, ICU_DATA + ".dat"))
    404 
    405 if __name__ == "__main__":
    406   main()
    407