Home | History | Annotate | Download | only in stubdata
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2010 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 #
     17 # Generate ICU dat files for locale relevant resources.
     18 #
     19 # Usage:
     20 #    icu_dat_generator.py [-v] [-h]
     21 #
     22 # Sample usage:
     23 #   $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose
     24 #
     25 #  Add new dat file:
     26 #   1. Add icudtxxl-<datname>.txt to $ANDROID_BUILD_TOP/external/icu4c/stubdata.
     27 #      Check the example file under
     28 #      $ANDROID_BUILD_TOP/external/icu4c/stubdata/icudt48l-us.txt
     29 #   2. Add an entry to main() --> datlist[]
     30 #   3. Run this script to generate dat files.
     31 #
     32 #  For ICU upgrade
     33 #    We cannot get CLDR version from dat file unless calling ICU function.
     34 #    If there is a CLDR version change, please modify "global CLDR_VERSION".
     35 
     36 import getopt
     37 import glob
     38 import os.path
     39 import re
     40 import shutil
     41 import subprocess
     42 import sys
     43 
     44 
     45 def PrintHelpAndExit():
     46   print "Usage:"
     47   print "  icu_dat_generator.py [-v|--verbose] [-h|--help]"
     48   print "Example:"
     49   print "  $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py"
     50   sys.exit(1)
     51 
     52 
     53 def InvokeIcuTool(tool, working_dir, args):
     54   command_list = [os.path.join(ICU_PREBUILT_DIR, tool)]
     55   command_list.extend(args)
     56 
     57   if VERBOSE:
     58     command = "[%s] %s" % (working_dir, " ".join(command_list))
     59     print command
     60 
     61   ret = subprocess.call(command_list, cwd=working_dir)
     62   if ret != 0:
     63     sys.exit(command_list[0:])
     64 
     65 
     66 def ExtractAllResourceToTempDir():
     67   # copy icudtxxl-all.dat to icudtxxl.dat
     68   src_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA + "-all.dat")
     69   dst_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA + ".dat")
     70   shutil.copyfile(src_dat, dst_dat)
     71   InvokeIcuTool("icupkg", None, [dst_dat, "-x", "*", "-d", TMP_DAT_PATH])
     72 
     73 
     74 def MakeDat(input_file, stubdata_dir):
     75   print "------ Processing '%s'..." % (input_file)
     76   if not os.path.isfile(input_file):
     77     print "%s not a file!" % input_file
     78     sys.exit(1)
     79   GenResIndex(input_file)
     80   CopyAndroidCnvFiles(stubdata_dir)
     81   # Run "icupkg -tl -s icudt48l -a icudt48l-us.txt new icudt48l.dat".
     82   args = ["-tl", "-s", TMP_DAT_PATH, "-a", input_file, "new", ICUDATA + ".dat"]
     83   InvokeIcuTool("icupkg", TMP_DAT_PATH, args)
     84 
     85 
     86 def WriteIndex(path, locales, cldr_version=None):
     87   empty_value = " {\"\"}\n"  # key-value pair for all locale entries
     88 
     89   f = open(path, "w")
     90   f.write("res_index:table(nofallback) {\n")
     91   if cldr_version:
     92     f.write("  CLDRVersion { %s }\n" % cldr_version)
     93   f.write("  InstalledLocales {\n")
     94   for locale in locales:
     95     f.write(locale + empty_value)
     96 
     97   f.write("  }\n")
     98   f.write("}\n")
     99   f.close()
    100 
    101 
    102 def AddResFile(collection, path):
    103   # There are two consumers of the the input .txt file: this script and
    104   # icupkg. We only care about .res files, but icupkg needs files they depend
    105   # on too, so it's not an error to have to ignore non-.res files here.
    106   end = path.find(".res")
    107   if end > 0:
    108     collection.add(path[path.find("/")+1:end])
    109   return
    110 
    111 
    112 # Open input file (such as icudt48l-us.txt).
    113 # Go through the list and generate res_index.txt for locales, brkitr,
    114 # coll, et cetera.
    115 def GenResIndex(input_file):
    116   res_index = "res_index.txt"
    117 
    118   brkitrs = set()
    119   colls = set()
    120   currs = set()
    121   langs = set()
    122   locales = set()
    123   regions = set()
    124   zones = set()
    125 
    126   for line in open(input_file, "r"):
    127     if "root." in line or "res_index" in line or "_.res" in line:
    128       continue
    129     if "brkitr/" in line:
    130       AddResFile(brkitrs, line)
    131     elif "coll/" in line:
    132       AddResFile(colls, line)
    133     elif "curr/" in line:
    134       AddResFile(currs, line)
    135     elif "lang/" in line:
    136       AddResFile(langs, line)
    137     elif "region/" in line:
    138       AddResFile(regions, line)
    139     elif "zone/" in line:
    140       AddResFile(zones, line)
    141     elif ".res" in line:
    142       # We need to determine the resource is locale resource or misc resource.
    143       # To determine the locale resource, we assume max script length is 3.
    144       end = line.find(".res")
    145       if end <= 3 or (line.find("_") <= 3 and line.find("_") > 0):
    146         locales.add(line[:end])
    147 
    148   kind_to_locales = {
    149       "brkitr": brkitrs,
    150       "coll": colls,
    151       "curr": currs,
    152       "lang": langs,
    153       "locales": locales,
    154       "region": regions,
    155       "zone": zones
    156   }
    157 
    158   # Find every locale we've mentioned, for whatever reason.
    159   every_locale = set()
    160   for locales in kind_to_locales.itervalues():
    161     every_locale = every_locale.union(locales)
    162 
    163   if VERBOSE:
    164     for kind, locales in kind_to_locales.items():
    165       print "%s=%s" % (kind, sorted(locales))
    166 
    167   # Print a human-readable list of the languages supported.
    168   every_language = set()
    169   for locale in every_locale:
    170     language = re.sub(r"(_.*)", "", locale)
    171     if language != "pool" and language != "supplementalData":
    172       every_language.add(language)
    173   input_basename = os.path.basename(input_file)
    174   print "%s includes %s." % (input_basename, ", ".join(sorted(every_language)))
    175 
    176   # Find cases where we've included only part of a locale's data.
    177   missing_files = []
    178   for locale in every_locale:
    179     for kind, locales in kind_to_locales.items():
    180       p = os.path.join(ICU4C_DIR, "data", kind, locale + ".txt")
    181       if not locale in locales and os.path.exists(p):
    182         missing_files.append(p)
    183 
    184   # Warn about the missing files.
    185   for missing_file in sorted(missing_files):
    186     relative_path = "/".join(missing_file.split("/")[-2:])
    187     print "warning: missing data for supported locale: %s" % relative_path
    188 
    189   # Write the genrb input files.
    190   WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales, CLDR_VERSION)
    191   for kind, locales in kind_to_locales.items():
    192     if kind == "locales":
    193       continue
    194     WriteIndex(os.path.join(TMP_DAT_PATH, kind, res_index), locales)
    195 
    196   # Call genrb to generate new res_index.res.
    197   InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
    198   for kind, locales in kind_to_locales.items():
    199     if kind == "locales":
    200       continue
    201     InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])
    202 
    203 
    204 def CopyAndroidCnvFiles(stubdata_dir):
    205   android_specific_cnv = ["gsm-03.38-2000.cnv",
    206                           "iso-8859_16-2001.cnv",
    207                           "docomo-shift_jis-2007.cnv",
    208                           "kddi-jisx-208-2007.cnv",
    209                           "kddi-shift_jis-2007.cnv",
    210                           "softbank-jisx-208-2007.cnv",
    211                           "softbank-shift_jis-2007.cnv"]
    212   for cnv_file in android_specific_cnv:
    213     src_path = os.path.join(stubdata_dir, "cnv", cnv_file)
    214     dst_path = os.path.join(TMP_DAT_PATH, cnv_file)
    215     shutil.copyfile(src_path, dst_path)
    216     if VERBOSE:
    217       print "copy " + src_path + " " + dst_path
    218 
    219 
    220 def main():
    221   global ANDROID_BUILD_TOP  # $ANDROID_BUILD_TOP
    222   global ICU4C_DIR          # $ANDROID_BUILD_TOP/external/icu4c
    223   global ICU_PREBUILT_DIR   # Directory containing pre-built ICU tools.
    224   global ICUDATA       # e.g. "icudt48l"
    225   global CLDR_VERSION  # CLDR version. The value varies between ICU releases.
    226   global TMP_DAT_PATH  # temp directory to store all resource files and
    227                        # intermediate dat files.
    228   global VERBOSE
    229 
    230   CLDR_VERSION = "2.0.1"
    231   VERBOSE = False
    232 
    233   show_help = False
    234   try:
    235     opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose"])
    236   except getopt.error:
    237     PrintHelpAndExit()
    238   for opt, _ in opts:
    239     if opt in ("-h", "--help"):
    240       show_help = True
    241     elif opt in ("-v", "--verbose"):
    242       VERBOSE = True
    243   if args:
    244     show_help = True
    245 
    246   if show_help:
    247     PrintHelpAndExit()
    248 
    249   ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
    250   if not ANDROID_BUILD_TOP:
    251     print "$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'."
    252     sys.exit(1)
    253   ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
    254   stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")
    255 
    256   # Find all the input files.
    257   input_files = glob.glob(os.path.join(stubdata_dir, "icudt[0-9][0-9]l-*.txt"))
    258 
    259   # Work out the ICU version from the input filenames, so we can find the
    260   # appropriate pre-built ICU tools.
    261   icu_version = re.sub(r"([^0-9])", "", os.path.basename(input_files[0]))
    262   ICU_PREBUILT_DIR = os.path.join(os.environ.get("ANDROID_BUILD_TOP"),
    263       "prebuilt", "linux-x86_64", "icu-%s.%s" % (icu_version[0], icu_version[1]))
    264   if not os.path.exists(ICU_PREBUILT_DIR):
    265     print "%s does not exist!" % ICU_PREBUILT_DIR
    266 
    267   ICUDATA = "icudt" + icu_version + "l"
    268 
    269   # Check that -all.dat exists (since we build the other .dat files from that).
    270   full_data_filename = os.path.join(stubdata_dir, ICUDATA + "-all.dat")
    271   if not os.path.isfile(full_data_filename):
    272     print "%s not present." % full_data_filename
    273     sys.exit(1)
    274 
    275   # Create a temporary working directory.
    276   TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
    277   if os.path.exists(TMP_DAT_PATH):
    278     shutil.rmtree(TMP_DAT_PATH)
    279   os.mkdir(TMP_DAT_PATH)
    280 
    281   # Extract resource files from icudtxxl-all.dat to TMP_DAT_PATH.
    282   ExtractAllResourceToTempDir()
    283 
    284   # Process each input file in turn.
    285   for input_file in sorted(input_files):
    286     output_file = input_file[:-3] + "dat"
    287     MakeDat(input_file, stubdata_dir)
    288     shutil.copyfile(os.path.join(TMP_DAT_PATH, ICUDATA + ".dat"), output_file)
    289     print "Generated ICU data: %s" % output_file
    290 
    291   # Cleanup temporary working directory and icudtxxl.dat
    292   shutil.rmtree(TMP_DAT_PATH)
    293   os.remove(os.path.join(stubdata_dir, ICUDATA + ".dat"))
    294 
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
  main()
    297