Home | History | Annotate | Download | only in stubdata
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2010 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 # Generates icudtXXl-default.dat from icudtXXl-all.dat and icu-data-default.txt.
     18 #
     19 # Usage:
     20 #    icu_dat_generator.py [-v] [-h]
     21 #
     22 # Sample usage:
     23 #   $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose
     24 
     25 import getopt
     26 import glob
     27 import os.path
     28 import re
     29 import shutil
     30 import subprocess
     31 import sys
     32 
     33 
     34 def PrintHelpAndExit():
     35   print "Usage:"
     36   print "  icu_dat_generator.py [-v|--verbose] [-h|--help]"
     37   print "Example:"
     38   print "  $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py"
     39   sys.exit(1)
     40 
     41 
     42 def InvokeIcuTool(tool, working_dir, args):
     43   command_list = [os.path.join(ICU_PREBUILT_DIR, tool)]
     44   command_list.extend(args)
     45 
     46   if VERBOSE:
     47     command = "[%s] %s" % (working_dir, " ".join(command_list))
     48     print command
     49 
     50   ret = subprocess.call(command_list, cwd=working_dir)
     51   if ret != 0:
     52     sys.exit(command_list[0:])
     53 
     54 
     55 def ExtractAllResourceFilesToTmpDir():
     56   # copy icudtXXl-all.dat to icudtXXl.dat
     57   src_dat = os.path.join(ICU4C_DIR, "stubdata", ICU_DATA + "-all.dat")
     58   dst_dat = os.path.join(ICU4C_DIR, "stubdata", ICU_DATA + ".dat")
     59   shutil.copyfile(src_dat, dst_dat)
     60   InvokeIcuTool("icupkg", None, [dst_dat, "-x", "*", "-d", TMP_DAT_PATH])
     61 
     62 
     63 def MakeDat(input_file, stubdata_dir):
     64   print "------ Processing '%s'..." % (input_file)
     65   if not os.path.isfile(input_file):
     66     print "%s not a file!" % input_file
     67     sys.exit(1)
     68   GenResIndex(input_file)
     69   CopyAndroidCnvFiles(stubdata_dir)
     70   # Run "icupkg -tl -s icudtXXl -a icu-data-default.txt new icudtXXl.dat".
     71   args = ["-tl", "-s", TMP_DAT_PATH, "-a", input_file, "new", ICU_DATA + ".dat"]
     72   InvokeIcuTool("icupkg", TMP_DAT_PATH, args)
     73 
     74 
     75 def WriteIndex(path, locales):
     76   empty_value = " {\"\"}\n"  # key-value pair for all locale entries
     77 
     78   f = open(path, "w")
     79   f.write("res_index:table(nofallback) {\n")
     80   f.write("  InstalledLocales {\n")
     81   for locale in locales:
     82     f.write(locale + empty_value)
     83 
     84   f.write("  }\n")
     85   f.write("}\n")
     86   f.close()
     87 
     88 
     89 def AddResFile(collection, path):
     90   # There are two consumers of the the input .txt file: this script and
     91   # icupkg. We only care about .res files, but icupkg needs files they depend
     92   # on too, so it's not an error to have to ignore non-.res files here.
     93   end = path.find(".res")
     94   if end > 0:
     95     collection.add(path[path.find("/")+1:end])
     96   return
     97 
     98 
     99 # Open input file (such as icu-data-default.txt).
    100 # Go through the list and generate res_index.txt for locales, brkitr,
    101 # coll, et cetera.
    102 def GenResIndex(input_file):
    103   res_index = "res_index.txt"
    104 
    105   brkitrs = set()
    106   colls = set()
    107   currs = set()
    108   langs = set()
    109   locales = set()
    110   regions = set()
    111   zones = set()
    112 
    113   for line in open(input_file, "r"):
    114     if "root." in line or "res_index" in line or "_.res" in line:
    115       continue
    116     if "brkitr/" in line:
    117       AddResFile(brkitrs, line)
    118     elif "coll/" in line:
    119       AddResFile(colls, line)
    120     elif "curr/" in line:
    121       AddResFile(currs, line)
    122     elif "lang/" in line:
    123       AddResFile(langs, line)
    124     elif "region/" in line:
    125       AddResFile(regions, line)
    126     elif "zone/" in line:
    127       AddResFile(zones, line)
    128     elif ".res" in line:
    129       # We need to determine the resource is locale resource or misc resource.
    130       # To determine the locale resource, we assume max script length is 3.
    131       end = line.find(".res")
    132       if end <= 3 or (line.find("_") <= 3 and line.find("_") > 0):
    133         locales.add(line[:end])
    134 
    135   kind_to_locales = {
    136       "brkitr": brkitrs,
    137       "coll": colls,
    138       "curr": currs,
    139       "lang": langs,
    140       "locales": locales,
    141       "region": regions,
    142       "zone": zones
    143   }
    144 
    145   # Find every locale we've mentioned, for whatever reason.
    146   every_locale = set()
    147   for locales in kind_to_locales.itervalues():
    148     every_locale = every_locale.union(locales)
    149 
    150   if VERBOSE:
    151     for kind, locales in kind_to_locales.items():
    152       print "%s=%s" % (kind, sorted(locales))
    153 
    154   # Print a human-readable list of the languages supported.
    155   every_language = set()
    156   for locale in every_locale:
    157     language = re.sub(r"(_.*)", "", locale)
    158     if language != "pool" and language != "supplementalData":
    159       every_language.add(language)
    160   input_basename = os.path.basename(input_file)
    161   print "%s includes %s." % (input_basename, ", ".join(sorted(every_language)))
    162 
    163   # Find cases where we've included only part of a locale's data.
    164   missing_files = []
    165   for locale in every_locale:
    166     for kind, locales in kind_to_locales.items():
    167       p = os.path.join(ICU4C_DIR, "data", kind, locale + ".txt")
    168       if not locale in locales and os.path.exists(p):
    169         missing_files.append(p)
    170 
    171   # Warn about the missing files.
    172   for missing_file in sorted(missing_files):
    173     relative_path = "/".join(missing_file.split("/")[-2:])
    174     print "warning: missing data for supported locale: %s" % relative_path
    175 
    176   # Write the genrb input files.
    177   WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales)
    178   for kind, locales in kind_to_locales.items():
    179     if kind == "locales":
    180       continue
    181     WriteIndex(os.path.join(TMP_DAT_PATH, kind, res_index), locales)
    182 
    183   # Call genrb to generate new res_index.res.
    184   InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
    185   for kind, locales in kind_to_locales.items():
    186     if kind == "locales":
    187       continue
    188     InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])
    189 
    190 
    191 def CopyAndroidCnvFiles(stubdata_dir):
    192   android_specific_cnv = ["gsm-03.38-2000.cnv",
    193                           "iso-8859_16-2001.cnv",
    194                           "docomo-shift_jis-2007.cnv",
    195                           "kddi-jisx-208-2007.cnv",
    196                           "kddi-shift_jis-2007.cnv",
    197                           "softbank-jisx-208-2007.cnv",
    198                           "softbank-shift_jis-2007.cnv"]
    199   for cnv_file in android_specific_cnv:
    200     src_path = os.path.join(stubdata_dir, "cnv", cnv_file)
    201     dst_path = os.path.join(TMP_DAT_PATH, cnv_file)
    202     shutil.copyfile(src_path, dst_path)
    203     if VERBOSE:
    204       print "copy " + src_path + " " + dst_path
    205 
    206 
    207 def main():
    208   global ANDROID_BUILD_TOP  # $ANDROID_BUILD_TOP
    209   global ICU4C_DIR          # $ANDROID_BUILD_TOP/external/icu4c
    210   global ICU_PREBUILT_DIR   # Directory containing pre-built ICU tools.
    211   global ICU_DATA           # e.g. "icudt50l"
    212   global TMP_DAT_PATH       # Temporary directory to store all resource files and
    213                             # intermediate dat files.
    214   global VERBOSE
    215 
    216   VERBOSE = False
    217 
    218   show_help = False
    219   try:
    220     opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose"])
    221   except getopt.error:
    222     PrintHelpAndExit()
    223   for opt, _ in opts:
    224     if opt in ("-h", "--help"):
    225       show_help = True
    226     elif opt in ("-v", "--verbose"):
    227       VERBOSE = True
    228   if args:
    229     show_help = True
    230 
    231   if show_help:
    232     PrintHelpAndExit()
    233 
    234   ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
    235   if not ANDROID_BUILD_TOP:
    236     print "$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'."
    237     sys.exit(1)
    238   ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
    239   stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")
    240 
    241   # Work out the ICU version from the source .dat filename, so we can find the
    242   # appropriate pre-built ICU tools.
    243   source_dat = os.path.basename(glob.glob(os.path.join(stubdata_dir, "icudt*.dat"))[0])
    244   icu_version = re.sub(r"([^0-9])", "", source_dat)
    245   ICU_PREBUILT_DIR = os.path.join(os.environ.get("ANDROID_BUILD_TOP"),
    246       "prebuilts", "misc", "linux-x86_64", "icu-%s%s" % (icu_version[0], icu_version[1]))
    247   if not os.path.exists(ICU_PREBUILT_DIR):
    248     print "%s does not exist!" % ICU_PREBUILT_DIR
    249 
    250   ICU_DATA = "icudt" + icu_version + "l"
    251 
    252   # Check that icudtXXl-all.dat exists (since we build the other .dat files from that).
    253   full_data_filename = os.path.join(stubdata_dir, ICU_DATA + "-all.dat")
    254   if not os.path.isfile(full_data_filename):
    255     print "%s not present." % full_data_filename
    256     sys.exit(1)
    257 
    258   # Create a temporary working directory.
    259   TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
    260   if os.path.exists(TMP_DAT_PATH):
    261     shutil.rmtree(TMP_DAT_PATH)
    262   os.mkdir(TMP_DAT_PATH)
    263 
    264   # Extract resource files from icudtXXl-all.dat to TMP_DAT_PATH.
    265   ExtractAllResourceFilesToTmpDir()
    266 
    267   input_file = os.path.join(stubdata_dir, "icu-data-default.txt")
    268   output_file = os.path.join(stubdata_dir, ICU_DATA + "-default.dat")
    269   MakeDat(input_file, stubdata_dir)
    270   shutil.copyfile(os.path.join(TMP_DAT_PATH, ICU_DATA + ".dat"), output_file)
    271   print "Generated ICU data: %s" % output_file
    272 
    273   # Cleanup temporary working directory and icudtXXl.dat
    274   shutil.rmtree(TMP_DAT_PATH)
    275   os.remove(os.path.join(stubdata_dir, ICU_DATA + ".dat"))
    276 
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
  main()
    279