#!/usr/bin/python
#
# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generates icudtXXl-default.dat from icudtXXl-all.dat and icu-data-default.txt.
#
# Usage:
#    icu_dat_generator.py [-v] [-h]
#
# Sample usage:
#   $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose
#
# Note: every print below is a single-argument print(...) call, which behaves
# identically as a Python 2 print statement and as a Python 3 function call.
#
# The module-level names ANDROID_BUILD_TOP, ICU4C_DIR, ICU_PREBUILT_DIR,
# ICU_DATA, TMP_DAT_PATH, VERBOSE and VERY_VERBOSE are assigned by main();
# the helper functions below read them and must only be called after main()
# has set them up.

import getopt
import glob
import os
import os.path
import re
import shutil
import subprocess
import sys


def PrintHelpAndExit():
    """Print the command-line usage text and exit with status 1."""
    print("Usage:")
    print(" icu_dat_generator.py [-v|--verbose] [-h|--help]")
    print("Example:")
    print(" $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py")
    sys.exit(1)


def InvokeIcuTool(tool, working_dir, args):
    """Run a pre-built ICU tool and abort the script if it fails.

    Args:
      tool: file name of the tool inside ICU_PREBUILT_DIR (e.g. "icupkg").
      working_dir: directory to run the tool in, or None for the current one.
      args: list of command-line arguments passed to the tool.

    Exits the script (status 1, message on stderr) when the tool returns a
    non-zero exit status.
    """
    command_list = [os.path.join(ICU_PREBUILT_DIR, tool)]
    command_list.extend(args)

    if VERBOSE:
        command = "[%s] %s" % (working_dir, " ".join(command_list))
        print(command)

    ret = subprocess.call(command_list, cwd=working_dir)
    if ret != 0:
        # sys.exit() with a string prints it to stderr and exits with 1.
        sys.exit("Command failed with exit status %d: %s"
                 % (ret, " ".join(command_list)))


def ExtractAllResourceFilesToTmpDir():
    """Copy icudtXXl-all.dat to icudtXXl.dat and unpack it into TMP_DAT_PATH."""
    # copy icudtXXl-all.dat to icudtXXl.dat
    src_dat = os.path.join(ICU4C_DIR, "stubdata", ICU_DATA + "-all.dat")
    dst_dat = os.path.join(ICU4C_DIR, "stubdata", ICU_DATA + ".dat")
    shutil.copyfile(src_dat, dst_dat)
    InvokeIcuTool("icupkg", None, [dst_dat, "-x", "*", "-d", TMP_DAT_PATH])


def MakeDat(input_file, stubdata_dir):
    """Build icudtXXl.dat inside TMP_DAT_PATH from the given add-list file.

    Args:
      input_file: path of the human-edited list (such as icu-data-default.txt).
      stubdata_dir: directory holding the Android-specific "cnv" files.

    Exits with status 1 if input_file is not a regular file.
    """
    print("------ Processing '%s'..." % (input_file))
    if not os.path.isfile(input_file):
        print("%s not a file!" % input_file)
        sys.exit(1)
    GenResIndex(input_file)
    CopyAndroidCnvFiles(stubdata_dir)
    # Run "icupkg -tl -s icudtXXl -a icu-data-default.txt new icudtXXl.dat".
    args = ["-tl", "-s", TMP_DAT_PATH, "-a", "add_list.txt", "new", ICU_DATA + ".dat"]
    InvokeIcuTool("icupkg", TMP_DAT_PATH, args)


def ResFilesToLocales(res_files):
    """Return the locale names for an iterable of resource entries.

    An entry containing '/' (something like 'coll/en_US.res') is reduced to
    its second path component with '.res' removed; any other entry is
    returned unchanged. The input order is preserved.
    """
    locales = []
    for res_file in res_files:
        # res_file is something like 'coll/en_US.res'.
        if '/' not in res_file:
            locales.append(res_file)
        else:
            locales.append(res_file.split('/')[1].replace('.res', ''))
    return locales


def WriteIndex(path, locales):
    """Write a res_index source file listing the given locales, sorted.

    Args:
      path: output file path; an existing file is overwritten.
      locales: iterable of locale names.
    """
    empty_value = " {\"\"}\n"  # key-value pair for all locale entries

    # 'with' guarantees the file is closed even if a write fails.
    with open(path, "w") as f:
        f.write("res_index:table(nofallback) {\n")
        f.write(" InstalledLocales {\n")
        for locale in sorted(locales):
            f.write(locale + empty_value)

        f.write(" }\n")
        f.write("}\n")


def AddResFile(collection, path):
    """Add the name of a .res entry (directory and suffix removed) to collection.

    Entries that do not contain ".res" are ignored.
    """
    # There are two consumers of the input .txt file: this script and
    # icupkg. We only care about .res files, but icupkg needs files they depend
    # on too, so it's not an error to have to ignore non-.res files here.
    end = path.find(".res")
    if end > 0:
        # find("/") is -1 when there is no directory part, so the slice then
        # starts at 0.
        collection.add(path[path.find("/") + 1:end])


def AddAllResFiles(collection, dir_name, language):
    """Add every resource for a language found under ICU4C_DIR/data/dir_name.

    Matches '<language>.txt' and '<language>_*.txt'. Paths containing
    'TRADITIONAL' or 'PHONEBOOK' are skipped. For dir_name 'locales' the bare
    locale name is added; otherwise '<dir>/<name>.res' is added.
    """
    pattern1 = '%s/data/%s/%s.txt' % (ICU4C_DIR, dir_name, language)
    pattern2 = '%s/data/%s/%s_*.txt' % (ICU4C_DIR, dir_name, language)
    for path in glob.glob(pattern1) + glob.glob(pattern2):
        if 'TRADITIONAL' in path or 'PHONEBOOK' in path:
            continue
        parts = path.split('/')
        if dir_name == 'locales':
            path = parts[-1].replace('.txt', '')
        else:
            path = parts[-2] + '/' + parts[-1].replace('.txt', '.res')
        collection.add(path)


def DumpFile(filename):
    """Print the contents of filename to stdout between marker lines.

    The file is read in-process rather than through a shell 'cat', so paths
    with spaces or shell metacharacters work and the output stays in order
    with the surrounding prints. A read failure is reported on stderr and
    does not abort the script.
    """
    print(' ----------------- %s' % filename)
    try:
        with open(filename, "r") as f:
            sys.stdout.write(f.read())
    except (IOError, OSError) as e:
        sys.stderr.write("%s\n" % e)
    print(' ----------------- END')


# Open input file (such as icu-data-default.txt).
# Go through the list and generate res_index.res for locales, brkitr,
# coll, et cetera.
def GenResIndex(input_file):
    """Generate add_list.txt and the res_index resources in TMP_DAT_PATH.

    Args:
      input_file: path of the human-edited add list.

    Writes TMP_DAT_PATH/add_list.txt (the input list merged with the
    machine-generated entries), a res_index.txt in TMP_DAT_PATH and in each
    per-kind subdirectory, then runs genrb on each res_index.txt.
    """
    brkitrs = set()
    colls = set()
    currs = set()
    langs = set()
    locales = set()
    regions = set()
    zones = set()

    languages = [
        # Group 0.
        'en',

        # Group 1.
        'ar',
        'zh',
        'nl',
        'fr',
        'de',
        'it',
        'ja',
        'ko',
        'pl',
        'pt',
        'ru',
        'es',
        'th',
        'tr',

        # Group 2.
        'bg',
        'ca',
        'hr',
        'cs',
        'da',
        'fil', 'tl',
        'fi',
        'el',
        'iw', 'he',
        'hi',
        'hu',
        'id', 'in',
        'lv',
        'lt',
        'nb',
        'ro',
        'sr',
        'sk',
        'sl',
        'sv',
        'uk',
        'vi',
        'fa',

        # Group 3.
        'af',
        'am',
        'bn',
        'et',
        'is',
        'ms',
        'mr',
        'sw',
        'ta',
        'zu',

        # Group 4.
        'eu',
        'gl',
        'gu',
        'kn',
        'ml',
        'te',
        'ur',

        # Group 5.
        'km',
        'lo',
        'ne',
        'si',
        'ka',
        'hy',
        'mn',
        'cy',

        # Others.
        'az',
        'be',
        'rm',
    ]

    for language in languages:
        AddAllResFiles(brkitrs, 'brkitr', language)
        AddAllResFiles(colls, 'coll', language)
        AddAllResFiles(currs, 'curr', language)
        AddAllResFiles(langs, 'lang', language)
        AddAllResFiles(regions, 'region', language)
        AddAllResFiles(zones, 'zone', language)
        AddAllResFiles(locales, 'locales', language)

    # We need to merge the human-edited icu-data-default.txt with the
    # machine-generated list of files needed to support the various languages.
    new_add_list = []

    with open(input_file, "r") as f:
        for raw_line in f:
            # Drop the line terminator: every entry gets exactly one "\n" when
            # add_list.txt is written below.
            line = raw_line.rstrip("\r\n")
            new_add_list.append(line)
            # NOTE(review): comment lines ('#...') are still scanned for
            # resource names here, as before; only the write step below
            # filters them out. Confirm whether that is intended.
            if "root." in line or "res_index" in line or "_.res" in line:
                continue
            if "brkitr/" in line:
                AddResFile(brkitrs, line)
            elif "coll/" in line:
                AddResFile(colls, line)
            elif "curr/" in line:
                AddResFile(currs, line)
            elif "lang/" in line:
                AddResFile(langs, line)
            elif "region/" in line:
                AddResFile(regions, line)
            elif "zone/" in line:
                AddResFile(zones, line)
            elif ".res" in line:
                # TODO: these should all now be misc resources!
                # We need to determine the resource is locale resource or misc resource.
                # To determine the locale resource, we assume max script length is 3.
                end = line.find(".res")
                if end <= 3 or (line.find("_") <= 3 and line.find("_") > 0):
                    locales.add(line[:end])

    kind_to_res_files = {
        "brkitr": brkitrs,
        "coll": colls,
        "curr": currs,
        "lang": langs,
        "locales": locales,
        "region": regions,
        "zone": zones
    }

    # Merge the machine-generated list into the human-generated list.
    # Kinds are visited in sorted order so add_list.txt is reproducible.
    for kind, res_files in sorted(kind_to_res_files.items()):
        for res_file in sorted(res_files):
            if '.' not in res_file:
                res_file = res_file + '.res'
            new_add_list.append(res_file)

    if VERBOSE:
        for kind, res_files in sorted(kind_to_res_files.items()):
            print("%s=%s" % (kind, sorted(res_files)))

    # Write the genrb input files.

    # First add_list.txt, the argument to icupkg -a...
    with open(os.path.join(TMP_DAT_PATH, "add_list.txt"), "w") as f:
        for line in new_add_list:
            # Blank lines and comments carry no entries.
            if not line or line.startswith('#'):
                continue
            f.write("%s\n" % line)

    # Second res_index.txt, used below by genrb.
    res_index = "res_index.txt"
    WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales)
    for kind, res_files in sorted(kind_to_res_files.items()):
        if kind == "locales":
            continue
        res_index_filename = os.path.join(TMP_DAT_PATH, kind, res_index)
        WriteIndex(res_index_filename, ResFilesToLocales(res_files))
        if VERY_VERBOSE:
            DumpFile(res_index_filename)

    # Useful if you need to see the temporary input files we generated.
    if VERY_VERBOSE:
        DumpFile('%s/add_list.txt' % TMP_DAT_PATH)
        DumpFile('%s/res_index.txt' % TMP_DAT_PATH)

    # Call genrb to generate new res_index.res.
    InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
    for kind in sorted(kind_to_res_files):
        if kind == "locales":
            continue
        InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])


def CopyAndroidCnvFiles(stubdata_dir):
    """Copy the Android-specific converter files into TMP_DAT_PATH.

    Args:
      stubdata_dir: directory whose "cnv" subdirectory holds the .cnv files.
    """
    android_specific_cnv = ["gsm-03.38-2000.cnv",
                            "iso-8859_16-2001.cnv",
                            "docomo-shift_jis-2012.cnv",
                            "kddi-jisx-208-2007.cnv",
                            "kddi-shift_jis-2012.cnv",
                            "softbank-jisx-208-2007.cnv",
                            "softbank-shift_jis-2012.cnv"]
    for cnv_file in android_specific_cnv:
        src_path = os.path.join(stubdata_dir, "cnv", cnv_file)
        dst_path = os.path.join(TMP_DAT_PATH, cnv_file)
        shutil.copyfile(src_path, dst_path)
        if VERBOSE:
            print("copy " + src_path + " " + dst_path)


def main():
    """Parse the command line and generate icudtXXl-default.dat.

    Exits with status 1 on a usage error, when $ANDROID_BUILD_TOP is unset,
    or when a required input (source .dat file, pre-built ICU tool directory)
    is missing.
    """
    global ANDROID_BUILD_TOP  # $ANDROID_BUILD_TOP
    global ICU4C_DIR          # $ANDROID_BUILD_TOP/external/icu4c
    global ICU_PREBUILT_DIR   # Directory containing pre-built ICU tools.
    global ICU_DATA           # e.g. "icudt50l"
    global TMP_DAT_PATH       # Temporary directory to store all resource files and
                              # intermediate dat files.
    global VERBOSE, VERY_VERBOSE

    VERBOSE = VERY_VERBOSE = False

    show_help = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose", "very-verbose"])
    except getopt.error:
        PrintHelpAndExit()
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            show_help = True
        elif opt in ("-v", "--verbose"):
            VERBOSE = True
        elif opt == "--very-verbose":
            VERY_VERBOSE = VERBOSE = True
    if args:
        show_help = True

    if show_help:
        PrintHelpAndExit()

    ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
    if not ANDROID_BUILD_TOP:
        print("$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'.")
        sys.exit(1)
    ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
    stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")

    # Work out the ICU version from the source .dat filename, so we can find the
    # appropriate pre-built ICU tools.
    dat_files = sorted(glob.glob(os.path.join(stubdata_dir, "icudt*.dat")))
    if not dat_files:
        print("No icudt*.dat file found in %s." % stubdata_dir)
        sys.exit(1)
    source_dat = os.path.basename(dat_files[0])
    icu_version = re.sub(r"([^0-9])", "", source_dat)
    if len(icu_version) < 2:
        print("Cannot determine the ICU version from %s." % source_dat)
        sys.exit(1)
    ICU_PREBUILT_DIR = os.path.join(ANDROID_BUILD_TOP,
        "prebuilts", "misc", "linux-x86_64", "icu-%s%s" % (icu_version[0], icu_version[1]))
    if not os.path.exists(ICU_PREBUILT_DIR):
        print("%s does not exist!" % ICU_PREBUILT_DIR)
        # Nothing below can work without the pre-built tools; stop before
        # creating any temporary files.
        sys.exit(1)

    ICU_DATA = "icudt" + icu_version + "l"

    # Check that icudtXXl-all.dat exists (since we build the other .dat files from that).
    full_data_filename = os.path.join(stubdata_dir, ICU_DATA + "-all.dat")
    if not os.path.isfile(full_data_filename):
        print("%s not present." % full_data_filename)
        sys.exit(1)

    # Create a temporary working directory.
    TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
    if os.path.exists(TMP_DAT_PATH):
        shutil.rmtree(TMP_DAT_PATH)
    os.mkdir(TMP_DAT_PATH)

    # Extract resource files from icudtXXl-all.dat to TMP_DAT_PATH.
    ExtractAllResourceFilesToTmpDir()

    input_file = os.path.join(stubdata_dir, "icu-data-default.txt")
    output_file = os.path.join(stubdata_dir, ICU_DATA + "-default.dat")
    MakeDat(input_file, stubdata_dir)
    shutil.copyfile(os.path.join(TMP_DAT_PATH, ICU_DATA + ".dat"), output_file)
    print("Generated ICU data: %s" % output_file)

    # Cleanup temporary working directory and icudtXXl.dat
    shutil.rmtree(TMP_DAT_PATH)
    os.remove(os.path.join(stubdata_dir, ICU_DATA + ".dat"))


if __name__ == "__main__":
    main()