Home | History | Annotate | Download | only in tool
      1 /*
      2  * Created on May 19, 2005
      3  * Copyright (C) 2004-2011, Unicode, Inc., International Business Machines Corporation, and others.
      4  * For terms of use, see http://www.unicode.org/terms_of_use.html
      5  */
      6 package org.unicode.cldr.tool;
      7 
      8 import java.util.HashMap;
      9 import java.util.Iterator;
     10 import java.util.Map;
     11 import java.util.Set;
     12 import java.util.TreeMap;
     13 import java.util.TreeSet;
     14 
     15 import org.unicode.cldr.icu.CollationMapper;
     16 import org.unicode.cldr.icu.IcuData;
     17 import org.unicode.cldr.util.LocaleIDParser;
     18 import org.unicode.cldr.util.Log;
     19 
     20 import com.ibm.icu.text.Collator;
     21 import com.ibm.icu.text.RuleBasedCollator;
     22 import com.ibm.icu.text.Transliterator;
     23 import com.ibm.icu.util.ULocale;
     24 
     25 public class GenerateCldrCollationTests {
     26     String sourceDir;
     27     Set<String> validLocales = new TreeSet<String>();
     28     Map<String, Object> ulocale_rules = new TreeMap<String, Object>(GenerateCldrTests.ULocaleComparator);
     29     Map<String, Map<String, RuleBasedCollator>> locale_types_rules = new TreeMap<String, Map<String, RuleBasedCollator>>();
     30     Map<RuleBasedCollator, RuleBasedCollator> collation_collation = new HashMap<RuleBasedCollator, RuleBasedCollator>();
     31     RuleBasedCollator emptyCollator = (RuleBasedCollator) Collator.getInstance(new ULocale(""));
     32 
     33     public Set<String> getAvailableSet() {
     34         return ulocale_rules.keySet();
     35     }
     36 
     37     public RuleBasedCollator getInstance(String locale) {
     38         return (RuleBasedCollator) ulocale_rules.get(locale);
     39     }
     40 
     41     void show() {
     42         Log.logln("Showing Locales");
     43         Log.logln("Unique Collators: " + collation_collation.size());
     44         for (Iterator it2 = ulocale_rules.keySet().iterator(); it2.hasNext();) {
     45             ULocale locale = (ULocale) it2.next();
     46             RuleBasedCollator col = (RuleBasedCollator) ulocale_rules.get(locale);
     47             Log.logln("\t" + locale + ", " + col.getRules());
     48         }
     49     }
     50 
     51     GenerateCldrCollationTests(String sourceDir, String localeRegex, Set<String> locales) throws Exception {
     52         this.sourceDir = sourceDir;
     53         Set<String> s = GenerateCldrTests.getMatchingXMLFiles(sourceDir, localeRegex);
     54         for (Iterator<String> it = s.iterator(); it.hasNext();) {
     55             getCollationRules(it.next());
     56         }
     57 
     58         // now fixup the validLocales, adding in what they inherit
     59         // TODO, add check: validSubLocales are masked by intervening locales.
     60         for (Iterator<String> it = validLocales.iterator(); it.hasNext();) {
     61             String locale = it.next();
     62             Map<String, RuleBasedCollator> types_rules = locale_types_rules.get(locale);
     63             if (types_rules != null)
     64                 Log.logln("Weird: overlap in validLocales: " + locale);
     65             else {
     66                 for (String parentlocale = LocaleIDParser.getSimpleParent(locale); parentlocale != null; parentlocale = LocaleIDParser
     67                     .getSimpleParent(parentlocale)) {
     68                     types_rules = locale_types_rules.get(parentlocale);
     69                     if (types_rules != null) {
     70                         locale_types_rules.put(locale, types_rules);
     71                         break;
     72                     }
     73                 }
     74             }
     75         }
     76         // now generate the @-style locales
     77         ulocale_rules.put("root", Collator.getInstance(ULocale.ROOT));
     78 
     79         for (Iterator<String> it = locale_types_rules.keySet().iterator(); it.hasNext();) {
     80             String locale = it.next();
     81             Map<String, RuleBasedCollator> types_rules = locale_types_rules.get(locale);
     82             for (Iterator<String> it2 = types_rules.keySet().iterator(); it2.hasNext();) {
     83                 String type = it2.next();
     84                 // TODO fix HACK
     85                 if (type.equals("unihan")) {
     86                     if (!locale.startsWith("zh")) continue;
     87                 }
     88                 RuleBasedCollator col = (RuleBasedCollator) types_rules.get(type);
     89                 String name = type.equals("standard") ? locale : locale + "@collation=" + type;
     90                 ulocale_rules.put(name, col);
     91             }
     92         }
     93         // now flesh out
     94         // Collator root = Collator.getInstance(ULocale.ROOT);
     95         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
     96             String locale = it.next();
     97             if (ulocale_rules.get(locale) != null) continue;
     98             String parent = LocaleIDParser.getSimpleParent(locale); // GenerateCldrTests.getParent(locale);
     99             if (parent == null) continue;
    100             try {
    101                 ulocale_rules.put(locale, ulocale_rules.get(parent));
    102             } catch (RuntimeException e) {
    103                 throw e;
    104             }
    105         }
    106     }
    107 
    108     static Transliterator fromHex = Transliterator.getInstance("hex-any");
    109 
    110     private void getCollationRules(String locale) throws Exception {
    111         System.out.println("Loading collation:\t" + locale);
    112         CollationMapper mapper = new CollationMapper(sourceDir, null);
    113         StringBuilder stringBuilder = new StringBuilder();
    114         TreeMap<String, RuleBasedCollator> types_rules = new TreeMap<String, RuleBasedCollator>();
    115         IcuData[] dataList = mapper.fillFromCldr(locale);
    116         IcuData icuData = dataList[0];
    117         for (String rbPath : icuData.keySet()) {
    118             if (!rbPath.endsWith("/Sequence")) continue;
    119             // remove the \ u's, because they blow up
    120             stringBuilder.setLength(0);
    121             for (String line : icuData.get(rbPath).get(0)) {
    122                 stringBuilder.append(line);
    123             }
    124             String originalRules = stringBuilder.toString();
    125             String rules = fromHex.transliterate(originalRules);
    126             String name = rbPath.split("/")[2];
    127             RuleBasedCollator fixed = generateCollator(locale, name, rules);
    128             if (fixed != null) {
    129                 Log.logln("Rules for: " + locale + ", " + name);
    130                 Log.logln(rules);
    131                 if (!rules.equals(originalRules)) {
    132                     Log.logln("Original Rules from Ram: ");
    133                     Log.logln(originalRules);
    134                 }
    135                 types_rules.put(name, fixed);
    136             }
    137             locale_types_rules.put(locale, types_rules);
    138         }
    139         // now get the valid sublocales
    140         for (int i = 1; i < dataList.length; i++) {
    141             IcuData subLocale = dataList[i];
    142             Log.logln("Valid Sub Locale: " + subLocale.getName());
    143             validLocales.add(subLocale.getName());
    144         }
    145     }
    146 
    147     /**
    148      * @param locale
    149      * @param current
    150      * @param foo
    151      * @param rules
    152      */
    153     private RuleBasedCollator generateCollator(String locale, String current, String rules) {
    154         RuleBasedCollator fixed = null;
    155         try {
    156             if (rules.equals(""))
    157                 fixed = emptyCollator;
    158             else {
    159                 rules = GenerateCldrTests.replace(rules, "[optimize[", "[optimize [");
    160                 rules = GenerateCldrTests.replace(rules, "[suppressContractions[", "[suppressContractions [");
    161                 RuleBasedCollator col = new RuleBasedCollator(rules);
    162                 fixed = (RuleBasedCollator) collation_collation.get(col);
    163                 if (fixed == null) {
    164                     collation_collation.put(col, col);
    165                     fixed = col;
    166                 }
    167             }
    168         } catch (Exception e) {
    169             Log.logln("***Cannot create collator from: " + locale + ", " + current + ", " + rules);
    170             e.printStackTrace(Log.getLog());
    171             RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale(locale));
    172             String oldrules = coll.getRules();
    173             Log.logln("Old ICU4J: " + oldrules);
    174             Log.logln("Equal?: " + oldrules.equals(rules));
    175         }
    176         return fixed;
    177     }
    178 }
    179