Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.util.HashMap;
      4 import java.util.HashSet;
      5 import java.util.LinkedHashMap;
      6 import java.util.List;
      7 import java.util.Map;
      8 import java.util.Map.Entry;
      9 import java.util.Set;
     10 import java.util.TreeMap;
     11 import java.util.TreeSet;
     12 
     13 import org.unicode.cldr.util.CLDRFile;
     14 import org.unicode.cldr.util.CLDRPaths;
     15 import org.unicode.cldr.util.Factory;
     16 import org.unicode.cldr.util.LanguageTagParser;
     17 import org.unicode.cldr.util.LocaleIDParser;
     18 import org.unicode.cldr.util.SupplementalDataInfo;
     19 
     20 import com.ibm.icu.impl.Relation;
     21 import com.ibm.icu.impl.Row.R2;
     22 import com.ibm.icu.text.UnicodeSet;
     23 
     24 public class GenerateAliases {
     25     public static void main(String[] args) {
     26         new Builder().getAliases();
     27     }
     28 
     29     static class Builder {
     30         Map<String, String> aliasMap = new LinkedHashMap<String, String>();
     31         Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
     32 
     33         SupplementalDataInfo dataInfo = SupplementalDataInfo.getInstance();
     34         Set<String> defaultContents = dataInfo.getDefaultContentLocales();
     35         LikelySubtags likelySubtags = new LikelySubtags();
     36         Map<String, Map<String, R2<List<String>, String>>> aliasInfo = dataInfo.getLocaleAliasInfo();
     37 
     38         Relation<String, String> goodToBadLanguages = getGoodToBad(aliasInfo, "language");
     39         Relation<String, String> goodToBadTerritories = getGoodToBad(aliasInfo, "territory");
     40         Relation<String, String> goodToBadScripts = getGoodToBad(aliasInfo, "script");
     41 
     42         // sh //ldml/alias[@source="sr_Latn"][@path="//ldml"]
     43         LanguageTagParser ltp = new LanguageTagParser();
     44         final Set<String> available = factory.getAvailable();
     45 
     46         Builder() {
     47 
     48             for (String localeID : available) {
     49                 String targetID = getDefaultContents(localeID);
     50                 if (targetID == null) {
     51                     targetID = localeID;
     52                 }
     53                 addAlias("deprecated", localeID, targetID);
     54                 // special hack for sh
     55                 if (localeID.startsWith("sr_Latn")) {
     56                     addAlias("deprecated", "sh" + localeID.substring(7), localeID);
     57                 }
     58             }
     59 
     60             Map<String, String> likely = new TreeMap<String, String>();
     61 
     62             // get all the combinations
     63             for (String max : likelySubtags.getToMaximized().values()) {
     64                 likely.put(max, getDefaultContents(max));
     65                 ltp.set(max);
     66                 ltp.setScript("");
     67                 addToLikely(likely);
     68                 ltp.set(max);
     69                 ltp.setRegion("");
     70                 addToLikely(likely);
     71                 ltp.setScript("");
     72                 addToLikely(likely);
     73             }
     74 
     75             for (Entry<String, String> small2large : likely.entrySet()) {
     76                 String localeID = small2large.getKey();
     77                 String targetID = small2large.getValue();
     78                 if (localeID.equals(targetID)) {
     79                     continue;
     80                 }
     81                 String base = ltp.set(localeID).getLanguage();
     82                 if (!available.contains(base)) { // skip seed locales
     83                     continue;
     84                 }
     85                 // if (!localeID.contains("_")) {
     86                 // continue; // skip languages not represented
     87                 // }
     88                 if (available.contains(localeID) && !isWholeAlias(factory, localeID)) {
     89                     continue;
     90                 }
     91                 targetID = getDefaultContents(targetID);
     92                 addAlias("default", localeID, targetID);
     93             }
     94 
     95             for (String localeID : available) {
     96                 if (aliasMap.get(localeID) != null) {
     97                     continue;
     98                 }
     99                 if (isWholeAlias(factory, localeID)) {
    100                     System.out.println("missing" + "\t" + localeID);
    101                 }
    102             }
    103 
    104             // System.out.println(CollectionUtilities.join(aliasMap.entrySet(), "\n"));
    105         }
    106 
    107         private void addToLikely(Map<String, String> likely) {
    108             String partial = ltp.toString();
    109             final String target = getDefaultContents(partial);
    110             String parent = LocaleIDParser.getSimpleParent(partial);
    111             if (target.equals(parent)) {
    112                 return;
    113             }
    114             likely.put(partial, target);
    115         }
    116 
    117         static final Set<String> HAS_MULTIPLE_SCRIPTS = org.unicode.cldr.util.Builder.with(new HashSet<String>())
    118             .addAll("ha", "ku", "zh", "sr", "uz", "sh").freeze();
    119 
    120         private boolean hasMultipleScripts(String localeID) {
    121             LanguageTagParser ltp = new LanguageTagParser().set(localeID);
    122             return HAS_MULTIPLE_SCRIPTS.contains(ltp.getLanguage());
    123         }
    124 
    125         private String getDefaultContents(String localeID) {
    126             String targetID = hasMultipleScripts(localeID) ? likelySubtags.maximize(localeID) : likelySubtags
    127                 .minimize(localeID);
    128 
    129             if (targetID == null) {
    130                 System.out.println("missingLikely" + "\t" + localeID);
    131                 return localeID;
    132             }
    133             while (defaultContents.contains(targetID)) {
    134                 String parent = LocaleIDParser.getSimpleParent(targetID);
    135                 if (parent == null || parent.equals("root)")) {
    136                     break;
    137                 }
    138                 targetID = parent;
    139             }
    140             return targetID;
    141         }
    142 
    143         public Map<String, String> getAliases() {
    144             return aliasMap;
    145         }
    146 
    147         static final UnicodeSet NUMBERS = new UnicodeSet("[0-9]");
    148 
    149         private Relation<String, String> getGoodToBad(Map<String, Map<String, R2<List<String>, String>>> aliasInfo,
    150             String tag) {
    151             Relation<String, String> result = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    152             Map<String, R2<List<String>, String>> map = aliasInfo.get(tag);
    153             for (Entry<String, R2<List<String>, String>> entity : map.entrySet()) {
    154                 final String key = entity.getKey();
    155                 final R2<List<String>, String> listAndReason = entity.getValue();
    156                 final List<String> list = listAndReason.get0();
    157                 final String reason = listAndReason.get1();
    158                 if (reason.equals("overlong")) {
    159                     continue;
    160                 }
    161                 if (list == null) {
    162                     continue;
    163                 }
    164                 if (NUMBERS.containsAll(key)) { // special check for items like 172
    165                     continue;
    166                 }
    167                 result.put(list.iterator().next(), key);
    168             }
    169             return result;
    170         }
    171 
    172         private void addAlias(String title, String localeID, String targetID) {
    173             ltp.set(localeID);
    174             Set<String> languages = addExtras(ltp.getLanguage(), goodToBadLanguages);
    175             Set<String> scripts = addExtras(ltp.getScript(), goodToBadScripts);
    176             Set<String> territories = addExtras(ltp.getRegion(), goodToBadTerritories);
    177             for (String language : languages) {
    178                 try {
    179                     ltp.set(language); // whole language tag
    180                 } catch (Exception e) {
    181                     continue;
    182                 }
    183                 if (!ltp.getVariants().isEmpty()) { // skip variants
    184                     continue;
    185                 }
    186                 for (String script : scripts) {
    187                     ltp.setScript(script);
    188                     for (String territory : territories) {
    189                         ltp.setRegion(territory);
    190                         String newTag = ltp.toString().replace('-', '_');
    191                         main: {
    192                             if (newTag.equals(targetID)) {
    193                                 break main;
    194                             }
    195                             String old = aliasMap.get(newTag);
    196                             if (old != null) {
    197                                 if (!old.equals(targetID)) {
    198                                     System.out.println(newTag + "\t\t" + targetID + "\tconflict with\t" + old);
    199                                 }
    200                                 break main;
    201                             }
    202                             final boolean wholeAlias = isWholeAlias(factory, newTag);
    203                             if (!available.contains(newTag) || wholeAlias) {
    204                                 System.out.println(title + "\t" + newTag + "\t\t" + targetID
    205                                     + (wholeAlias ? "\talias-already" : ""));
    206                                 aliasMap.put(newTag, targetID);
    207                             }
    208                         }
    209                     }
    210                 }
    211             }
    212         }
    213 
    214         /*
    215          * Problems
    216          * missingLikely tl
    217          * missingLikely tl_PH
    218          * sr_YU -> conflict with sr
    219          * sr_CS -> conflict with sr_Cyrl_CS
    220          * sr_CS -> conflict with sr_Cyrl_CS
    221          * sh_CS -> conflict with sr_Latn_CS
    222          * sh_YU -> conflict with sr_Latn_RS
    223          */
    224 
    225         private Set<String> addExtras(String language, Relation<String, String> goodToBadLanguages) {
    226             Set<String> languages = new TreeSet<String>();
    227             languages.add(language);
    228             Set<String> badLanguages = goodToBadLanguages.get(language);
    229             if (badLanguages != null) {
    230                 languages.addAll(badLanguages);
    231             }
    232             return languages;
    233         }
    234 
    235         Map<String, Boolean> wholeAliasCache = new HashMap<String, Boolean>();
    236 
    237         private boolean isWholeAlias(Factory factory, String localeID) {
    238             Boolean result = wholeAliasCache.get(localeID);
    239             if (result != null) {
    240                 return result;
    241             }
    242             CLDRFile cldrFile;
    243             try {
    244                 cldrFile = factory.make(localeID, false);
    245             } catch (Exception e) {
    246                 wholeAliasCache.put(localeID, false);
    247                 return false;
    248             }
    249             for (String path : cldrFile) {
    250                 if (path.startsWith("//ldml/identity")) {
    251                     continue;
    252                 } else if (path.startsWith("//ldml/alias")) {
    253                     wholeAliasCache.put(localeID, true);
    254                     return true;
    255                 }
    256                 wholeAliasCache.put(localeID, false);
    257                 return false;
    258             }
    259             wholeAliasCache.put(localeID, false);
    260             return false;
    261         }
    262     }
    263 }
    264