Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.util.Collections;
      4 import java.util.HashMap;
      5 import java.util.HashSet;
      6 import java.util.LinkedHashMap;
      7 import java.util.LinkedHashSet;
      8 import java.util.List;
      9 import java.util.Locale;
     10 import java.util.Map;
     11 import java.util.Map.Entry;
     12 import java.util.Set;
     13 import java.util.TreeMap;
     14 import java.util.TreeSet;
     15 
     16 import org.unicode.cldr.util.Builder;
     17 import org.unicode.cldr.util.CLDRConfig;
     18 import org.unicode.cldr.util.CldrUtility;
     19 import org.unicode.cldr.util.LanguageTagParser;
     20 import org.unicode.cldr.util.StandardCodes;
     21 import org.unicode.cldr.util.StringIterables;
     22 
     23 import com.ibm.icu.impl.Row.R2;
     24 import com.ibm.icu.util.ULocale;
     25 
     26 public class LanguageCodeConverter {
    // Lower-cased language name -> language code. Filled by addNameToCode() while the static
    // initializer runs, then replaced by an unmodifiable view at the end of that initializer.
    private static Map<String, String> languageNameToCode = new TreeMap<String, String>();
    // Underscore-form codes collected from external/alternate_language_names.txt and from the
    // values of GOOGLE_CLDR; replaced by an unmodifiable view once static initialization is done.
    private static Set<String> exceptionCodes = new TreeSet<String>();
    // Diagnostics gathered while the tables are built ("Skipping ..." and "Name Collision! ...").
    // A LinkedHashSet, so messages are kept in the order they were first reported.
    private static Set<String> parseErrors = new LinkedHashSet<String>();

    // Alias data for the "language" category from the CLDR supplemental data:
    // code -> <replacement codes, reason>, e.g. "sh" -> <{"sr_Latn"}, reason>.
    private static Map<String, R2<List<String>, String>> languageAliases = CLDRConfig.getInstance().getSupplementalDataInfo().getLocaleAliasInfo()
        .get("language");
     33 
    /**
     * Maps Google-style locale ids (hyphen-separated, the form looked up by
     * {@link #fromGoogleLocaleId(String)}) to the corresponding CLDR ids (underscore-separated).
     * The map is built on a {@link LinkedHashMap}, so iteration follows the order of the
     * entries listed below. Several Google ids ("ut", "un", "xx") share the CLDR value "und".
     * <p>
     * Public only for testing.
     *
     * @internal
     */
    public static final Map<String, String> GOOGLE_CLDR = Builder.with(new LinkedHashMap<String, String>()) // preserve order
        .put("iw", "he")
        .put("jw", "jv")
        .put("no", "nb")
        .put("tl", "fil")
        .put("pt-BR", "pt")
        .put("xx-bork", "x_bork")
        .put("xx-elmer", "x_elmer")
        .put("xx-hacker", "x_hacker")
        .put("xx-pirate", "x_pirate")
        .put("xx-klingon", "tlh")
        .put("zh-CN", "zh")
        .put("zh-TW", "zh_Hant")
        .put("zh-HK", "zh_Hant_HK")
        .put("sit-NP", "lif")
        .put("ut", "und")
        .put("un", "und")
        .put("xx", "und")

        // .put("sh", "fil")
        .freeze();
     60 
    /**
     * The reverse direction of {@link #GOOGLE_CLDR}, built by transposing that map: keys are
     * CLDR ids in underscore form (the form looked up by {@link #toGoogleLocaleId(String)}) and
     * values are Google-style ids.
     * <p>
     * NOTE(review): "ut", "un" and "xx" all map to "und" in GOOGLE_CLDR, so only one of them can
     * be the value stored for "und" here. Which one wins presumably depends on how
     * Builder.putAllTransposed handles duplicate keys -- confirm against Builder.
     * <p>
     * Public only for testing.
     *
     * @internal
     */
    public static final Map<String, String> CLDR_GOOGLE = Builder.with(new HashMap<String, String>())
        .putAllTransposed(GOOGLE_CLDR)
        .freeze();
     69 
    /**
     * Script codes for a few language codes. Presumably {@code on(...).put("Latn")} associates
     * each of "crs", "pcm" and "tlh" with the script "Latn" -- confirm against Builder.
     * No other member of this class visible in this file reads this map.
     * <p>
     * Public only for testing.
     *
     * @internal
     */
    public static final Map<String, String> EXTRA_SCRIPTS = Builder.with(new HashMap<String, String>())
        .on("crs", "pcm", "tlh").put("Latn")
        .freeze();
     78 
     79     static {
     80         // Reads the CLDR copy of
     81         // http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
     82         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes.getLStreg();
     83         Map<String, Map<String, String>> languages = lstreg.get("language");
     84         Set<String> validCodes = new HashSet<String>();
     85 
     86         for (Entry<String, Map<String, String>> codeInfo : languages.entrySet()) {
     87             String code = codeInfo.getKey();
     88             R2<List<String>, String> replacement = languageAliases.get(code);
     89             // Returns "sh" -> <{"sr_Latn"}, reason>
     90             if (replacement != null) {
     91                 List<String> replacements = replacement.get0();
     92                 if (replacements.size() != 1) {
     93                     continue;
     94                 }
     95                 code = replacements.get(0);
     96                 if (code.contains("_")) {
     97                     continue;
     98                 }
     99             }
    100             // if (languageAliases.containsKey(code)) {
    101             // continue;
    102             // }
    103             final Map<String, String> info = codeInfo.getValue();
    104             String deprecated = info.get("Deprecated");
    105             if (deprecated != null) {
    106                 continue;
    107             }
    108             String name = info.get("Description");
    109             if (name.equals("Private use")) {
    110                 continue;
    111             }
    112             validCodes.add(code);
    113             if (name.contains(StandardCodes.DESCRIPTION_SEPARATOR)) {
    114                 for (String namePart : name.split(StandardCodes.DESCRIPTION_SEPARATOR)) {
    115                     addNameToCode("lstr", code, namePart);
    116                 }
    117             } else {
    118                 addNameToCode("lstr", code, name);
    119             }
    120         }
    121 
    122         // CLDRFile english; // = testInfo.getEnglish();
    123         for (String code : validCodes) {
    124             String icuName = ULocale.getDisplayName(code, "en");
    125             addNameToCode("cldr", code, icuName);
    126             // if (languageAliases.containsKey(code)) {
    127             // continue;
    128             // }
    129             // String cldrName = english.getName("language", code);
    130             // if (cldrName != null && !cldrName.equals("private-use")) {
    131             // addNameToCode("cldr", code, cldrName);
    132             // }
    133         }
    134         // add exceptions
    135         LanguageTagParser ltp = new LanguageTagParser();
    136         for (String line : StringIterables.in(CldrUtility.getUTF8Data("external/alternate_language_names.txt"))) {
    137             String[] parts = CldrUtility.cleanSemiFields(line);
    138             if (parts == null || parts.length == 0) continue;
    139             String code = parts[0];
    140             if (!validCodes.contains(code)) {
    141                 if (code.equals("*OMIT")) {
    142                     parseErrors.add("Skipping " + line);
    143                     continue;
    144                 }
    145                 String base = ltp.set(code).getLanguage();
    146                 if (!validCodes.contains(base)) {
    147                     R2<List<String>, String> alias = languageAliases.get(base);
    148                     if (alias != null) {
    149                         code = alias.get0().get(0);
    150                     } else {
    151                         parseErrors.add("Skipping " + line);
    152                         continue;
    153                     }
    154                 }
    155             }
    156             exceptionCodes.add(toUnderbarLocale(code));
    157             if (parts.length < 2) {
    158                 continue;
    159             }
    160             String name = parts[1];
    161             if (parts.length > 2) {
    162                 name += ";" + parts[2]; // HACK
    163             }
    164             addNameToCode("exception", code, name);
    165         }
    166         for (String cldr : GOOGLE_CLDR.values()) {
    167             String goodCode = toUnderbarLocale(cldr);
    168             exceptionCodes.add(goodCode);
    169         }
    170         languageNameToCode = Collections.unmodifiableMap(languageNameToCode);
    171         exceptionCodes = Collections.unmodifiableSet(exceptionCodes);
    172         parseErrors = Collections.unmodifiableSet(parseErrors);
    173     }
    174 
    175     private static void addNameToCode(final String type, final String code, String name) {
    176         if (code.equals("mru") && name.equals("mru")) {
    177             // mru=Mono (Cameroon)
    178             // mro=Mru
    179             // Ignore the CLDR mapping of the code to itself,
    180             // to avoid clobbering the mapping of the real name Mru to the real code mro.
    181             return;
    182         }
    183         name = name.toLowerCase(Locale.ENGLISH);
    184         String oldCode = languageNameToCode.get(name);
    185         if (oldCode != null) {
    186             if (!oldCode.equals(code)) {
    187                 parseErrors.add("Name Collision! " + type + ": " + name + " <" + oldCode + ", " + code + ">");
    188             } else {
    189                 return;
    190             }
    191         }
    192         languageNameToCode.put(name, code);
    193     }
    194 
    195     public static String toGoogleLocaleId(String localeId) {
    196         // TODO fix to do languages, etc. field by field
    197         localeId = localeId.replace("-", "_");
    198         String result = CLDR_GOOGLE.get(localeId);
    199         result = result == null ? localeId : result;
    200         return result.replace("_", "-");
    201     }
    202 
    203     public static String fromGoogleLocaleId(String localeId) {
    204         localeId = localeId.replace("_", "-");
    205         // TODO fix to do languages, etc. field by field
    206         String result = GOOGLE_CLDR.get(localeId);
    207         result = result == null ? localeId : result;
    208         return result.replace("-", "_");
    209     }
    210 
    211     public static String toUnderbarLocale(String localeId) {
    212         return localeId.replace("-", "_");
    213     }
    214 
    215     public static String toHyphenLocale(String localeId) {
    216         return localeId.replace("_", "-");
    217     }
    218 
    219     public static String getCodeForName(String languageName) {
    220         return languageNameToCode.get(languageName.toLowerCase(Locale.ENGLISH));
    221     }
    222 
    223     public static Set<String> getExceptionCodes() {
    224         return exceptionCodes;
    225     }
    226 
    227     public static Set<String> getParseErrors() {
    228         return parseErrors;
    229     }
    230 
    231     public static Map<String, String> getLanguageNameToCode() {
    232         return languageNameToCode;
    233     }
    234 
    235 }
    236