Home | History | Annotate | Download | only in util
      1 /*
      2  **********************************************************************
      3  * Copyright (c) 2002-2011, International Business Machines
      4  * Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  * Author: Mark Davis
      7  **********************************************************************
      8  */
      9 package org.unicode.cldr.util;
     10 
     11 import java.io.BufferedReader;
     12 import java.io.IOException;
     13 import java.util.ArrayList;
     14 import java.util.Arrays;
     15 import java.util.Collections;
     16 import java.util.Comparator;
     17 import java.util.EnumMap;
     18 import java.util.EnumSet;
     19 import java.util.HashMap;
     20 import java.util.HashSet;
     21 import java.util.Iterator;
     22 import java.util.LinkedHashMap;
     23 import java.util.LinkedHashSet;
     24 import java.util.List;
     25 import java.util.Locale;
     26 import java.util.Map;
     27 import java.util.Map.Entry;
     28 import java.util.Set;
     29 import java.util.TreeMap;
     30 import java.util.TreeSet;
     31 import java.util.regex.Pattern;
     32 
     33 import org.unicode.cldr.draft.ScriptMetadata;
     34 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
     35 import org.unicode.cldr.util.Iso639Data.Type;
     36 import org.unicode.cldr.util.ZoneParser.ZoneLine;
     37 
     38 import com.ibm.icu.impl.Relation;
     39 import com.ibm.icu.lang.UCharacter;
     40 import com.ibm.icu.text.UnicodeSet;
     41 import com.ibm.icu.util.ICUUncheckedIOException;
     42 import com.ibm.icu.util.Output;
     43 
     44 /**
     45  * Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson
     46  * tzids
     47  */
     48 public class StandardCodes {
     49 
     50     public enum CodeType {
     51         language, script, territory, extlang, grandfathered, redundant, variant, currency, tzid;
     52         public static CodeType from(String name) {
     53             if ("region".equals(name)) {
     54                 return territory;
     55             }
     56             return CodeType.valueOf(name);
     57         }
     58     }
     59 
     60     private static final Set<CodeType> TypeSet = Collections.unmodifiableSet(EnumSet.allOf(CodeType.class));
     61 
     62     private static final Set<String> TypeStringSet;
     63     static {
     64         LinkedHashSet<String> foo = new LinkedHashSet<String>();
     65         for (CodeType x : CodeType.values()) {
     66             foo.add(x.toString());
     67         }
     68         TypeStringSet = Collections.unmodifiableSet(foo);
     69     }
     70 
    /** Separator string consisting of the single character U+25AA. */
    public static final String DESCRIPTION_SEPARATOR = "\u25AA";

    /** The code "001". NOTE(review): presumably stands in where no specific country applies - confirm with callers. */
    public static final String NO_COUNTRY = "001";

    /** The shared instance handed out by make(); null until make() is first called. */
    private static StandardCodes singleton;

    /** Per code type: code -> list of data fields (read by getData and getFullData). */
    private EnumMap<CodeType, Map<String, List<String>>> type_code_data = new EnumMap<CodeType, Map<String, List<String>>>(
        CodeType.class);

    /** Per code type: data value -> list of codes (read by getCodes). */
    private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = new EnumMap<CodeType, Map<String, List<String>>>(
        CodeType.class);

    /** Per code type: code -> preferred code (read by getPreferred). */
    private EnumMap<CodeType, Map<String, String>> type_code_preferred = new EnumMap<CodeType, Map<String, String>>(
        CodeType.class);

    /** Country code -> set of currency codes (read by getMainCurrencies). */
    private Map<String, Set<String>> country_modernCurrency = new TreeMap<String, Set<String>>();

    /** Cache of the sets computed by getGoodAvailableCodes, keyed by code type; also used there as the lock object. */
    private Map<CodeType, Set<String>> goodCodes = new TreeMap<CodeType, Set<String>>();

    /** When true, getGoodAvailableCodes prints codes whose data has fewer than three fields. */
    private static final boolean DEBUG = false;
     91 
     92     /**
     93      * Get the singleton copy of the standard codes.
     94      */
     95     static public synchronized StandardCodes make() {
     96         if (singleton == null)
     97             singleton = new StandardCodes();
     98         return singleton;
     99     }
    100 
    101     /**
    102      * The data is the name in the case of RFC3066 codes, and the country code in
    103      * the case of TZIDs and ISO currency codes. If the country code is missing,
    104      * uses ZZ.
    105      */
    106     public String getData(String type, String code) {
    107         Map<String, List<String>> code_data = getCodeData(type);
    108         if (code_data == null)
    109             return null;
    110         List<String> list = code_data.get(code);
    111         if (list == null)
    112             return null;
    113         return list.get(0);
    114     }
    115 
    116     /**
    117      * @return the full data for the type and code For the data in lstreg, it is
    118      *         description | date | canonical_value | recommended_prefix #
    119      *         comments
    120      */
    121     public List<String> getFullData(String type, String code) {
    122         Map<String, List<String>> code_data = getCodeData(type);
    123         if (code_data == null)
    124             return null;
    125         return code_data.get(code);
    126     }
    127 
    128     /**
    129      * @return the full data for the type and code For the data in lstreg, it is
    130      *         description | date | canonical_value | recommended_prefix #
    131      *         comments
    132      */
    133     public List<String> getFullData(CodeType type, String code) {
    134         Map<String, List<String>> code_data = type_code_data.get(type);
    135         if (code_data == null)
    136             return null;
    137         return code_data.get(code);
    138     }
    139 
    /**
     * Looks up the code-to-data map for a type given by name. The name is
     * converted with CodeType.from, so "region" is accepted for territory and an
     * unknown name raises IllegalArgumentException.
     */
    private Map<String, List<String>> getCodeData(String type) {
        return getCodeData(CodeType.from(type));
    }
    143 
    /** Returns the code-to-data map stored in type_code_data for the type, or null if there is none. */
    private Map<String, List<String>> getCodeData(CodeType type) {
        return type_code_data.get(type);
    }
    147 
    148     /**
    149      * Get at the language registry values, as a Map from label to value.
    150      *
    151      * @param type
    152      * @param code
    153      * @return
    154      */
    155     public Map<String, String> getLangData(String type, String code) {
    156         try {
    157             if (type.equals("territory"))
    158                 type = "region";
    159             else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH);
    160             return (Map) ((Map) getLStreg().get(type)).get(code);
    161         } catch (RuntimeException e) {
    162             return null;
    163         }
    164     }
    165 
    166     /**
    167      * Return a replacement code, if available. If not, return null.
    168      *
    169      */
    170     public String getReplacement(String type, String code) {
    171         if (type.equals("currency"))
    172             return null; // no replacement codes for currencies
    173         List<String> data = getFullData(type, code);
    174         if (data == null)
    175             return null;
    176         // if available, the replacement is a non-empty value other than --, in
    177         // position 2.
    178         if (data.size() < 3)
    179             return null;
    180         String replacement = (String) data.get(2);
    181         if (!replacement.equals("") && !replacement.equals("--"))
    182             return replacement;
    183         return null;
    184     }
    185 
    186     /**
    187      * Return the list of codes that have the same data. For example, returns all
    188      * currency codes for a country. If there is a preferred one, it is first.
    189      *
    190      * @param type
    191      * @param data
    192      * @return
    193      */
    194     @Deprecated
    195     public List<String> getCodes(String type, String data) {
    196         return getCodes(CodeType.valueOf(type), data);
    197     }
    198 
    199     /**
    200      * Return the list of codes that have the same data. For example, returns all
    201      * currency codes for a country. If there is a preferred one, it is first.
    202      */
    203     public List<String> getCodes(CodeType type, String data) {
    204         Map<String, List<String>> data_codes = type_name_codes.get(type);
    205         if (data_codes == null)
    206             return null;
    207         return Collections.unmodifiableList(data_codes.get(data));
    208     }
    209 
    210     /**
    211      * Where there is a preferred code, return it.
    212      */
    213     @Deprecated
    214     public String getPreferred(String type, String code) {
    215         return getPreferred(CodeType.valueOf(type), code);
    216     }
    217 
    218     /**
    219      * Where there is a preferred code, return it.
    220      */
    221 
    222     public String getPreferred(CodeType type, String code) {
    223         Map<String, String> code_preferred = type_code_preferred.get(type);
    224         if (code_preferred == null)
    225             return code;
    226         String newCode = code_preferred.get(code);
    227         if (newCode == null)
    228             return code;
    229         return newCode;
    230     }
    231 
    /**
     * Get all the available types, as strings.
     *
     * @return the unmodifiable set TypeStringSet, built from the string form of each CodeType constant
     */
    public Set<String> getAvailableTypes() {
        return TypeStringSet;
    }
    238 
    /**
     * Get all the available types, as enum constants.
     *
     * @return the unmodifiable set TypeSet, which contains every CodeType constant
     */
    public Set<CodeType> getAvailableTypesEnum() {
        return TypeSet;
    }
    245 
    /**
     * Get all the available codes for a given type. The name is converted with
     * CodeType.from, so "region" is accepted for territory.
     *
     * @param type name of the code type
     * @return the result of getAvailableCodes(CodeType) for the converted type
     */
    public Set<String> getAvailableCodes(String type) {
        return getAvailableCodes(CodeType.from(type));
    }
    255 
    256     /**
    257      * Get all the available codes for a given type
    258      *
    259      * @param type
    260      * @return
    261      */
    262     public Set<String> getAvailableCodes(CodeType type) {
    263         Map<String, List<String>> code_name = type_code_data.get(type);
    264         return Collections.unmodifiableSet(code_name.keySet());
    265     }
    266 
    /**
     * String-typed convenience form of getGoodAvailableCodes(CodeType). The name
     * is converted with CodeType.from, so "region" is accepted for territory.
     */
    public Set<String> getGoodAvailableCodes(String stringType) {
        return getGoodAvailableCodes(CodeType.from(stringType));
    }
    270 
    271     /**
    272      * Get all the available "real" codes for a given type, excluding private use,
    273      * but including some deprecated codes. Use SupplementalDataInfo getLocaleAliases to
    274      * exclude others.
    275      *
    276      * @param type
    277      * @return
    278      */
    279     public Set<String> getGoodAvailableCodes(CodeType type) {
    280         Set<String> result = goodCodes.get(type);
    281         if (result == null) {
    282             synchronized (goodCodes) {
    283                 Map<String, List<String>> code_name = getCodeData(type);
    284                 SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
    285                 if (code_name == null)
    286                     return null;
    287                 result = new TreeSet<String>(code_name.keySet());
    288                 switch (type) {
    289                 case currency:
    290                     break; // nothing special
    291                 case language:
    292                     return sd.getCLDRLanguageCodes();
    293                 case script:
    294                     return sd.getCLDRScriptCodes();
    295                 case tzid:
    296                     break; // nothing special
    297                 default:
    298                     for (Iterator<String> it = result.iterator(); it.hasNext();) {
    299                         String code = (String) it.next();
    300                         if (code.equals("root") || code.equals("QO"))
    301                             continue;
    302                         List<String> data = getFullData(type, code);
    303                         if (data.size() < 3) {
    304                             if (DEBUG)
    305                                 System.out.println(code + "\t" + data);
    306                         }
    307                         if ("PRIVATE USE".equalsIgnoreCase(data.get(0))
    308                             || (!data.get(2).equals("") && !data.get(2).equals("--"))) {
    309                             // System.out.println("Removing: " + code);
    310                             it.remove();
    311                         }
    312                     }
    313                 }
    314                 result = Collections.unmodifiableSet(result);
    315                 goodCodes.put(type, result);
    316             }
    317         }
    318         return result;
    319     }
    320 
    321     private static Set<String> GOOD_COUNTRIES;
    322 
    323     public Set<String> getGoodCountries() {
    324         synchronized (goodCodes) {
    325             if (GOOD_COUNTRIES == null) {
    326                 Set<String> temp = new LinkedHashSet<String>();
    327                 for (String s : getGoodAvailableCodes(CodeType.territory)) {
    328                     if (isCountry(s)) {
    329                         temp.add(s);
    330                     }
    331                 }
    332                 GOOD_COUNTRIES = Collections.unmodifiableSet(temp);
    333             }
    334         }
    335         return GOOD_COUNTRIES;
    336     }
    337 
    /**
     * Gets the modern currency codes recorded for a country.
     *
     * @param countryCode the key to look up in country_modernCurrency
     * @return the stored set itself (not a copy), or null if the country has no entry
     */
    public Set<String> getMainCurrencies(String countryCode) {
        return country_modernCurrency.get(countryCode);
    }
    344 
    // The three maps below start out null and are filled in by loadPlatformLocaleStatus().
    /** organization -> locale -> coverage level */
    private EnumMap<Organization, Map<String, Level>> platform_locale_level = null;
    /** organization -> (coverage level -> locales), built by inverting platform_locale_level */
    private EnumMap<Organization, Relation<Level, String>> platform_level_locale = null;
    /** organization string -> locale -> level string; string-keyed copy of platform_locale_level */
    private Map<String, Map<String, String>> platform_locale_levelString = null;
    348 
    349 //    /**
    350 //     * Get rid of this
    351 //     *
    352 //     * @param type
    353 //     * @return
    354 //     * @throws IOException
    355 //     * @deprecated
    356 //     */
    357 //    public String getEffectiveLocaleType(String type) throws IOException {
    358 //        if ((type != null) && (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) {
    359 //            return type;
    360 //        } else {
    361 //            return null; // the default.. for now..
    362 //        }
    363 //    }
    364 
    365     static Comparator caseless = new Comparator() {
    366 
    367         public int compare(Object arg0, Object arg1) {
    368             String s1 = (String) arg0;
    369             String s2 = (String) arg1;
    370             return s1.compareToIgnoreCase(s2);
    371         }
    372 
    373     };
    374 
    375     /**
    376      * Returns locales according to status. It returns a Map of Maps, key 1 is
    377      * either IBM or Java (perhaps more later), key 2 is the Level.
    378      *
    379      * @deprecated
    380      */
    381     public Map<Organization, Map<String, Level>> getLocaleTypes() {
    382         synchronized (StandardCodes.class) {
    383             if (platform_locale_level == null) {
    384                 loadPlatformLocaleStatus();
    385             }
    386         }
    387         return platform_locale_level;
    388     }
    389 
    /**
     * Return map of locales to levels for one organization.
     *
     * @param org the organization to look up in getLocaleTypes()
     * @return the locale-to-level map, or null if getLocaleTypes() has no entry for org
     */
    public Map<String, Level> getLocaleToLevel(Organization org) {
        return getLocaleTypes().get(org);
    }
    398 
    /**
     * String-typed convenience form: converts the organization name with
     * Organization.fromString and delegates to
     * getLocaleCoverageLevel(Organization, String).
     */
    public Level getLocaleCoverageLevel(String organization, String desiredLocale) {
        return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale);
    }
    402 
    /**
     * Returns the coverage level of the locale for the organization, discarding
     * the information about how the level was found (a throwaway Output is passed
     * to the three-argument overload).
     */
    public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) {
        return getLocaleCoverageLevel(organization, desiredLocale, new Output<LocaleCoverageType>());
    }
    406 
    /** Describes how getLocaleCoverageLevel arrived at the level it returned. */
    public enum LocaleCoverageType {
        /** The requested locale itself has a level. */
        explicit,
        /** The level was taken from a parent of the requested locale. */
        parent,
        /** The level was taken from the "*" entry. */
        star,
        /** No level could be determined. */
        undetermined
    }
    410 
    411     /**
    412      * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing.
    413      * A locale of "*" in the data means "everything else".
    414      */
    415     public Level getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType) {
    416         synchronized (StandardCodes.class) {
    417             if (platform_locale_level == null) {
    418                 loadPlatformLocaleStatus();
    419             }
    420         }
    421         coverageType.value = LocaleCoverageType.undetermined;
    422         if (organization == null) {
    423             return Level.UNDETERMINED;
    424         }
    425         Map<String, Level> locale_status = platform_locale_level.get(organization);
    426         if (locale_status == null) {
    427             return Level.UNDETERMINED;
    428         }
    429         // see if there is a parent
    430         String originalLocale = desiredLocale;
    431         while (desiredLocale != null) {
    432             Level status = locale_status.get(desiredLocale);
    433             if (status != null && status != Level.UNDETERMINED) {
    434                 coverageType.value = originalLocale == desiredLocale ? LocaleCoverageType.explicit : LocaleCoverageType.parent;
    435                 return status;
    436             }
    437             desiredLocale = LocaleIDParser.getParent(desiredLocale);
    438         }
    439         Level status = locale_status.get("*");
    440         if (status != null && status != Level.UNDETERMINED) {
    441             coverageType.value = LocaleCoverageType.star;
    442             return status;
    443         }
    444         return Level.UNDETERMINED;
    445     }
    446 
    /**
     * Returns the coverage level the organization assigns to the locale "*",
     * obtained by calling getLocaleCoverageLevel(organization, "*").
     */
    public Level getDefaultLocaleCoverageLevel(Organization organization) {
        return getLocaleCoverageLevel(organization, "*");
    }
    453 
    454     public Set<Organization> getLocaleCoverageOrganizations() {
    455         synchronized (StandardCodes.class) {
    456             if (platform_locale_level == null) {
    457                 loadPlatformLocaleStatus();
    458             }
    459         }
    460         return platform_locale_level.keySet();
    461     }
    462 
    463     public Set<String> getLocaleCoverageOrganizationStrings() {
    464         synchronized (StandardCodes.class) {
    465             if (platform_locale_level == null) {
    466                 loadPlatformLocaleStatus();
    467             }
    468         }
    469         return platform_locale_levelString.keySet();
    470     }
    471 
    /**
     * String-typed convenience form: converts the organization name with
     * Organization.fromString and delegates to getLocaleCoverageLocales(Organization).
     */
    public Set<String> getLocaleCoverageLocales(String organization) {
        return getLocaleCoverageLocales(Organization.fromString(organization));
    }
    475 
    476     public Set<String> getLocaleCoverageLocales(Organization organization) {
    477         synchronized (StandardCodes.class) {
    478             if (platform_locale_level == null) {
    479                 loadPlatformLocaleStatus();
    480             }
    481         }
    482         return platform_locale_level.get(organization).keySet();
    483     }
    484 
    485     public Relation<Level, String> getLevelsToLocalesFor(Organization organization) {
    486         synchronized (StandardCodes.class) {
    487             if (platform_level_locale == null) {
    488                 loadPlatformLocaleStatus();
    489             }
    490         }
    491         return platform_level_locale.get(organization);
    492     }
    493 
    494     public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) {
    495         Set<String> result = new LinkedHashSet<String>();
    496         for (String locale : getLocaleCoverageLocales(organization)) {
    497             if (choice.contains(getLocaleCoverageLevel(organization, locale))) {
    498                 result.add(locale);
    499             }
    500         }
    501         return result;
    502     }
    503 
    504     private void loadPlatformLocaleStatus() {
    505         LocaleIDParser parser = new LocaleIDParser();
    506         platform_locale_level = new EnumMap<Organization, Map<String, Level>>(Organization.class);
    507         SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
    508         Set<String> defaultContentLocales = sd.getDefaultContentLocales();
    509         String line;
    510         try {
    511             BufferedReader lstreg = CldrUtility.getUTF8Data("Locales.txt");
    512             while (true) {
    513                 line = lstreg.readLine();
    514                 if (line == null)
    515                     break;
    516                 int commentPos = line.indexOf('#');
    517                 if (commentPos >= 0) {
    518                     line = line.substring(0, commentPos);
    519                 }
    520                 line = line.trim();
    521                 if (line.length() == 0)
    522                     continue;
    523                 List<String> stuff = CldrUtility.splitList(line, ';', true);
    524                 Organization organization;
    525 
    526                 // verify that the organization is valid
    527                 try {
    528                     organization = Organization.fromString(stuff.get(0));
    529                 } catch (Exception e) {
    530                     throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line);
    531                 }
    532 
    533                 // verify that the locale is valid BCP47
    534                 String locale = (String) stuff.get(1);
    535                 if (!locale.equals("*")) {
    536                     parser.set(locale);
    537                     String valid = validate(parser);
    538                     if (valid.length() != 0) {
    539                         throw new IllegalArgumentException("Invalid locale in Locales.txt: " + line);
    540                     }
    541                     locale = parser.toString(); // normalize
    542 
    543                     // verify that the locale is not a default content locale
    544                     if (defaultContentLocales.contains(locale)) {
    545                         throw new IllegalArgumentException("Cannot have default content locale in Locales.txt: " + line);
    546                     }
    547                 }
    548 
    549                 Level status = Level.get((String) stuff.get(2));
    550                 if (status == Level.UNDETERMINED) {
    551                     System.out.println("Warning: Level unknown on: " + line);
    552                 }
    553                 Map<String, Level> locale_status = platform_locale_level.get(organization);
    554                 if (locale_status == null) {
    555                     platform_locale_level.put(organization, locale_status = new TreeMap<String, Level>());
    556                 }
    557                 locale_status.put(locale, status);
    558                 if (!locale.equals("*")) {
    559                     String scriptLoc = parser.getLanguageScript();
    560                     if (locale_status.get(scriptLoc) == null)
    561                         locale_status.put(scriptLoc, status);
    562                     String lang = parser.getLanguage();
    563                     if (locale_status.get(lang) == null)
    564                         locale_status.put(lang, status);
    565                 }
    566             }
    567         } catch (IOException e) {
    568             throw new ICUUncheckedIOException("Internal Error", e);
    569         }
    570 
    571         // now reset the parent to be the max of the children
    572         for (Organization platform : platform_locale_level.keySet()) {
    573             Map<String, Level> locale_level = platform_locale_level.get(platform);
    574             for (String locale : locale_level.keySet()) {
    575                 parser.set(locale);
    576                 Level childLevel = locale_level.get(locale);
    577 
    578                 String language = parser.getLanguage();
    579                 if (!language.equals(locale)) {
    580                     Level languageLevel = (Level) locale_level.get(language);
    581                     if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
    582                         locale_level.put(language, childLevel);
    583                     }
    584                 }
    585                 String oldLanguage = language;
    586                 language = parser.getLanguageScript();
    587                 if (!language.equals(oldLanguage)) {
    588                     Level languageLevel = (Level) locale_level.get(language);
    589                     if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
    590                         locale_level.put(language, childLevel);
    591                     }
    592                 }
    593             }
    594         }
    595         // backwards compat hack
    596         platform_locale_levelString = new TreeMap<String, Map<String, String>>();
    597         platform_level_locale = new EnumMap<>(Organization.class);
    598         for (Organization platform : platform_locale_level.keySet()) {
    599             Map<String, String> locale_levelString = new TreeMap<String, String>();
    600             platform_locale_levelString.put(platform.toString(), locale_levelString);
    601             Map<String, Level> locale_level = platform_locale_level.get(platform);
    602             for (String locale : locale_level.keySet()) {
    603                 locale_levelString.put(locale, locale_level.get(locale).toString());
    604             }
    605             Relation level_locale = Relation.of(new EnumMap(Level.class), HashSet.class);
    606             level_locale.addAllInverted(locale_level).freeze();
    607             platform_level_locale.put(platform, level_locale);
    608         }
    609         CldrUtility.protectCollection(platform_level_locale);
    610         platform_locale_level = CldrUtility.protectCollection(platform_locale_level);
    611         platform_locale_levelString = CldrUtility.protectCollection(platform_locale_levelString);
    612     }
    613 
    614     private String validate(LocaleIDParser parser) {
    615         String message = "";
    616         String lang = parser.getLanguage();
    617         if (lang.length() == 0) {
    618             message += ", Missing language";
    619         } else if (!getAvailableCodes("language").contains(lang)) {
    620             message += ", Invalid language code: " + lang;
    621         }
    622         String script = parser.getScript();
    623         if (script.length() != 0 && !getAvailableCodes("script").contains(script)) {
    624             message += ", Invalid script code: " + script;
    625         }
    626         String territory = parser.getRegion();
    627         if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) {
    628             message += ", Invalid territory code: " + lang;
    629         }
    630         return message.length() == 0 ? message : message.substring(2);
    631     }
    632 
    /**
     * Ascertain that the given locale is in the given group specified by the
     * organization. The group is compared with the string returned by
     * getGroup(locale, org).
     *
     * @param locale the locale to check
     * @param group the expected group; must not be null
     * @param org the organization whose coverage data is consulted
     * @return true if group equals getGroup(locale, org)
     */
    public boolean isLocaleInGroup(String locale, String group, Organization org) {
        return group.equals(getGroup(locale, org));
    }
    645 
    /**
     * String-typed convenience form: converts org with Organization.fromString
     * and delegates to isLocaleInGroup(String, String, Organization).
     */
    public boolean isLocaleInGroup(String locale, String group, String org) {
        return isLocaleInGroup(locale, group, Organization.fromString(org));
    }
    649 
    /**
     * String-typed convenience form: converts org with Organization.fromString
     * and delegates to getGroup(String, Organization).
     */
    public String getGroup(String locale, String org) {
        return getGroup(locale, Organization.fromString(org));
    }
    653 
    654     /**
    655      * Gets the coverage group given a locale and org
    656      *
    657      * @param locale
    658      * @param org
    659      * @return group if availble, null if not
    660      */
    661     public String getGroup(String locale, Organization org) {
    662         Level l = getLocaleCoverageLevel(org, locale);
    663         if (l.equals(Level.UNDETERMINED)) {
    664             return null;
    665         } else {
    666             return l.toString();
    667         }
    668     }
    669 
    670     // ========== PRIVATES ==========
    671 
    672     private StandardCodes() {
    673         String[] files = { /* "lstreg.txt", */"ISO4217.txt" }; // , "TZID.txt"
    674         type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>());
    675         add(CodeType.language, "root", "Root");
    676         String originalLine = null;
    677         for (int fileIndex = 0; fileIndex < files.length; ++fileIndex) {
    678             try {
    679                 BufferedReader lstreg = CldrUtility.getUTF8Data(files[fileIndex]);
    680                 while (true) {
    681                     String line = originalLine = lstreg.readLine();
    682                     if (line == null)
    683                         break;
    684                     if (line.startsWith("\uFEFF")) {
    685                         line = line.substring(1);
    686                     }
    687                     line = line.trim();
    688                     int commentPos = line.indexOf('#');
    689                     String comment = "";
    690                     if (commentPos >= 0) {
    691                         comment = line.substring(commentPos + 1).trim();
    692                         line = line.substring(0, commentPos);
    693                     }
    694                     if (line.length() == 0)
    695                         continue;
    696                     List<String> pieces = CldrUtility.splitList(line, '|', true,
    697                         new ArrayList<String>());
    698                     CodeType type = CodeType.from(pieces.get(0));
    699                     pieces.remove(0);
    700 
    701                     String code = pieces.get(0);
    702                     pieces.remove(0);
    703                     if (type.equals("date")) {
    704                         continue;
    705                     }
    706 
    707                     String oldName = pieces.get(0);
    708                     int pos = oldName.indexOf(';');
    709                     if (pos >= 0) {
    710                         oldName = oldName.substring(0, pos).trim();
    711                         pieces.set(0, oldName);
    712                     }
    713 
    714                     List<String> data = pieces;
    715                     if (comment.indexOf("deprecated") >= 0) {
    716                         // System.out.println(originalLine);
    717                         if (data.get(2).toString().length() == 0) {
    718                             data.set(2, "--");
    719                         }
    720                     }
    721                     if (oldName.equalsIgnoreCase("PRIVATE USE")) {
    722                         int separatorPos = code.indexOf("..");
    723                         if (separatorPos < 0) {
    724                             add(type, code, data);
    725                         } else {
    726                             String current = code.substring(0, separatorPos);
    727                             String end = code.substring(separatorPos + 2);
    728                             // System.out.println(">>" + code + "\t" + current + "\t" + end);
    729                             for (; current.compareTo(end) <= 0; current = nextAlpha(current)) {
    730                                 // System.out.println(">" + current);
    731                                 add(type, current, data);
    732                             }
    733                         }
    734                         continue;
    735                     }
    736                     if (!type.equals("tzid")) {
    737                         add(type, code, data);
    738                         if (type.equals("currency")) {
    739                             // currency | TPE | Timor Escudo | TP | EAST TIMOR | O
    740                             if (data.get(3).equals("C")) {
    741                                 String country = (String) data.get(1);
    742                                 Set<String> codes = country_modernCurrency.get(country);
    743                                 if (codes == null) {
    744                                     country_modernCurrency.put(country, codes = new TreeSet<String>());
    745                                 }
    746                                 codes.add(code);
    747                             }
    748                         }
    749                         continue;
    750                     }
    751                     // type = tzid
    752                     // List codes = (List) Utility.splitList(code, ',', true, new
    753                     // ArrayList());
    754                     String preferred = null;
    755                     for (int i = 0; i < pieces.size(); ++i) {
    756                         code = (String) pieces.get(i);
    757                         add(type, code, data);
    758                         if (preferred == null)
    759                             preferred = code;
    760                         else {
    761                             Map<String, String> code_preferred = type_code_preferred.get(type);
    762                             code_preferred.put(code, preferred);
    763                         }
    764                     }
    765                 }
    766                 lstreg.close();
    767             } catch (Exception e) {
    768                 System.err.println("WARNING: " + files[fileIndex]
    769                     + " may be a corrupted UTF-8 file. Please check.");
    770                 throw (IllegalArgumentException) new IllegalArgumentException(
    771                     "Can't read " + files[fileIndex] + "\t" + originalLine)
    772                         .initCause(e);
    773             }
    774             country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency);
    775         }
    776 
    777         // data is: description | date | canonical_value | recommended_prefix #
    778         // comments
    779         // HACK, just rework
    780 
    781         Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg();
    782         // languageRegistry = CldrUtility.protectCollection(languageRegistry);
    783 
    784         for (String type : languageRegistry.keySet()) {
    785             CodeType type2 = CodeType.from(type);
    786             Map<String, Map<String, String>> m = languageRegistry.get(type);
    787             for (String code : m.keySet()) {
    788                 Map<String, String> mm = m.get(code);
    789                 List<String> data = new ArrayList<String>(0);
    790                 data.add(mm.get("Description"));
    791                 data.add(mm.get("Added"));
    792                 String pref = mm.get("Preferred-Value");
    793                 if (pref == null) {
    794                     pref = mm.get("Deprecated");
    795                     if (pref == null)
    796                         pref = "";
    797                     else
    798                         pref = "deprecated";
    799                 }
    800                 data.add(pref);
    801                 if (type.equals("variant")) {
    802                     code = code.toUpperCase();
    803                 }
    804                 // data.add(mm.get("Recommended_Prefix"));
    805                 // {"region", "BQ", "Description", "British Antarctic Territory",
    806                 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
    807                 add(type2, code, data);
    808             }
    809         }
    810 
    811         Map<String, List<String>> m = getZoneData();
    812         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
    813             String code = it.next();
    814             add(CodeType.tzid, code, m.get(code).toString());
    815         }
    816     }
    817 
    818     /**
    819      * @param current
    820      * @return
    821      */
    822     private static String nextAlpha(String current) {
    823         // Don't care that this is inefficient
    824         int value = 0;
    825         for (int i = 0; i < current.length(); ++i) {
    826             char c = current.charAt(i);
    827             c -= c < 'a' ? 'A' : 'a';
    828             value = value * 26 + c;
    829         }
    830         value += 1;
    831         String result = "";
    832         for (int i = 0; i < current.length(); ++i) {
    833             result = (char) ((value % 26) + 'A') + result;
    834             value = value / 26;
    835         }
    836         if (UCharacter.toLowerCase(current).equals(current)) {
    837             result = UCharacter.toLowerCase(result);
    838         } else if (UCharacter.toUpperCase(current).equals(current)) {
    839             // do nothing
    840         } else {
    841             result = UCharacter.toTitleCase(result, null);
    842         }
    843         return result;
    844     }
    845 
    846     /**
    847      * @param string
    848      * @param string2
    849      * @param string3
    850      */
    851     private void add(CodeType type, String string2, String string3) {
    852         List<String> l = new ArrayList<String>();
    853         l.add(string3);
    854         add(type, string2, l);
    855     }
    856 
    857     private void add(CodeType type, String code, List<String> otherData) {
    858         // hack
    859         if (type == CodeType.script) {
    860             if (code.equals("Qaai")) {
    861                 otherData = new ArrayList<String>(otherData);
    862                 otherData.set(0, "Inherited");
    863             } else if (code.equals("Zyyy")) {
    864                 otherData = new ArrayList<String>(otherData);
    865                 otherData.set(0, "Common");
    866             }
    867         }
    868 
    869         // assume name is the first item
    870 
    871         String name = otherData.get(0);
    872 
    873         // add to main list
    874         Map<String, List<String>> code_data = getCodeData(type);
    875         if (code_data == null) {
    876             code_data = new TreeMap<String, List<String>>();
    877             type_code_data.put(type, code_data);
    878         }
    879         List<String> lastData = code_data.get(code);
    880         if (lastData != null) {
    881             lastData.addAll(otherData);
    882         } else {
    883             code_data.put(code, otherData);
    884         }
    885 
    886         // now add mapping from name to codes
    887         Map<String, List<String>> name_codes = type_name_codes.get(type);
    888         if (name_codes == null) {
    889             name_codes = new TreeMap<String, List<String>>();
    890             type_name_codes.put(type, name_codes);
    891         }
    892         List<String> codes = name_codes.get(name);
    893         if (codes == null) {
    894             codes = new ArrayList<String>();
    895             name_codes.put(name, codes);
    896         }
    897         codes.add(code);
    898     }
    899 
    900     private List<String> DELETED3166 = Collections.unmodifiableList(Arrays
    901         .asList(new String[] { "BQ", "BU", "CT", "DD", "DY", "FQ", "FX", "HV",
    902             "JT", "MI", "NH", "NQ", "NT", "PC", "PU", "PZ", "RH", "SU", "TP",
    903             "VD", "WK", "YD", "YU", "ZR" }));
    904 
    905     public List<String> getOld3166() {
    906         return DELETED3166;
    907     }
    908 
    909     private Map<String, List<String>> WorldBankInfo;
    910 
    911     public Map<String, List<String>> getWorldBankInfo() {
    912         if (WorldBankInfo == null) {
    913             List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false);
    914             WorldBankInfo = new HashMap<String, List<String>>();
    915             for (String line : temp) {
    916                 List<String> row = CldrUtility.splitList(line, ';', true);
    917                 String key = row.get(0);
    918                 row.remove(0);
    919                 WorldBankInfo.put(key, row);
    920             }
    921             WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo);
    922         }
    923         return WorldBankInfo;
    924     }
    925 
    926     Set<String> moribundLanguages;
    927 
    928     public Set<String> getMoribundLanguages() {
    929         if (moribundLanguages == null) {
    930             List<String> temp = fillFromCommaFile("moribund_languages.txt", true);
    931             moribundLanguages = new TreeSet<String>();
    932             moribundLanguages.addAll(temp);
    933             moribundLanguages = CldrUtility.protectCollection(moribundLanguages);
    934         }
    935         return moribundLanguages;
    936     }
    937 
    938     // produces a list of the 'clean' lines
    939     private List<String> fillFromCommaFile(String filename, boolean trim) {
    940         try {
    941             List<String> result = new ArrayList<String>();
    942             String line;
    943             BufferedReader lstreg = CldrUtility.getUTF8Data(filename);
    944             while (true) {
    945                 line = lstreg.readLine();
    946                 if (line == null)
    947                     break;
    948                 int commentPos = line.indexOf('#');
    949                 if (commentPos >= 0) {
    950                     line = line.substring(0, commentPos);
    951                 }
    952                 if (trim) {
    953                     line = line.trim();
    954                 }
    955                 if (line.length() == 0)
    956                     continue;
    957                 result.add(line);
    958             }
    959             return result;
    960         } catch (Exception e) {
    961             throw (RuntimeException) new IllegalArgumentException(
    962                 "Can't process file: data/" + filename).initCause(e);
    963         }
    964     }
    965 
    966     // return a complex map. language -> arn -> {"Comments" -> "x",
    967     // "Description->y,...}
    968     static String[][] extras = {
    969         { "language", "root", "Description", "Root", "CLDR", "True" },
    970         // { "language", "cch", "Description", "Atsam", "CLDR", "True" },
    971         // { "language", "kaj", "Description", "Jju", "CLDR", "True" },
    972         // { "language", "kcg", "Description", "Tyap", "CLDR", "True" },
    973         // { "language", "kfo", "Description", "Koro", "CLDR", "True" },
    974         // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" },
    975         // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" },
    976         // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" },
    977         // { "region", "003", "Description", "North America", "CLDR", "True" },
    978         //        { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", "Preferred-Value", "POLYTON" },
    979         { "variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True" },
    980         { "variant", "SAAHO", "Description", "Dialect", "CLDR", "True" },
    981         { "variant", "POSIX", "Description", "Computer-Style", "CLDR", "True" },
    982         // {"region", "172", "Description", "Commonwealth of Independent States",
    983         // "CLDR", "True"},
    984         // { "region", "", "Description", "European Union", "CLDR", "True" },
    985         { "region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True" },
    986         { "region", "QO", "Description", "Outlying Oceania", "CLDR", "True" },
    987         { "region", "XK", "Description", "Kosovo", "CLDR", "True" },
    988         { "script", "Qaai", "Description", "Inherited", "CLDR", "True" },
    989         // {"region", "003", "Description", "North America", "CLDR", "True"},
    990         // {"region", "062", "Description", "South-central Asia", "CLDR", "True"},
    991         // {"region", "200", "Description", "Czechoslovakia", "CLDR", "True"},
    992         // {"region", "830", "Description", "Channel Islands", "CLDR", "True"},
    993         // {"region", "833", "Description", "Isle of Man", "CLDR", "True"},
    994 
    995         // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi
    996         // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"},
    997         // {"region", "SU", "Description", "Union of Soviet Socialist Republics",
    998         // "CLDR", "True", "Deprecated", "True"},
    999         // {"region", "BQ", "Description", "British Antarctic Territory",
   1000         // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
   1001         // {"region", "CT", "Description", "Canton and Enderbury Islands",
   1002         // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"},
   1003         // {"region", "FQ", "Description", "French Southern and Antarctic Territories
   1004         // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"},
   1005         // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM",
   1006         // "CLDR", "True", "Deprecated", "True"},
   1007         // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM",
   1008         // "CLDR", "True", "Deprecated", "True"},
   1009         // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value",
   1010         // "AQ", "CLDR", "True", "Deprecated", "True"},
   1011         // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided
   1012         // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True",
   1013         // "Deprecated", "True"},
   1014         // {"region", "PU", "Description", "U.S. Miscellaneous Pacific Islands",
   1015         // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"},
   1016         // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value",
   1017         // "PA", "CLDR", "True", "Deprecated", "True"},
   1018         // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN",
   1019         // "CLDR", "True", "Deprecated", "True"},
   1020         // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM",
   1021         // "CLDR", "True", "Deprecated", "True"},
   1022     };
   1023 
    /**
     * Name of the registry data file that {@code initLstr()} opens. Obtained from
     * {@code CldrUtility.getProperty("registry", ...)}; the second argument is
     * presumably the default used when the property is not set (TODO confirm).
     */
    static final String registryName = CldrUtility.getProperty("registry", "language-subtag-registry");
   1025 
   1026     public enum LstrType {
   1027         language("und", "zxx", "mul", "mis", "root"),
   1028         script("Zzzz", "Zsym", "Zxxx", "Zmth"),
   1029         region("ZZ"),
   1030         variant(),
   1031         extlang(true, false),
   1032         grandfathered(true, false),
   1033         redundant(true, false),
   1034         /** specialized codes for validity; TODO: rename LstrType **/
   1035         currency(false, true, "XXX"),
   1036         subdivision(false, true),
   1037         unit(false, true);
   1038 
   1039         public final Set<String> specials;
   1040         public final String unknown;
   1041         public final boolean isLstr;
   1042         public final boolean isUnicode;
   1043 
   1044         private LstrType(String... unknownValue) {
   1045             this(true, true, unknownValue);
   1046         }
   1047 
   1048         private LstrType(boolean lstr, boolean unicode, String... unknownValue) {
   1049             unknown = unknownValue.length == 0 ? null : unknownValue[0];
   1050             LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue));
   1051             if (unknown != null) {
   1052                 set.remove(unknown);
   1053             }
   1054             specials = Collections.unmodifiableSet(set);
   1055             isLstr = lstr;
   1056             isUnicode = unicode;
   1057         }
   1058 
   1059         //
   1060         static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}");
   1061 
   1062         boolean isWellFormed(String candidate) {
   1063             switch (this) {
   1064             case subdivision:
   1065                 return WELLFORMED.matcher(candidate).matches();
   1066             default:
   1067                 throw new UnsupportedOperationException();
   1068             }
   1069         }
   1070     }
   1071 
   1072     public enum LstrField {
   1073         Type, Subtag, Description, Added, Scope, Tag, Suppress_Script, Macrolanguage, Deprecated, Preferred_Value, Comments, Prefix, CLDR;
   1074         public static LstrField from(String s) {
   1075             return LstrField.valueOf(s.trim().replace("-", "_"));
   1076         }
   1077     }
   1078 
   1079     static Map<String, Map<String, Map<String, String>>> LSTREG;
   1080     static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM;
   1081     static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW;
   1082 
   1083     /**
   1084      * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br>
   1085      * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by
   1086      * DESCRIPTION_SEPARATOR.
   1087      *
   1088      * @return
   1089      */
   1090     public static Map<String, Map<String, Map<String, String>>> getLStreg() {
   1091         if (LSTREG == null) {
   1092             initLstr();
   1093         }
   1094         return LSTREG;
   1095     }
   1096 
   1097     /**
   1098      * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br>
   1099      * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by
   1100      * DESCRIPTION_SEPARATOR.
   1101      *
   1102      * @return
   1103      */
   1104     public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() {
   1105         if (LSTREG_ENUM == null) {
   1106             initLstr();
   1107         }
   1108         return LSTREG_ENUM;
   1109     }
   1110 
   1111     public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() {
   1112         if (LSTREG_ENUM == null) {
   1113             initLstr();
   1114         }
   1115         return LSTREG_RAW;
   1116     }
   1117 
   1118     private static void initLstr() {
   1119         Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<LstrType, Map<String, Map<LstrField, String>>>();
   1120 
   1121         int lineNumber = 1;
   1122 
   1123         Set<String> funnyTags = new TreeSet<String>();
   1124         String line;
   1125         try {
   1126             BufferedReader lstreg = CldrUtility.getUTF8Data(registryName);
   1127             LstrType lastType = null;
   1128             String lastTag = null;
   1129             Map<String, Map<LstrField, String>> subtagData = null;
   1130             Map<LstrField, String> currentData = null;
   1131             LstrField lastLabel = null;
   1132             String lastRest = null;
   1133             boolean inRealContent = false;
   1134 //            Map<String, String> translitCache = new HashMap<String, String>();
   1135             for (;; ++lineNumber) {
   1136                 line = lstreg.readLine();
   1137                 if (line == null)
   1138                     break;
   1139                 if (line.length() == 0)
   1140                     continue; // skip blanks
   1141                 if (line.startsWith("File-Date: ")) {
   1142                     if (DEBUG) System.out.println("Language Subtag Registry: " + line);
   1143                     inRealContent = true;
   1144                     continue;
   1145                 }
   1146                 if (!inRealContent) {
   1147                     // skip until we get to real content
   1148                     continue;
   1149                 }
   1150                 // skip cruft
   1151                 if (line.startsWith("Internet-Draft")) {
   1152                     continue;
   1153                 }
   1154                 if (line.startsWith("Ewell")) {
   1155                     continue;
   1156                 }
   1157                 if (line.startsWith("\f")) {
   1158                     continue;
   1159                 }
   1160                 if (line.startsWith("4.  Security Considerations")) {
   1161                     break;
   1162                 }
   1163 
   1164                 if (line.startsWith("%%"))
   1165                     continue; // skip separators (ok, since data starts with Type:
   1166                 if (line.startsWith(" ")) {
   1167                     currentData.put(lastLabel, lastRest + " " + line.trim());
   1168                     continue;
   1169                 }
   1170 
   1171                 /*
   1172                  * Type: language Subtag: aa Description: Afar Added: 2005-10-16
   1173                  * Suppress-Script: Latn
   1174                  */
   1175                 int pos2 = line.indexOf(':');
   1176                 LstrField label = LstrField.from(line.substring(0, pos2));
   1177                 String rest = line.substring(pos2 + 1).trim();
   1178                 if (label == LstrField.Type) {
   1179                     subtagData = CldrUtility.get(result2, lastType = LstrType.valueOf(rest));
   1180                     if (subtagData == null) {
   1181                         result2.put(LstrType.valueOf(rest), subtagData = new TreeMap<String, Map<LstrField, String>>());
   1182                     }
   1183                 } else if (label == LstrField.Subtag
   1184                     || label == LstrField.Tag) {
   1185                     lastTag = rest;
   1186                     String endTag = null;
   1187                     // Subtag: qaa..qtz
   1188                     int pos = lastTag.indexOf("..");
   1189                     if (pos >= 0) {
   1190                         endTag = lastTag.substring(pos + 2);
   1191                         lastTag = lastTag.substring(0, pos);
   1192                     }
   1193                     currentData = new TreeMap<LstrField, String>();
   1194                     if (endTag == null) {
   1195                         putSubtagData(lastTag, subtagData, currentData);
   1196                         languageCount.add(lastType, 1);
   1197                         // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag);
   1198                     } else {
   1199                         for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) {
   1200                             // System.out.println(">" + current);
   1201                             putSubtagData(lastTag, subtagData, currentData);
   1202                             languageCount.add(lastType, 1);
   1203                             // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag);
   1204                         }
   1205 
   1206                     }
   1207                     // label.equalsIgnoreCase("Added") || label.equalsIgnoreCase("Suppress-Script")) {
   1208                     // skip
   1209                     // } else if (pieces.length < 2) {
   1210                     // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line);
   1211                 } else {
   1212                     lastLabel = label;
   1213                     // The following code was removed because in the standard tests (TestAll) both lastRest and rest were always equal.
   1214                     //                    if(!translitCache.containsKey(rest)) {
   1215                     //                        lastRest = TransliteratorUtilities.fromXML.transliterate(rest);
   1216                     //                        translitCache.put(rest, lastRest);
   1217                     //                        if (!lastRest.equals(rest)) {
   1218                     //                            System.out.println(System.currentTimeMillis()+" initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'");
   1219                     //                        }
   1220                     //                    } else {
   1221                     //                        lastRest = translitCache.get(rest);
   1222                     //                    }
   1223                     lastRest = rest;
   1224                     String oldValue = (String) CldrUtility.get(currentData, lastLabel);
   1225                     if (oldValue != null) {
   1226                         lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest;
   1227                     }
   1228                     currentData.put(lastLabel, lastRest);
   1229                 }
   1230             }
   1231         } catch (Exception e) {
   1232             throw (RuntimeException) new IllegalArgumentException(
   1233                 "Can't process file: data/"
   1234                     + registryName + ";\t at line " + lineNumber).initCause(e);
   1235         } finally {
   1236             if (!funnyTags.isEmpty()) {
   1237                 if (DEBUG)
   1238                     System.out.println("Funny tags: " + funnyTags);
   1239             }
   1240         }
   1241         // copy raw
   1242         Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<LstrType, Map<String, Map<LstrField, String>>>();
   1243         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) {
   1244             LstrType key1 = entry1.getKey();
   1245             TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<String, Map<LstrField, String>>();
   1246             rawLstreg.put(key1, raw1);
   1247             for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) {
   1248                 String key2 = entry2.getKey();
   1249                 final Map<LstrField, String> value2 = entry2.getValue();
   1250                 TreeMap<LstrField, String> raw2 = new TreeMap<LstrField, String>();
   1251                 raw2.putAll(value2);
   1252                 raw1.put(key2, raw2);
   1253             }
   1254         }
   1255         LSTREG_RAW = CldrUtility.protectCollection(rawLstreg);
   1256 
   1257         // add extras
   1258         for (int i = 0; i < extras.length; ++i) {
   1259             Map<String, Map<LstrField, String>> subtagData = CldrUtility.get(result2, LstrType.valueOf(extras[i][0]));
   1260             if (subtagData == null) {
   1261                 result2.put(LstrType.valueOf(extras[i][0]), subtagData = new TreeMap<String, Map<LstrField, String>>());
   1262             }
   1263             Map<LstrField, String> labelData = new TreeMap<LstrField, String>();
   1264             for (int j = 2; j < extras[i].length; j += 2) {
   1265                 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]);
   1266             }
   1267             Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]);
   1268             if (old != null) {
   1269                 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) {
   1270                     throw new IllegalArgumentException("REPLACING data for " + extras[i][1] + "\t" + old + "\twith"
   1271                         + labelData);
   1272                 }
   1273             }
   1274             if (false) {
   1275                 System.out.println((old != null ? "REPLACING" + "\t" + old : "ADDING") +
   1276                     " data for " + extras[i][1] + "\twith" + labelData);
   1277             }
   1278             subtagData.put(extras[i][1], labelData);
   1279         }
   1280         // build compatibility map
   1281         Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<String, Map<String, Map<String, String>>>();
   1282         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) {
   1283             Map<String, Map<String, String>> copy2 = new LinkedHashMap<String, Map<String, String>>();
   1284             result.put(entry.getKey().toString(), copy2);
   1285             for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
   1286                 Map<String, String> copy3 = new LinkedHashMap<String, String>();
   1287                 copy2.put(entry2.getKey(), copy3);
   1288                 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) {
   1289                     copy3.put(entry3.getKey().toString(), entry3.getValue());
   1290                 }
   1291             }
   1292         }
   1293         LSTREG = CldrUtility.protectCollection(result);
   1294         LSTREG_ENUM = CldrUtility.protectCollection(result2);
   1295     }
   1296 
   1297     private static <K, K2, V> Map<K2, V> putSubtagData(K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) {
   1298         Map<K2, V> oldData = subtagData.get(lastTag);
   1299         if (oldData != null) {
   1300             if (oldData.get("CLDR") != null) {
   1301                 System.out.println("overriding: " + lastTag + ", " + oldData);
   1302             } else {
   1303                 throw new IllegalArgumentException("Duplicate tag: " + lastTag);
   1304             }
   1305         }
   1306         return subtagData.put(lastTag, currentData);
   1307     }
   1308 
    /**
     * Per-type tally that {@code initLstr()} increments by one for every tag it
     * stores while parsing the registry (each tag of an expanded range counts).
     */
    static Counter<LstrType> languageCount = new Counter<LstrType>();

    /**
     * @return the shared {@code languageCount} counter itself (not a copy); it is
     *         only filled once {@code initLstr()} has run
     */
    public static Counter<LstrType> getLanguageCount() {
        return languageCount;
    }
   1314 
    /** Per-instance parser that the deprecated zone accessors below delegate to. */
    ZoneParser zoneParser = new ZoneParser();
   1316 
   1317     // static public final Set<String> MODERN_SCRIPTS = Collections
   1318     // .unmodifiableSet(new TreeSet(
   1319     // // "Bali " +
   1320     // // "Bugi " +
   1321     // // "Copt " +
   1322     // // "Hano " +
   1323     // // "Osma " +
   1324     // // "Qaai " +
   1325     // // "Sylo " +
   1326     // // "Syrc " +
   1327     // // "Tagb " +
   1328     // // "Tglg " +
   1329     // Arrays
   1330     // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii"
   1331     // .split("\\s+"))));
   1332 
   1333     // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments
   1334 
   1335     /**
   1336      * @deprecated
   1337      */
   1338     public Map<String, List<ZoneLine>> getZone_rules() {
   1339         return zoneParser.getZone_rules();
   1340     }
   1341 
   1342     /**
   1343      * @deprecated
   1344      */
   1345     public Map<String, List<String>> getZoneData() {
   1346         return zoneParser.getZoneData();
   1347     }
   1348 
   1349     /**
   1350      * @deprecated
   1351      */
   1352     public Set<String> getCanonicalTimeZones() {
   1353         return zoneParser.getZoneData().keySet();
   1354     }
   1355 
   1356     /**
   1357      * @deprecated
   1358      */
   1359     public Map<String, Set<String>> getCountryToZoneSet() {
   1360         return zoneParser.getCountryToZoneSet();
   1361     }
   1362 
   1363     /**
   1364      * @deprecated
   1365      */
   1366     public List<String> getDeprecatedZoneIDs() {
   1367         return zoneParser.getDeprecatedZoneIDs();
   1368     }
   1369 
   1370     /**
   1371      * @deprecated
   1372      */
   1373     public Comparator<String> getTZIDComparator() {
   1374         return zoneParser.getTZIDComparator();
   1375     }
   1376 
   1377     /**
   1378      * @deprecated
   1379      */
   1380     public Map<String, Set<String>> getZoneLinkNew_OldSet() {
   1381         return zoneParser.getZoneLinkNew_OldSet();
   1382     }
   1383 
   1384     /**
   1385      * @deprecated
   1386      */
   1387     public Map<String, String> getZoneLinkold_new() {
   1388         return zoneParser.getZoneLinkold_new();
   1389     }
   1390 
   1391     /**
   1392      * @deprecated
   1393      */
   1394     public Map getZoneRuleID_rules() {
   1395         return zoneParser.getZoneRuleID_rules();
   1396     }
   1397 
   1398     /**
   1399      * @deprecated
   1400      */
   1401     public Map<String, String> getZoneToCounty() {
   1402         return zoneParser.getZoneToCounty();
   1403     }
   1404 
   1405     /**
   1406      * @deprecated
   1407      */
   1408     public String getZoneVersion() {
   1409         return zoneParser.getVersion();
   1410     }
   1411 
   1412     public static String fixLanguageTag(String languageSubtag) {
   1413         if (languageSubtag.equals("mo")) { // fix special cases
   1414             return "ro";
   1415         } else if (languageSubtag.equals("no")) {
   1416             return "nb";
   1417         }
   1418         return languageSubtag;
   1419     }
   1420 
   1421     public boolean isModernLanguage(String languageCode) {
   1422         if (getMoribundLanguages().contains(languageCode)) return false;
   1423         Type type = Iso639Data.getType(languageCode);
   1424         if (type == Type.Living) return true;
   1425         if (languageCode.equals("eo")) return true; // exception for Esperanto
   1426         // Scope scope = Iso639Data.getScope(languageCode);
   1427         // if (scope == Scope.Collection) return false;
   1428         return false;
   1429     }
   1430 
   1431     public static boolean isScriptModern(String script) {
   1432         ScriptMetadata.Info info = ScriptMetadata.getInfo(script);
   1433         if (info == null) {
   1434             if (false) throw new IllegalArgumentException("No script metadata for: " + script);
   1435             return false;
   1436         }
   1437         IdUsage idUsage = info.idUsage;
   1438         return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN;
   1439     }
   1440 
   1441     static final Pattern whitespace = PatternCache.get("\\s+");
   1442     static Set<String> filteredCurrencies = null;
   1443 
   1444     public Set<String> getSurveyToolDisplayCodes(String type) {
   1445         return getGoodAvailableCodes(type);
   1446     }
   1447 
   1448     static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze();
   1449 
   1450     /**
   1451      * Quick check for whether valid country. Not complete: should use Validity
   1452      * @param territory
   1453      * @return
   1454      */
   1455     public static boolean isCountry(String territory) {
   1456         switch (territory) {
   1457         case "ZZ":
   1458         case "QO":
   1459         case "EU":
   1460         case "UN":
   1461         case "EZ":
   1462             return false;
   1463         default:
   1464             return territory.length() == 2 && COUNTRY.containsAll(territory);
   1465         }
   1466     }
   1467 
   1468     public boolean isLstregPrivateUse(String type, String code) {
   1469         Map<String, String> lStregData = getLStreg().get(type).get(code);
   1470         return lStregData.get("Description").equalsIgnoreCase("private use");
   1471     }
   1472 
   1473     public boolean isLstregDeprecated(String type, String code) {
   1474         Map<String, String> lStregData = getLStreg().get(type).get(code);
   1475         return lStregData.get("Deprecated") != null;
   1476     }
   1477 }
   1478