Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.io.IOException;
      4 import java.util.ArrayList;
      5 import java.util.Arrays;
      6 import java.util.Collections;
      7 import java.util.HashMap;
      8 import java.util.HashSet;
      9 import java.util.LinkedHashSet;
     10 import java.util.List;
     11 import java.util.Map;
     12 import java.util.Map.Entry;
     13 import java.util.Set;
     14 import java.util.TreeMap;
     15 import java.util.TreeSet;
     16 import java.util.regex.Matcher;
     17 
     18 import org.unicode.cldr.draft.ScriptMetadata;
     19 import org.unicode.cldr.draft.ScriptMetadata.Info;
     20 import org.unicode.cldr.tool.GenerateMaximalLocales;
     21 import org.unicode.cldr.tool.LikelySubtags;
     22 import org.unicode.cldr.util.Builder;
     23 import org.unicode.cldr.util.CLDRConfig;
     24 import org.unicode.cldr.util.CLDRFile;
     25 import org.unicode.cldr.util.CLDRLocale;
     26 import org.unicode.cldr.util.ChainedMap;
     27 import org.unicode.cldr.util.ChainedMap.M3;
     28 import org.unicode.cldr.util.CldrUtility;
     29 import org.unicode.cldr.util.LanguageTagParser;
     30 import org.unicode.cldr.util.LocaleIDParser;
     31 import org.unicode.cldr.util.PatternCache;
     32 import org.unicode.cldr.util.StandardCodes;
     33 import org.unicode.cldr.util.SupplementalDataInfo;
     34 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
     35 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
     36 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
     37 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
     38 import org.unicode.cldr.util.XPathParts;
     39 
     40 import com.ibm.icu.dev.test.TestFmwk;
     41 import com.ibm.icu.dev.util.CollectionUtilities;
     42 import com.ibm.icu.impl.Relation;
     43 import com.ibm.icu.impl.Row.R2;
     44 
     45 public class TestInheritance extends TestFmwk {
     46 
     47     static CLDRConfig testInfo = CLDRConfig.getInstance();
     48 
     49     private static boolean DEBUG = CldrUtility.getProperty("DEBUG", false);
     50 
     51     private static Matcher pathMatcher = PatternCache.get(
     52         CldrUtility.getProperty("XPATH", ".*")).matcher("");
     53 
     54     public static void main(String[] args) throws IOException {
     55         new TestInheritance().run(args);
     56     }
     57 
     58     private static final SupplementalDataInfo dataInfo = SupplementalDataInfo
     59         .getInstance();
     60     private static final Set<String> defaultContents = dataInfo
     61         .getDefaultContentLocales();
     62 
     63     private static final boolean EXPECT_EQUALITY = false;
     64 
     65     private static Set<String> availableLocales = testInfo.getFullCldrFactory().getAvailable();
     66 
     67     public void TestLocalesHaveOfficial() {
     68         // If we have a language, we have all the region locales where the
     69         // language is official
     70         Set<String> SKIP_TERRITORIES = new HashSet<String>(Arrays.asList("001",
     71             "150"));
     72         for (Entry<String, R2<List<String>, String>> s : dataInfo
     73             .getLocaleAliasInfo().get("territory").entrySet()) {
     74             SKIP_TERRITORIES.add(s.getKey());
     75         }
     76 
     77         LanguageTagParser ltp = new LanguageTagParser();
     78 
     79         Relation<String, String> languageLocalesSeen = Relation.of(
     80             new TreeMap<String, Set<String>>(), TreeSet.class);
     81 
     82         Set<String> testOrg = testInfo.getStandardCodes()
     83             .getLocaleCoverageLocales("google");
     84         ChainedMap.M4<String, OfficialStatus, String, Boolean> languageToOfficialChildren = ChainedMap
     85             .of(new TreeMap<String, Object>(),
     86                 new TreeMap<OfficialStatus, Object>(),
     87                 new TreeMap<String, Object>(), Boolean.class);
     88 
     89         // gather the data
     90 
     91         for (String language : dataInfo
     92             .getLanguagesForTerritoriesPopulationData()) {
     93             for (String territory : dataInfo
     94                 .getTerritoriesForPopulationData(language)) {
     95                 if (SKIP_TERRITORIES.contains(territory)) {
     96                     continue;
     97                 }
     98                 PopulationData data = dataInfo
     99                     .getLanguageAndTerritoryPopulationData(language,
    100                         territory);
    101                 OfficialStatus status = data.getOfficialStatus();
    102                 if (data.getOfficialStatus() != OfficialStatus.unknown) {
    103                     String locale = removeScript(language + "_" + territory);
    104                     String lang = removeScript(ltp.set(locale).getLanguage());
    105                     languageToOfficialChildren.put(lang, status, locale,
    106                         Boolean.TRUE);
    107                     languageLocalesSeen.put(lang, locale);
    108                 }
    109             }
    110         }
    111 
    112         // flesh it out by adding 'clean' codes.
    113         // also get the child locales in cldr.
    114 
    115         Relation<String, String> languageToChildren = Relation.of(
    116             new TreeMap<String, Set<String>>(), TreeSet.class);
    117         for (String locale : testInfo.getCldrFactory().getAvailable()) {
    118             String lang = ltp.set(locale).getLanguage();
    119             if (SKIP_TERRITORIES.contains(ltp.getRegion())) {
    120                 continue;
    121             }
    122             lang = removeScript(lang);
    123             locale = removeScript(locale);
    124 
    125             if (!lang.equals(locale)) {
    126                 languageToChildren.put(lang, locale);
    127                 Set<String> localesSeen = languageLocalesSeen.get(lang);
    128                 if (localesSeen == null || !localesSeen.contains(locale)) {
    129                     languageToOfficialChildren.put(lang,
    130                         OfficialStatus.unknown, locale, Boolean.TRUE);
    131                 }
    132             }
    133         }
    134 
    135         for (Entry<String, Set<String>> languageAndChildren : languageToChildren
    136             .keyValuesSet()) {
    137             String language = languageAndChildren.getKey();
    138             Set<String> children = languageAndChildren.getValue();
    139             M3<OfficialStatus, String, Boolean> officalStatusToChildren = languageToOfficialChildren
    140                 .get(language);
    141             for (Entry<OfficialStatus, Map<String, Boolean>> entry : officalStatusToChildren) {
    142                 OfficialStatus status = entry.getKey();
    143                 if (status != OfficialStatus.official
    144                     && status != OfficialStatus.de_facto_official) {
    145                     continue;
    146                 }
    147                 Set<String> officalChildren = entry.getValue().keySet();
    148                 if (!children.containsAll(officalChildren)) {
    149                     Set<String> missing = new TreeSet<String>(officalChildren);
    150                     missing.removeAll(children);
    151                     String message = "Missing CLDR locales for " + status
    152                         + " languages: " + missing;
    153                     errln(message);
    154                 } else {
    155                     logln("CLDR locales " + children + " cover " + status
    156                         + " locales " + officalChildren);
    157                 }
    158 
    159             }
    160         }
    161 
    162         if (DEBUG) {
    163             Set<String> languages = new TreeSet<String>(
    164                 languageToChildren.keySet());
    165             languages.addAll(languageToOfficialChildren.keySet());
    166             System.out.print("\ncode\tlanguage");
    167             for (OfficialStatus status : OfficialStatus.values()) {
    168                 System.out.print("\tNo\t" + status);
    169             }
    170             System.out.println();
    171             for (String language : languages) {
    172                 if (!testOrg.contains(language)) {
    173                     continue;
    174                 }
    175                 System.out.print(language + "\t"
    176                     + testInfo.getEnglish().getName(language));
    177 
    178                 M3<OfficialStatus, String, Boolean> officialChildren = languageToOfficialChildren
    179                     .get(language);
    180                 for (OfficialStatus status : OfficialStatus.values()) {
    181                     Map<String, Boolean> children = officialChildren
    182                         .get(status);
    183                     if (children == null) {
    184                         System.out.print("\t" + 0 + "\t");
    185                     } else {
    186                         System.out.print("\t" + children.size() + "\t"
    187                             + show(children.keySet(), false));
    188                     }
    189                 }
    190                 System.out.println();
    191             }
    192         }
    193     }
    194 
    195     private String show(Set<String> joint, boolean showStatus) {
    196         StringBuffer b = new StringBuffer();
    197         for (String s : joint) {
    198             if (b.length() != 0) {
    199                 b.append(", ");
    200             }
    201             LanguageTagParser ltp = new LanguageTagParser().set(s);
    202             String script = ltp.getScript();
    203             if (script.length() != 0) {
    204                 b.append(testInfo.getEnglish().getName(CLDRFile.SCRIPT_NAME,
    205                     script));
    206             }
    207             String region = ltp.getRegion();
    208             if (region.length() != 0) {
    209                 if (script.length() != 0) {
    210                     b.append("-");
    211                 }
    212                 b.append(testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME,
    213                     region));
    214             }
    215             b.append(" [").append(s);
    216             if (showStatus) {
    217                 PopulationData data = dataInfo
    218                     .getLanguageAndTerritoryPopulationData(
    219                         ltp.getLanguage(), region);
    220                 if (data == null) {
    221                     data = dataInfo.getLanguageAndTerritoryPopulationData(
    222                         ltp.getLanguageScript(), region);
    223                 }
    224                 b.append("; ");
    225                 b.append(data == null ? "?" : data.getOfficialStatus());
    226             }
    227             b.append("]");
    228 
    229         }
    230         return b.toString();
    231     }
    232 
    233     private String removeScript(String lang) {
    234         if (!lang.contains("_")) {
    235             return lang;
    236         }
    237         LanguageTagParser ltp = new LanguageTagParser().set(lang);
    238         // String ls = ltp.getLanguageScript();
    239         // if (defaultContents.contains(ls)) {
    240         ltp.setScript("");
    241         // }
    242         return ltp.toString();
    243     }
    244 
    245     public void TestLikelyAndDefaultConsistency() {
    246         LikelySubtags likelySubtags = new LikelySubtags();
    247         LanguageTagParser ltp = new LanguageTagParser();
    248         // find multiscript locales
    249         Relation<String, String> base2scripts = Relation.of(
    250             new TreeMap<String, Set<String>>(), TreeSet.class);
    251         Map<String, String> parent2default = new TreeMap<String, String>();
    252         Map<String, String> default2parent = new TreeMap<String, String>();
    253         Relation<String, String> base2locales = Relation.of(
    254             new TreeMap<String, Set<String>>(), TreeSet.class);
    255 
    256         Set<String> knownMultiScriptLanguages = new HashSet<String>(Arrays.asList("bm", "ha"));
    257         // get multiscript locales
    258         for (String localeID : availableLocales) {
    259             String script = ltp.set(localeID).getScript();
    260             final String base = ltp.getLanguage();
    261             if (!availableLocales.contains(base)) {
    262                 errln("Missing base locale for: " + localeID);
    263             }
    264             base2locales.put(base, localeID);
    265             if (!script.isEmpty() && !base.equals("en")) { // HACK for en
    266                 base2scripts.put(base, script);
    267             }
    268             if (script.isEmpty() && knownMultiScriptLanguages.contains(base)) {
    269                 base2scripts.put(base, dataInfo.getDefaultScript(base));
    270             }
    271         }
    272 
    273         // get default contents
    274         for (String localeID : defaultContents) {
    275             checkLocale(localeID, false);
    276             String simpleParent = LocaleIDParser.getSimpleParent(localeID);
    277             parent2default.put(simpleParent, localeID);
    278             default2parent.put(localeID, simpleParent);
    279             // if (!available.contains(simpleParent)) {
    280             // // verify that base language has locale in CLDR (we don't want
    281             // others)
    282             // errln("Default contents contains locale not in CLDR:\t" +
    283             // simpleParent);
    284             // }
    285         }
    286 
    287         // get likely
    288         Map<String, String> likely2Maximized = likelySubtags.getToMaximized();
    289         for (Entry<String, String> likelyAndMaximized : likely2Maximized
    290             .entrySet()) {
    291             checkLocale(likelyAndMaximized.getKey(), true);
    292             checkLocale(likelyAndMaximized.getValue(), true);
    293         }
    294         Map<String, String> exceptionDcLikely = new HashMap<String, String>();
    295         Map<String, String> exceptionLikelyDc = new HashMap<String, String>();
    296         for (String[] s : new String[][] { { "ar_001", "ar_Arab_EG" }, }) {
    297             exceptionDcLikely.put(s[0], s[1]);
    298             exceptionLikelyDc.put(s[1], s[0]);
    299         }
    300 
    301         verifyDefaultContentsImplicationsForLikelySubtags(ltp, parent2default,
    302             likely2Maximized, exceptionDcLikely);
    303 
    304         verifyLikelySubtagsImplicationsForDefaultContents(ltp, base2scripts,
    305             parent2default, likely2Maximized, exceptionLikelyDc);
    306 
    307         verifyScriptsWithDefaultContents(ltp, base2scripts, parent2default,
    308             base2locales);
    309     }
    310 
    311     public void TestParentLocaleRelationships() {
    312         // Testing invariant relationships between locales - See
    313         // http://unicode.org/cldr/trac/ticket/5758
    314         Matcher langScript = PatternCache.get("^[a-z]{2,3}_[A-Z][a-z]{3}$")
    315             .matcher("");
    316         for (String loc : availableLocales) {
    317             if (langScript.reset(loc).matches()) {
    318                 String expectedParent = loc.split("_")[0];
    319                 if (!defaultContents.contains(loc)) {
    320                     expectedParent = "root";
    321                 }
    322                 String actualParent = dataInfo.getExplicitParentLocale(loc);
    323                 if (actualParent == null) {
    324                     actualParent = loc.split("_")[0];
    325                 }
    326                 if (!actualParent.equals(expectedParent)) {
    327                     errln("Unexpected parent locale for locale " + loc
    328                         + ". Expected: " + expectedParent + " Got: "
    329                         + actualParent);
    330                 }
    331 
    332                 if (dataInfo.getExplicitParentLocale(loc) != null
    333                     && defaultContents.contains(loc)) {
    334                     errln("Locale "
    335                         + loc
    336                         + " can't have an explicit parent AND be a default content locale");
    337                 }
    338             }
    339         }
    340     }
    341 
    342     public void TestParentLocaleInvariants() {
    343         // Testing invariant relationships in parent locales - See
    344         // http://unicode.org/cldr/trac/ticket/7887
    345         LocaleIDParser lp = new LocaleIDParser();
    346         for (String loc : availableLocales) {
    347             String parentLocale = dataInfo.getExplicitParentLocale(loc);
    348             if (parentLocale != null) {
    349                 if (!"root".equals(parentLocale)
    350                     && !lp.set(loc).getLanguage()
    351                         .equals(lp.set(parentLocale).getLanguage())) {
    352                     errln("Parent locale [" + parentLocale + "] for locale ["
    353                         + loc + "] cannot be a different language code.");
    354                 }
    355                 if (!"root".equals(parentLocale)
    356                     && !lp.set(loc).getScript()
    357                         .equals(lp.set(parentLocale).getScript())) {
    358                     errln("Parent locale [" + parentLocale + "] for locale ["
    359                         + loc + "] cannot be a different script code.");
    360                 }
    361                 lp.set(loc);
    362                 if (lp.getScript().length() == 0 && lp.getRegion().length() == 0) {
    363                     errln("Base language locale [" + loc + "] cannot have an explicit parent.");
    364                 }
    365 
    366             }
    367         }
    368     }
    369 
    370     public void TestParentLocalesForCycles() {
    371         // Testing for cyclic relationships in parent locales - See
    372         // http://unicode.org/cldr/trac/ticket/7887
    373         for (String loc : availableLocales) {
    374             String currentLoc = loc;
    375             boolean foundError = false;
    376             List<String> inheritanceChain = new ArrayList<String>(Arrays.asList(loc));
    377             while (currentLoc != null && !foundError) {
    378                 currentLoc = LocaleIDParser.getParent(currentLoc);
    379                 if (inheritanceChain.contains(currentLoc)) {
    380                     foundError = true;
    381                     inheritanceChain.add(currentLoc);
    382                     errln("Inheritance chain for locale [" + loc + "] contains a cyclic relationship. " + inheritanceChain.toString());
    383                 }
    384                 inheritanceChain.add(currentLoc);
    385             }
    386         }
    387     }
    388 
    389     private void verifyScriptsWithDefaultContents(LanguageTagParser ltp,
    390         Relation<String, String> base2scripts,
    391         Map<String, String> parent2default,
    392         Relation<String, String> base2locales) {
    393         Set<String> skip = Builder.with(new HashSet<String>())
    394             .addAll("root", "und")
    395             .freeze();
    396         Set<String> languagesWithOneOrLessLocaleScriptInCommon = new HashSet<String>(Arrays.asList("bm", "ha", "ms", "iu", "mn"));
    397         // for each base we have to have,
    398         // if multiscript, we have default contents for base+script,
    399         // base+script+region;
    400         // otherwise base+region.
    401         for (String base : base2locales.keySet()) {
    402             if (skip.contains(base)) {
    403                 continue;
    404             }
    405             String defaultContent = parent2default.get(base);
    406             // Set<String> likely = base2likely.get(base);
    407             // if (likely == null) {
    408             // errln("Missing likely subtags for: " + base + "  " +
    409             // suggestLikelySubtagFor(base));
    410             // }
    411             if (defaultContent == null) {
    412                 errln("Missing default content for: " + base + "  "
    413                     + suggestLikelySubtagFor(base));
    414                 continue;
    415             }
    416             Set<String> scripts = base2scripts.get(base);
    417             ltp.set(defaultContent);
    418             String script = ltp.getScript();
    419             String region = ltp.getRegion();
    420             if (scripts == null || languagesWithOneOrLessLocaleScriptInCommon.contains(base)) {
    421                 if (!script.isEmpty()) {
    422                     errln("Script should be empty in default content for: "
    423                         + base + "," + defaultContent);
    424                 }
    425                 if (region.isEmpty()) {
    426                     errln("Region must not be empty in default content for: "
    427                         + base + "," + defaultContent);
    428                 }
    429             } else {
    430                 if (script.isEmpty()) {
    431                     errln("Script should not be empty in default content for: "
    432                         + base + "," + defaultContent);
    433                 }
    434                 if (!region.isEmpty()) {
    435                     errln("Region should be empty in default content for: "
    436                         + base + "," + defaultContent);
    437                 }
    438                 String defaultContent2 = parent2default.get(defaultContent);
    439                 if (defaultContent2 == null) {
    440                     errln("Missing default content for: " + defaultContent);
    441                     continue;
    442                 }
    443                 ltp.set(defaultContent2);
    444                 region = ltp.getRegion();
    445                 if (region.isEmpty()) {
    446                     errln("Region must not be empty in default content for: "
    447                         + base + "," + defaultContent);
    448                 }
    449             }
    450         }
    451     }
    452 
    453     private void verifyLikelySubtagsImplicationsForDefaultContents(
    454         LanguageTagParser ltp, Relation<String, String> base2scripts,
    455         Map<String, String> parent2default,
    456         Map<String, String> likely2Maximized,
    457         Map<String, String> exceptionLikelyDc) {
    458         // Now check invariants for all LikelySubtags implications for Default
    459         // Contents
    460         // a) suppose likely max for la_Scrp => la_Scrp_RG
    461         // Then default contents la_Scrp => la_Scrp_RG
    462         // b) suppose likely max for la_RG => la_Scrp_RG
    463         // Then we can draw no conclusions // was default contents la_Scrp =>
    464         // la_Scrp_RG
    465         // c) suppose likely max for la => la_Scrp_RG
    466         // Then default contents la => la_Scrp && la_Scrp => la_Scrp_RG
    467         // or default contents la => la_RG && ! la_Scrp => la_Scrp_RG
    468 
    469         TreeSet<String> additionalDefaultContents = new TreeSet<String>();
    470 
    471         for (Entry<String, String> entry : likely2Maximized.entrySet()) {
    472             String source = entry.getKey();
    473             String likelyMax = entry.getValue();
    474             String sourceLang = ltp.set(source).getLanguage();
    475             if (sourceLang.equals("und") || source.equals("zh_Hani")
    476                 || source.equals("tl")) {
    477                 continue;
    478             }
    479             String sourceScript = ltp.getScript();
    480             String sourceRegion = ltp.getRegion();
    481 
    482             String likelyMaxLang = ltp.set(likelyMax).getLanguage();
    483             String likelyMaxScript = ltp.getScript();
    484             String likelyMaxRegion = ltp.getRegion();
    485 
    486             String dc = parent2default.get(source);
    487             String possibleException = exceptionLikelyDc.get(likelyMax);
    488             if (possibleException != null && possibleException.equals(dc)) {
    489                 continue;
    490             }
    491             String likelyLangScript = likelyMaxLang + "_" + likelyMaxScript;
    492             String dcFromLangScript = parent2default.get(likelyLangScript);
    493 
    494             boolean consistent = true;
    495             String caseNumber = null;
    496             if (consistent) {
    497                 if (!sourceScript.isEmpty()) {
    498                     caseNumber = "a";
    499                     if (dc == null) {
    500                         if (EXPECT_EQUALITY) {
    501                             String expected = likelyMax;
    502                             errln("Default contents null for " + source
    503                                 + ", expected:\t" + expected);
    504                             additionalDefaultContents.add(expected);
    505                         }
    506                         continue;
    507                     }
    508                     consistent = likelyMax.equals(dc);
    509                 } else if (!sourceRegion.isEmpty()) { // a
    510                     caseNumber = "b";
    511                     // consistent = likelyMax.equals(dcFromLangScript);
    512                 } else { // c
    513                     caseNumber = "c";
    514                     if (dc == null) {
    515                         if (EXPECT_EQUALITY) {
    516                             String expected = base2scripts.get(source) == null ? likelyMaxLang
    517                                 + "_" + likelyMaxRegion
    518                                 : likelyMaxLang + "_" + likelyMaxScript;
    519                             errln("Default contents null for " + source
    520                                 + ", expected:\t" + expected);
    521                             additionalDefaultContents.add(expected);
    522                         }
    523                         continue;
    524                     }
    525                     String dcScript = ltp.set(dc).getScript();
    526                     consistent = likelyLangScript.equals(dc)
    527                         && likelyMax.equals(dcFromLangScript)
    528                         || dcScript.isEmpty()
    529                             && !likelyMax.equals(dcFromLangScript);
    530                     // || dcScript.isEmpty() && dcRegion.equals(likelyMaxRegion)
    531                     // && dcFromLangScript == null;
    532                 }
    533             }
    534             if (!consistent) {
    535                 errln("default contents inconsistent with likely subtag: ("
    536                     + caseNumber + ")" + "\n\t" + source + " => (ls) "
    537                     + likelyMax + "\n\t" + source + " => (dc) " + dc
    538                     + "\n\t" + likelyLangScript + " => (dc) "
    539                     + dcFromLangScript);
    540             }
    541         }
    542         if (additionalDefaultContents.size() != 0) {
    543             errln("Suggested additions to supplementalMetadata/../defaultContent:\n"
    544                 + CollectionUtilities.join(additionalDefaultContents, " "));
    545         }
    546     }
    547 
    548     private void verifyDefaultContentsImplicationsForLikelySubtags(
    549         LanguageTagParser ltp, Map<String, String> parent2default,
    550         Map<String, String> likely2Maximized,
    551         Map<String, String> exceptionDcLikely) {
    552         // Now check invariants for all Default Contents implications for
    553         // LikelySubtags
    554         // a) suppose default contents la => la_Scrp.
    555         // Then the likely contents for la => la_Scrp_*
    556         // b) suppose default contents la => la_RG.
    557         // Then the likely contents for la => la_*_RG
    558         // c) suppose default contents la_Scrp => la_Scrp_RG.
    559         // Then the likely contents of la_Scrp => la_Scrp_RG OR likely contents
    560         // for la => la_*_*
    561         for (Entry<String, String> parentAndDefault : parent2default.entrySet()) {
    562             String source = parentAndDefault.getKey();
    563             String dc = parentAndDefault.getValue();
    564             String likelyMax = likely2Maximized.get(source);
    565 
    566             // skip special exceptions
    567             String possibleException = exceptionDcLikely.get(dc);
    568             if (possibleException != null
    569                 && possibleException.equals(likelyMax)) {
    570                 continue;
    571             }
    572 
    573             String sourceLang = ltp.set(source).getLanguage();
    574             String sourceScript = ltp.getScript();
    575             // there cannot be a sourceRegion
    576 
    577             String dcScript = ltp.set(dc).getScript();
    578             String dcRegion = ltp.getRegion();
    579 
    580             String likelyMaxLang = "", likelyMaxScript = "", likelyMaxRegion = "";
    581             if (likelyMax != null) {
    582                 likelyMaxLang = ltp.set(likelyMax).getLanguage();
    583                 likelyMaxScript = ltp.getScript();
    584                 likelyMaxRegion = ltp.getRegion();
    585             }
    586 
    587             String likelyMax2 = likely2Maximized.get(sourceLang);
    588 
    589             boolean consistent = true;
    590 
    591             if (sourceScript.isEmpty()) { // a or b
    592                 if (!dcScript.isEmpty()) { // a
    593                     consistent = likelyMaxLang.equals(source)
    594                         && likelyMaxScript.equals(dcScript);
    595                 } else { // b
    596                     consistent = likelyMaxLang.equals(source)
    597                         && likelyMaxRegion.equals(dcRegion);
    598                 }
    599             } else { // c
    600                 consistent = dc.equals(likelyMax) || likelyMax2 != null;
    601             }
    602             if (!consistent) {
    603                 errln("likely subtag inconsistent with default contents: "
    604                     + "\n\t"
    605                     + source
    606                     + " =>( dc) "
    607                     + dc
    608                     + "\n\t"
    609                     + source
    610                     + " => (ls) "
    611                     + likelyMax
    612                     + (source.equals(sourceLang) ? "" : "\n\t" + sourceLang
    613                         + " => (ls) " + likelyMax2));
    614             }
    615         }
    616     }
    617 
    618     /**
    619      * Suggest a likely subtag
    620      *
    621      * @param base
    622      * @return
    623      */
    624     static String suggestLikelySubtagFor(String base) {
    625         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
    626 
    627         CLDRLocale loc = CLDRLocale.getInstance(base);
    628 
    629         if (!loc.getLanguage().equals(base)) {
    630             return " (no suggestion- not a simple language locale)"; // no
    631             // suggestion
    632             // unless
    633             // just
    634             // a
    635             // language
    636             // locale.
    637         }
    638         Set<BasicLanguageData> basicData = sdi.getBasicLanguageData(base);
    639 
    640         for (BasicLanguageData bld : basicData) {
    641             if (bld.getType() == org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type.primary) {
    642                 Set<String> scripts = bld.getScripts();
    643                 Set<String> territories = bld.getTerritories();
    644 
    645                 if (scripts.size() == 1) {
    646                     if (territories.size() == 1) {
    647                         return createSuggestion(
    648                             loc,
    649                             CLDRLocale.getInstance(base + "_"
    650                                 + scripts.iterator().next() + "_"
    651                                 + territories.iterator().next()));
    652                     }
    653                 }
    654                 return "(no suggestion - multiple scripts or territories)";
    655             }
    656         }
    657         return ("(no suggestion- no data)");
    658     }
    659 
    660     /**
    661      * Format and return a suggested likelysubtag
    662      */
    663     private static String createSuggestion(CLDRLocale loc, CLDRLocale toLoc) {
    664         return " Suggest this to likelySubtags.xml:        <likelySubtag from=\""
    665             + loc
    666             + "\" to=\""
    667             + toLoc
    668             + "\"/>\n"
    669             + "        <!--{ "
    670             + loc.getDisplayName()
    671             + "; ?; ? } => { "
    672             + loc.getDisplayName()
    673             + "; "
    674             + toLoc.toULocale().getDisplayScript()
    675             + "; "
    676             + toLoc.toULocale().getDisplayCountry() + " }-->";
    677 
    678     }
    679 
    680     public void TestDeprecatedTerritoryDataLocaleIds() {
    681         HashSet<String> checked = new HashSet<String>();
    682         for (String language : dataInfo
    683             .getLanguagesForTerritoriesPopulationData()) {
    684             checkLocale(language, false); // checks la_Scrp and la
    685             for (String region : dataInfo
    686                 .getTerritoriesForPopulationData(language)) {
    687                 if (!checked.contains(region)) {
    688                     checkValidCode(language + "_" + region, "territory",
    689                         region, false);
    690                     checked.add(region);
    691                 }
    692             }
    693         }
    694         for (String language : dataInfo.getBasicLanguageDataLanguages()) {
    695             checkLocale(language, false); // checks la_Scrp and la
    696             Set<BasicLanguageData> data = dataInfo
    697                 .getBasicLanguageData(language);
    698             for (BasicLanguageData datum : data) {
    699                 for (String script : datum.getScripts()) {
    700                     checkValidCode(language + "_" + script, "script", script,
    701                         false);
    702                     checked.add(script);
    703                 }
    704                 for (String region : datum.getTerritories()) {
    705                     checkValidCode(language + "_" + region, "territory",
    706                         region, false);
    707                     checked.add(region);
    708                 }
    709             }
    710         }
    711 
    712     }
    713 
    714     public void TestBasicLanguageDataAgainstScriptMetadata() {
    715         // the invariants are:
    716         // if there is primary data, the script must be there
    717         // otherwise it must be in the secondary
    718         main: for (String script : ScriptMetadata.getScripts()) {
    719             Info info = ScriptMetadata.getInfo(script);
    720             String language = info.likelyLanguage;
    721             if (language.equals("und")) {
    722                 continue;
    723             }
    724             Map<Type, BasicLanguageData> data = dataInfo
    725                 .getBasicLanguageDataMap(language);
    726             if (data == null) {
    727                 logln("Warning: ScriptMetadata has " + language + " for "
    728                     + script + "," + " but " + language
    729                     + " is missing in language_script.txt");
    730                 continue;
    731             }
    732             for (BasicLanguageData entry : data.values()) {
    733                 if (entry.getScripts().contains(script)) {
    734                     continue main;
    735                 }
    736                 continue;
    737             }
    738             logln("Warning: ScriptMetadata has " + language + " for " + script
    739                 + "," + " but " + language + " doesn't have " + script
    740                 + " in language_script.txt");
    741         }
    742     }
    743 
    744     public void TestCldrFileConsistency() {
    745         boolean haveErrors = false;
    746         for (String locale : testInfo.getCldrFactory().getAvailable()) {
    747             CLDRFile cldrFileToCheck = testInfo.getCLDRFile(locale,
    748                 false);
    749             int errors = 0;
    750             for (String path : cldrFileToCheck) {
    751                 if (!pathMatcher.reset(path).find()) {
    752                     continue;
    753                 }
    754                 String fullPath = cldrFileToCheck.getFullXPath(path);
    755                 if (fullPath == null) {
    756                     // try again, for debugging
    757                     fullPath = cldrFileToCheck.getFullXPath(path);
    758                     String value = cldrFileToCheck.getStringValue(path);
    759                     if (DEBUG) {
    760                         errln("Invalid full path\t" + locale + ", " + path
    761                             + ", " + fullPath + ", " + value);
    762                     }
    763                     errors++;
    764                     haveErrors = true;
    765                 }
    766             }
    767             if (errors != 0) {
    768                 errln(locale
    769                     + (errors != 0 ? "\tinvalid getFullXPath() values:"
    770                         + errors : ""));
    771             } else {
    772                 logln(locale);
    773             }
    774         }
    775         if (haveErrors && !DEBUG) {
    776             errln("Use -DDEBUG to see details");
    777         }
    778     }
    779 
    780     static SupplementalDataInfo info = SupplementalDataInfo.getInstance();
    781     LanguageTagParser ltp = new LanguageTagParser();
    782 
    783     // public void TestAliases() {
    784     // Factory factory = Factory.make(CldrUtility.MAIN_DIRECTORY, fileMatcher);
    785     // Set<String> allLocales = Factory.make(CldrUtility.MAIN_DIRECTORY,
    786     // ".*").getAvailable();
    787     //
    788     // LanguageTagCanonicalizer languageTagCanonicalizer = new
    789     // LanguageTagCanonicalizer();
    790     //
    791     // Set<String> defaultContents = info.getDefaultContentLocales();
    792     //
    793     // Map<String, String> likelySubtags = info.getLikelySubtags();
    794     //
    795     // XPathParts xpp = new XPathParts();
    796     //
    797     // // get the top level aliases, and verify that they are consistent with
    798     // // maximization
    799     // Map<String, String> topLevelAliases = new TreeMap<String, String>();
    800     // Set<String> crossScriptSet = new TreeSet<String>();
    801     // Set<String> aliasPaths = new TreeSet<String>();
    802     // Set<String> locales = factory.getAvailable();
    803     //
    804     // // get the languages that need scripts
    805     // // TODO broaden to beyond CLDR
    806     // Set<String> needScripts = new TreeSet<String>();
    807     // for (String locale : locales) {
    808     // String script = ltp.set(locale).getScript();
    809     // if (script.length() != 0) {
    810     // needScripts.add(ltp.getLanguage());
    811     // }
    812     // }
    813     //
    814     // logln("Languages that have scripts:\t" + needScripts);
    815     //
    816     // for (String locale : locales) {
    817     //
    818     // // get alias locale
    819     // String aliasLocale = locale;
    820     // String explicitAlias = null;
    821     // String aliasPathNew = null;
    822     // CLDRFile cldrFileToCheck = factory.make(locale, false);
    823     // aliasPaths.clear();
    824     // // examples:
    825     // // in: <alias source="id" path="//ldml"/>
    826     // // ar_IR: <alias source="az_Arab_IR" path="//ldml"/>
    827     //
    828     // cldrFileToCheck.getPaths("//ldml/alias", null, aliasPaths);
    829     // if (aliasPaths.size() != 0) {
    830     // String aliasPath = aliasPaths.iterator().next();
    831     // String fullPath = cldrFileToCheck.getFullXPath(aliasPath);
    832     // explicitAlias = aliasLocale = xpp.set(fullPath).getAttributeValue(1,
    833     // "source");
    834     // String aliasParent = LocaleIDParser.getParent(aliasLocale);
    835     // if (!aliasParent.equals("root")) {
    836     // topLevelAliases.put(locale, aliasParent);
    837     // }
    838     // aliasPathNew = xpp.set(fullPath).getAttributeValue(1, "path");
    839     // if ("//ldml/".equals(aliasPathNew)) {
    840     // errln("Bad alias path:\t" + fullPath);
    841     // }
    842     // }
    843     //
    844     // checkAliasValues(cldrFileToCheck, allLocales);
    845     //
    846     // // get canonicalized
    847     // String canonicalizedLocale = languageTagCanonicalizer.transform(locale);
    848     // if (!locale.equals(canonicalizedLocale)) {
    849     // logln("Locale\t" + locale + " => " + canonicalizedLocale);
    850     // }
    851     //
    852     // String base = ltp.set(canonicalizedLocale).getLanguage();
    853     // String script = ltp.getScript();
    854     // if (canonicalizedLocale.equals(base)) { // eg, id, az
    855     // continue;
    856     // }
    857     //
    858     // // see if the locale's default script is the same as the base locale's
    859     //
    860     // String maximized = maximize(likelySubtags, canonicalizedLocale);
    861     // if (maximized == null) {
    862     // errln("Missing likely subtags for:\t" + locale + "  " +
    863     // suggestLikelySubtagFor(locale));
    864     // continue;
    865     // }
    866     // String maximizedScript = ltp.set(maximized).getScript();
    867     //
    868     // String minimized = minimize(likelySubtags, canonicalizedLocale);
    869     //
    870     // String baseMaximized = maximize(likelySubtags, base);
    871     // String baseScript = ltp.set(baseMaximized).getScript();
    872     //
    873     // if (script.length() != 0 && !script.equals(baseScript)) {
    874     // crossScriptSet.add(ltp.set(locale).getLanguageScript());
    875     // }
    876     //
    877     // // Finally, put together the expected alias for comparison.
    878     // // It is the "best" alias, in that the default-content locales are
    879     // skipped in favor of their parents
    880     //
    881     // String expectedAlias =
    882     // !baseScript.equals(maximizedScript) ? minimized :
    883     // !locale.equals(canonicalizedLocale) ? canonicalizedLocale :
    884     // // needScripts.contains(base) ? ltp.getLanguageScript() :
    885     // locale;
    886     //
    887     // if (!equals(aliasLocale, expectedAlias)) {
    888     // String aliasMaximized = maximize(likelySubtags, aliasLocale);
    889     // String expectedMaximized = maximize(likelySubtags, expectedAlias);
    890     // if (!equals(aliasMaximized, expectedMaximized)) {
    891     // errln("For locale:\t" + locale
    892     // + ",\tbase-script:\t" + baseScript
    893     // + ",\texpected alias Locale != actual alias Locale:\t"
    894     // + expectedAlias + ", " + aliasLocale);
    895     // } else if (explicitAlias == null) {
    896     // // skip, we don't care in this case
    897     // // but we emit warnings if the other conditions are true. The aliasing
    898     // could be simpler.
    899     // } else if (equals(expectedAlias, locale)) {
    900     // logln("Warning; alias could be omitted. For locale:\t" + locale
    901     // + ",\tbase-script:\t" + baseScript
    902     // + ",\texpected alias Locale != actual alias Locale:\t"
    903     // + expectedAlias + ", " + aliasLocale);
    904     // } else {
    905     // logln("Warning; alias could be minimized. For locale:\t" + locale
    906     // + ",\tbase-script:\t" + baseScript
    907     // + ",\texpected alias Locale != actual alias Locale:\t"
    908     // + expectedAlias + ", " + aliasLocale);
    909     // }
    910     // }
    911     // }
    912     //
    913     // // check the LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES value and make sure
    914     // it matches what is in the files in main/
    915     //
    916     // if (!topLevelAliases.equals(LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES)
    917     // && locales.equals(allLocales)) {
    918     // String diff = showDifferences(LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES,
    919     // topLevelAliases);
    920     // if (!diff.isEmpty()) {
    921     // errln("LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES  topLevelAliases: " +
    922     // diff);
    923     // }
    924     // StringBuilder result = new StringBuilder(
    925     // "Suggest changing LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES to:\n");
    926     // for (Entry<String, String> entry : topLevelAliases.entrySet()) {
    927     // result.append("\t.put(\"")
    928     // .append(entry.getKey())
    929     // .append("\", \"")
    930     // .append(entry.getValue())
    931     // .append("\")\n");
    932     // }
    933     // errln(result.toString());
    934     // } else {
    935     // logln("Top Level Aliases:\t" + topLevelAliases);
    936     // }
    937     //
    938     // // verify that they are the same as what we would get if we were to
    939     // maximize
    940     // // all the locales and check against default_contents
    941     //
    942     // for (String locale : defaultContents) {
    943     // CLDRFile cldrFileToCheck = null;
    944     // try {
    945     // cldrFileToCheck = factory.make(locale, false);
    946     // } catch (Exception e) {}
    947     // if (cldrFileToCheck == null) {
    948     // logln("Present in default contents but has no XML file:\t" + locale);
    949     // continue;
    950     // }
    951     // logln("Locale:\t" + locale);
    952     // // verify empty, except for identity elements and alias
    953     // for (String path : cldrFileToCheck) {
    954     // if (path.contains("/identity/")) {
    955     // continue;
    956     // }
    957     // errln("Default content locale not empty:\t" + locale + ", " + path);
    958     // break;
    959     // }
    960     // }
    961     // }
    962 
    /** Matcher for the pattern {@code //ldml.*/alias.*}, used when collecting alias paths. */
    Matcher aliasMatcher = PatternCache
        .get("//ldml.*/alias.*")
        .matcher("");
    964 
    965     private void checkAliasValues(CLDRFile cldrFileToCheck, Set<String> locales) {
    966         Set<String> aliasPaths = new TreeSet<String>();
    967         Set<String> allAliasPaths = cldrFileToCheck.getPaths("//ldml/",
    968             aliasMatcher, aliasPaths);
    969         XPathParts xpp = new XPathParts();
    970         for (String aliasPath : allAliasPaths) {
    971             if (aliasPath.startsWith("//ldml/alias")) {
    972                 continue; // we have different tests elsewhere
    973             }
    974             String fullPath = cldrFileToCheck.getFullXPath(aliasPath);
    975             String aliasLocale = xpp.set(fullPath).getAttributeValue(-1,
    976                 "source");
    977             // just check to make sure that the alias is in the locales
    978             if (aliasLocale != null && !aliasLocale.equals("locale")) {
    979                 if (!locales.contains(aliasLocale)) {
    980                     errln("Unknown Alias:\t" + aliasLocale + "\t in\t"
    981                         + fullPath);
    982                 }
    983             }
    984             String aliasPathNew = xpp.set(fullPath).getAttributeValue(-1,
    985                 "path");
    986             // just one check
    987             if (".".equals(aliasPathNew)) {
    988                 errln("Illegal path, must not be .:\t" + aliasLocale
    989                     + "\t in\t" + fullPath);
    990             }
    991 
    992         }
    993     }
    994 
    995     private String minimize(Map<String, String> likelySubtags, String locale) {
    996         String result = GenerateMaximalLocales.minimize(locale, likelySubtags,
    997             false);
    998         if (result == null) {
    999             LanguageTagParser ltp3 = new LanguageTagParser().set(locale);
   1000             List<String> variants = ltp3.getVariants();
   1001             Map<String, String> extensions = ltp3.getExtensions();
   1002             Set<String> emptySet = Collections.emptySet();
   1003             ltp3.setVariants(emptySet);
   1004             Map<String, String> emptyMap = Collections.emptyMap();
   1005             ltp3.setExtensions(emptyMap);
   1006             String newLocale = ltp3.toString();
   1007             result = GenerateMaximalLocales.minimize(newLocale, likelySubtags,
   1008                 false);
   1009             if (result != null) {
   1010                 ltp3.set(result);
   1011                 ltp3.setVariants(variants);
   1012                 ltp3.setExtensions(extensions);
   1013                 result = ltp3.toString();
   1014             }
   1015         }
   1016         return result;
   1017     }
   1018 
   1019     private String maximize(Map<String, String> likelySubtags, String locale) {
   1020         String result = GenerateMaximalLocales.maximize(locale, likelySubtags);
   1021         if (result == null) {
   1022             LanguageTagParser ltp3 = new LanguageTagParser().set(locale);
   1023             List<String> variants = ltp3.getVariants();
   1024             Map<String, String> extensions = ltp3.getExtensions();
   1025             Set<String> emptySet = Collections.emptySet();
   1026             ltp3.setVariants(emptySet);
   1027             Map<String, String> emptyMap = Collections.emptyMap();
   1028             ltp3.setExtensions(emptyMap);
   1029             String newLocale = ltp3.toString();
   1030             result = GenerateMaximalLocales.maximize(newLocale, likelySubtags);
   1031             if (result != null) {
   1032                 ltp3.set(result);
   1033                 ltp3.setVariants(variants);
   1034                 ltp3.setExtensions(extensions);
   1035                 result = ltp3.toString();
   1036             }
   1037         }
   1038         return result;
   1039     }
   1040 
   1041     // TODO move this into central utilities
   1042     public static boolean equals(CharSequence string, int codePoint) {
   1043         if (string == null) {
   1044             return false;
   1045         }
   1046         switch (string.length()) {
   1047         case 1:
   1048             return codePoint == string.charAt(0);
   1049         case 2:
   1050             return codePoint >= 0x10000
   1051                 && codePoint == Character.codePointAt(string, 0);
   1052         default:
   1053             return false;
   1054         }
   1055     }
   1056 
   1057     // TODO move this into central utilities
   1058 
   1059     private static final StandardCodes STANDARD_CODES = testInfo.getStandardCodes();
   1060     private static final Map<String, Map<String, R2<List<String>, String>>> DEPRECATED_INFO = dataInfo
   1061         .getLocaleAliasInfo();
   1062 
   1063     private void checkLocale(String localeID, boolean allowDeprecated) {
   1064         // verify that the localeID is valid
   1065         LanguageTagParser ltp = new LanguageTagParser().set(localeID);
   1066         String language = ltp.getLanguage();
   1067         String script = ltp.getScript();
   1068         String region = ltp.getRegion();
   1069         // TODO check variants, extensions also.
   1070         checkValidCode(localeID, "language", language, allowDeprecated);
   1071         checkValidCode(localeID, "script", script, allowDeprecated);
   1072         checkValidCode(localeID, "territory", region, allowDeprecated);
   1073     }
   1074 
   1075     private void checkValidCode(String localeID, String subtagType,
   1076         String subtag, boolean allowDeprecated) {
   1077         if (subtagType.equals("language")) {
   1078             if (subtag.equals("und")) {
   1079                 return;
   1080             }
   1081         } else {
   1082             if (subtag.isEmpty()) {
   1083                 return;
   1084             }
   1085         }
   1086         if (!STANDARD_CODES.getAvailableCodes(subtagType).contains(subtag)) {
   1087             errln("Locale " + localeID + " contains illegal "
   1088                 + showCode(subtagType, subtag));
   1089         } else if (!allowDeprecated) {
   1090             // "language" -> "sh" -> <{"sr_Latn"}, reason>
   1091             R2<List<String>, String> deprecatedInfo = DEPRECATED_INFO.get(
   1092                 subtagType).get(subtag);
   1093             if (deprecatedInfo != null) {
   1094                 errln("Locale " + localeID + " contains deprecated "
   1095                     + showCode(subtagType, subtag) + " "
   1096                     + deprecatedInfo.get1() + "; suggest "
   1097                     + showName(deprecatedInfo.get0(), subtagType));
   1098             }
   1099         }
   1100     }
   1101 
   1102     private String showName(List<String> deprecatedInfo, String subtagType) {
   1103         StringBuilder result = new StringBuilder();
   1104         for (String s : deprecatedInfo) {
   1105             result.append(showName(subtagType, s)).append(" ");
   1106         }
   1107         return result.toString();
   1108     }
   1109 
   1110     private String showCode(String subtagType, String subtag) {
   1111         return subtagType + " code: " + showName(subtagType, subtag);
   1112     }
   1113 
   1114     private String showName(String subtagType, String subtag) {
   1115         return subtag + " (" + getName(subtagType, subtag) + ")";
   1116     }
   1117 
   1118     private String getName(String subtagType, String subtag) {
   1119         Map<String, String> data = STANDARD_CODES.getLangData(subtagType,
   1120             subtag);
   1121         if (data == null) {
   1122             return "<no name>";
   1123         }
   1124         return data.get("Description");
   1125     }
   1126 
   1127     // TODO move this into central utilities
   1128     public static boolean equals(int codePoint, CharSequence string) {
   1129         return equals(string, codePoint);
   1130     }
   1131 
   1132     // TODO move this into central utilities
   1133     public static boolean equals(Object a, Object b) {
   1134         return a == b ? true : a == null || b == null ? false : a.equals(b);
   1135     }
   1136 
   1137     // TODO move this into central utilities
   1138     private <K, V> String showDifferences(Map<K, V> a, Map<K, V> b) {
   1139         StringBuilder result = new StringBuilder();
   1140         Set<K> keys = new LinkedHashSet<K>();
   1141         keys.addAll(a.keySet());
   1142         keys.addAll(b.keySet());
   1143         for (K key : keys) {
   1144             if (!a.containsKey(key)) {
   1145                 result.append(key).append("").append(a.get(key))
   1146                     .append(",; ");
   1147             } else if (!b.containsKey(key)) {
   1148                 result.append(key).append(",").append(b.get(key))
   1149                     .append("; ");
   1150             } else {
   1151                 V aKey = a.get(key);
   1152                 V bKey = b.get(key);
   1153                 if (!equals(aKey, bKey)) {
   1154                     result.append(key).append("").append(a.get(key))
   1155                         .append(",").append(b.get(key)).append("; ");
   1156                 }
   1157             }
   1158         }
   1159         return result.toString();
   1160     }
   1161 
   1162     public void TestLanguageTagParser() {
   1163         LanguageTagParser ltp = new LanguageTagParser();
   1164         ltp.set("en-Cyrl-US");
   1165         assertEquals(null, "en", ltp.getLanguage());
   1166         assertEquals(null, "en_Cyrl", ltp.getLanguageScript());
   1167         assertEquals(null, "Cyrl", ltp.getScript());
   1168         assertEquals(null, "US", ltp.getRegion());
   1169         try {
   1170             ltp.set("$");
   1171             assertFalse("expected exception", true);
   1172         } catch (Exception e) {
   1173             logln(e.getMessage());
   1174         }
   1175     }
   1176 }
   1177