Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.util.ArrayList;
      4 import java.util.Arrays;
      5 import java.util.Collection;
      6 import java.util.Collections;
      7 import java.util.Date;
      8 import java.util.EnumMap;
      9 import java.util.EnumSet;
     10 import java.util.HashMap;
     11 import java.util.HashSet;
     12 import java.util.Iterator;
     13 import java.util.LinkedHashMap;
     14 import java.util.LinkedHashSet;
     15 import java.util.List;
     16 import java.util.Locale;
     17 import java.util.Map;
     18 import java.util.Map.Entry;
     19 import java.util.Set;
     20 import java.util.TreeMap;
     21 import java.util.TreeSet;
     22 import java.util.regex.Matcher;
     23 import java.util.regex.Pattern;
     24 
     25 import org.unicode.cldr.draft.ScriptMetadata;
     26 import org.unicode.cldr.test.CoverageLevel2;
     27 import org.unicode.cldr.tool.LikelySubtags;
     28 import org.unicode.cldr.tool.PluralMinimalPairs;
     29 import org.unicode.cldr.tool.PluralRulesFactory;
     30 import org.unicode.cldr.util.Builder;
     31 import org.unicode.cldr.util.CLDRConfig;
     32 import org.unicode.cldr.util.CLDRFile;
     33 import org.unicode.cldr.util.CLDRFile.WinningChoice;
     34 import org.unicode.cldr.util.CLDRLocale;
     35 import org.unicode.cldr.util.CldrUtility;
     36 import org.unicode.cldr.util.Iso639Data;
     37 import org.unicode.cldr.util.Iso639Data.Scope;
     38 import org.unicode.cldr.util.IsoCurrencyParser;
     39 import org.unicode.cldr.util.LanguageTagCanonicalizer;
     40 import org.unicode.cldr.util.LanguageTagParser;
     41 import org.unicode.cldr.util.Level;
     42 import org.unicode.cldr.util.Organization;
     43 import org.unicode.cldr.util.Pair;
     44 import org.unicode.cldr.util.PluralRanges;
     45 import org.unicode.cldr.util.PreferredAndAllowedHour;
     46 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
     47 import org.unicode.cldr.util.StandardCodes;
     48 import org.unicode.cldr.util.StandardCodes.CodeType;
     49 import org.unicode.cldr.util.StandardCodes.LstrType;
     50 import org.unicode.cldr.util.SupplementalDataInfo;
     51 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
     52 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
     53 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
     54 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
     55 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
     56 import org.unicode.cldr.util.SupplementalDataInfo.DateRange;
     57 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
     58 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
     59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
     60 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
     61 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
     62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
     63 import org.unicode.cldr.util.SupplementalDataInfo.SampleList;
     64 import org.unicode.cldr.util.Validity;
     65 import org.unicode.cldr.util.Validity.Status;
     66 
     67 import com.google.common.collect.ImmutableSet;
     68 import com.google.common.collect.Multimap;
     69 import com.google.common.collect.TreeMultimap;
     70 import com.ibm.icu.dev.util.CollectionUtilities;
     71 import com.ibm.icu.impl.Relation;
     72 import com.ibm.icu.impl.Row;
     73 import com.ibm.icu.impl.Row.R2;
     74 import com.ibm.icu.impl.Row.R3;
     75 import com.ibm.icu.impl.Utility;
     76 import com.ibm.icu.lang.UCharacter;
     77 import com.ibm.icu.lang.UCharacterEnums;
     78 import com.ibm.icu.lang.UScript;
     79 import com.ibm.icu.text.PluralRules;
     80 import com.ibm.icu.text.PluralRules.FixedDecimal;
     81 import com.ibm.icu.text.PluralRules.FixedDecimalRange;
     82 import com.ibm.icu.text.PluralRules.FixedDecimalSamples;
     83 import com.ibm.icu.text.PluralRules.SampleType;
     84 import com.ibm.icu.text.StringTransform;
     85 import com.ibm.icu.text.UnicodeSet;
     86 import com.ibm.icu.util.Output;
     87 import com.ibm.icu.util.TimeZone;
     88 import com.ibm.icu.util.ULocale;
     89 
     90 public class TestSupplementalInfo extends TestFmwkPlus {
     91     static CLDRConfig testInfo = CLDRConfig.getInstance();
     92 
     93     private static final StandardCodes STANDARD_CODES = testInfo
     94         .getStandardCodes();
     95 
     96     private static final SupplementalDataInfo SUPPLEMENTAL = testInfo
     97         .getSupplementalDataInfo();
     98 
     99     public static void main(String[] args) {
    100         new TestSupplementalInfo().run(args);
    101     }
    102 
    103     public void TestPluralSampleOrder() {
    104         HashSet<PluralInfo> seen = new HashSet<PluralInfo>();
    105         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
    106             if (locale.equals("root")) {
    107                 continue;
    108             }
    109             PluralInfo pi = SUPPLEMENTAL.getPlurals(locale);
    110             if (seen.contains(pi)) {
    111                 continue;
    112             }
    113             seen.add(pi);
    114             for (SampleType s : SampleType.values()) {
    115                 for (Count c : pi.getCounts(s)) {
    116                     FixedDecimalSamples sSamples = pi.getPluralRules()
    117                         .getDecimalSamples(c.toString(), s);
    118                     if (sSamples == null) {
    119                         errln(locale + " no sample for " + c);
    120                         continue;
    121                     }
    122                     if (s == SampleType.DECIMAL) {
    123                         continue; // skip
    124                     }
    125                     FixedDecimalRange lastSample = null;
    126                     for (FixedDecimalRange sample : sSamples.samples) {
    127                         if (lastSample != null) {
    128                             if (lastSample.start.compareTo(sample.start) > 0) {
    129                                 errln(locale + ":" + c + ": out of order with "
    130                                     + lastSample + " > " + sample);
    131                             } else if (false) {
    132                                 logln(locale + ":" + c + ": in order with "
    133                                     + lastSample + " < " + sample);
    134                             }
    135                         }
    136                         lastSample = sample;
    137                     }
    138                 }
    139             }
    140         }
    141     }
    142 
    143     public void TestPluralRanges() {
    144         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
    145         Set<String> localesToTest = new TreeSet<String>(
    146             SUPPLEMENTAL.getPluralRangesLocales());
    147         for (String locale : StandardCodes.make().getLocaleCoverageLocales(
    148             "google")) { // superset
    149             if (locale.equals("*") || locale.contains("_")) {
    150                 continue;
    151             }
    152             localesToTest.add(locale);
    153         }
    154         Set<String> modernLocales = testInfo.getStandardCodes()
    155             .getLocaleCoverageLocales(Organization.cldr,
    156                 EnumSet.of(Level.MODERN));
    157 
    158         Output<FixedDecimal> maxSample = new Output<FixedDecimal>();
    159         Output<FixedDecimal> minSample = new Output<FixedDecimal>();
    160 
    161         for (String locale : localesToTest) {
    162             final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:";
    163             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
    164             Set<Count> counts = pluralInfo.getCounts();
    165 
    166             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString());
    167 
    168             // check that there are no null values
    169             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
    170             if (pluralRanges == null) {
    171                 if (!modernLocales.contains(locale)) {
    172                     logln("Missing plural ranges for " + locale);
    173                 } else {
    174                     errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales");
    175                     StringBuilder failureCases = new StringBuilder(templateLine);
    176                     for (Count start : counts) {
    177                         for (Count end : counts) {
    178                             pluralInfo.rangeExists(start, end, minSample, maxSample);
    179                             final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns);
    180                             failureCases.append("\n" + locale + "\t" + rangeLine);
    181                         }
    182                     }
    183                     errOrLog(CoverageIssue.warn, failureCases.toString());
    184                 }
    185                 continue;
    186             }
    187             EnumSet<Count> found = EnumSet.noneOf(Count.class);
    188             for (Count count : Count.values()) {
    189                 if (pluralRanges.isExplicitlySet(count)
    190                     && !counts.contains(count)) {
    191                     assertTrue(
    192                         locale
    193                             + "\t pluralRanges categories must be valid for locale:\t"
    194                             + count + " must be in " + counts,
    195                         !pluralRanges.isExplicitlySet(count));
    196                 }
    197                 for (Count end : Count.values()) {
    198                     Count result = pluralRanges.getExplicit(count, end);
    199                     if (result != null) {
    200                         found.add(result);
    201                     }
    202                 }
    203             }
    204 
    205             // check empty range results
    206             if (found.isEmpty()) {
    207                 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales");
    208             } else {
    209                 if (samplePatterns == null) {
    210                     errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales");
    211                 } else {
    212                     for (Count result : found) {
    213                         String samplePattern = samplePatterns.get(
    214                             PluralRules.PluralType.CARDINAL, result);
    215                         if (samplePattern != null && !samplePattern.contains("{0}")) {
    216                             errln("Plural Ranges cannot have results that don't use {0} in samples: "
    217                                 + locale
    218                                 + ", "
    219                                 + result
    220                                 + "\t"
    221                                 + samplePattern + "");
    222                         }
    223                     }
    224                 }
    225                 if (isVerbose()) {
    226                     logln("Range results for " + locale + ":\t" + found);
    227                 }
    228             }
    229 
    230             // check for missing values
    231             boolean failure = false;
    232             StringBuilder failureCases = new StringBuilder(templateLine);
    233             for (Count start : counts) {
    234                 for (Count end : counts) {
    235                     boolean needsValue = pluralInfo.rangeExists(start, end,
    236                         minSample, maxSample);
    237                     Count explicitValue = pluralRanges.getExplicit(start, end);
    238                     final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns);
    239                     failureCases.append("\n" + locale + "\t" + rangeLine);
    240                     if (needsValue && explicitValue == null) {
    241                         errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: "
    242                             + rangeLine,
    243                             "Cldrbug:7839", "Missing plural data for modern locales");
    244                         failure = true;
    245                         failureCases.append("\tError  need explicit result");
    246                     } else if (!needsValue && explicitValue != null) {
    247                         errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: "
    248                             + PluralRanges.showRange(start, end, explicitValue),
    249                             "Cldrbug:7839", "Missing plural data for modern locales");
    250                         failureCases.append("\tUnnecessary");
    251                         failure = true;
    252                     } else {
    253                         failureCases.append("\tOK");
    254                     }
    255                 }
    256             }
    257             if (failure) {
    258                 errOrLog(CoverageIssue.warn, failureCases.toString());
    259             }
    260         }
    261     }
    262 
    263     private String getRangeLine(Count start, Count end, Count result,
    264         Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample,
    265         PluralMinimalPairs samplePatterns) {
    266         final String range = minSample + "" + maxSample;
    267         String example = range;
    268         if (samplePatterns != null) {
    269             example = "";
    270             if (result != null) {
    271                 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
    272                 example += "" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "";
    273             } else {
    274                 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) {
    275                     String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c);
    276                     example += c + ":" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "" + "?\tOR ";
    277                 }
    278                 example += " ";
    279             }
    280         }
    281         return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example;
    282     }
    283 
    284     private String getRangeLine(Count count, PluralRules pluralRules, String pattern) {
    285         String sample = "?";
    286         FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER);
    287         if (exampleList == null) {
    288             exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL);
    289         }
    290         FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList);
    291         sample = sampleDecimal.toString();
    292 
    293         String example = pattern == null ? "NO-SAMPLE!" : "" + pattern.replace("{0}", sample) + "";
    294         return count + "\t" + example;
    295     }
    296 
    297     public void TestPluralSamples() {
    298         String[][] test = { { "en", "ordinal", "1", "one" },
    299             { "en", "ordinal", "2", "two" },
    300             { "en", "ordinal", "3", "few" },
    301             { "en", "ordinal", "4", "other" },
    302             { "sl", "cardinal", "2", "two" }, };
    303         for (String[] row : test) {
    304             checkPluralSamples(row);
    305         }
    306     }
    307 
    308     public void TestPluralSamples2() {
    309         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
    310         for (String locale : prf.getLocales()) {
    311             if (locale.equals("und")) {
    312                 continue;
    313             }
    314             if (locale.equals("pl")) {
    315                 int debug = 0;
    316             }
    317             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale);
    318             for (PluralRules.PluralType type : PluralRules.PluralType.values()) {
    319                 PluralInfo rules = SUPPLEMENTAL.getPlurals(
    320                     SupplementalDataInfo.PluralType.fromStandardType(type),
    321                     locale.toString());
    322                 if (rules.getCounts().size() == 1) {
    323                     continue; // don't require rules for unary cases
    324                 }
    325                 Multimap<String, Count> sampleToCount = TreeMultimap.create();
    326 
    327                 for (Count count : rules.getCounts()) {
    328                     String sample = samplePatterns.get(type, count);
    329                     if (sample == null) {
    330                         errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075",
    331                             "Missing ordinal minimal pairs");
    332                     } else {
    333                         sampleToCount.put(sample, count);
    334                         PluralRules pRules = rules.getPluralRules();
    335                         double unique = pRules.getUniqueKeywordValue(count
    336                             .toString());
    337                         if (unique == PluralRules.NO_UNIQUE_VALUE
    338                             && !sample.contains("{0}")) {
    339                             errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " " + sample + "");
    340                         }
    341                     }
    342                 }
    343                 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) {
    344                     if (entry.getValue().size() > 1) {
    345                         errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " " + entry.getKey() + "");
    346                     }
    347                 }
    348             }
    349         }
    350     }
    351 
    352     public void TestCldrScriptCodes() {
    353         Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes();
    354 
    355         Set<String> unicodeScripts = ScriptMetadata.getScripts();
    356         assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts);
    357 
    358         ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz");
    359         assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials);
    360 
    361         ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore");
    362         assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos);
    363 
    364         Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script);
    365         for (Entry<Status, Set<String>> e : scripts.entrySet()) {
    366             switch (e.getKey()) {
    367             case regular:
    368             case special:
    369             case unknown:
    370                 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue());
    371                 break;
    372             default:
    373                 break; // do nothin
    374             }
    375         }
    376 
    377         ImmutableSet<String> variants = ImmutableSet.of("Aran", "Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn");
    378         assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants);
    379     }
    380 
    381     public void checkPluralSamples(String... row) {
    382         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(
    383             PluralType.valueOf(row[1]), row[0]);
    384         Count count = pluralInfo.getCount(new FixedDecimal(row[2]));
    385         assertEquals(CollectionUtilities.join(row, ", "),
    386             Count.valueOf(row[3]), count);
    387     }
    388 
    389     public void TestPluralLocales() {
    390         // get the unique rules
    391         for (PluralType type : PluralType.values()) {
    392             Relation<PluralInfo, String> pluralsToLocale = Relation.of(
    393                 new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
    394             for (String locale : new TreeSet<String>(
    395                 SUPPLEMENTAL.getPluralLocales(type))) {
    396                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale);
    397                 pluralsToLocale.put(pluralInfo, locale);
    398             }
    399 
    400             String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" },
    401                 { "he", "iw" }, { "in", "id" }, { "jw", "jv" },
    402                 { "ji", "yi" }, { "sh", "sr" }, };
    403             for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale
    404                 .keyValuesSet()) {
    405                 PluralInfo pluralInfo2 = pluralInfoEntry.getKey();
    406                 Set<String> locales = pluralInfoEntry.getValue();
    407                 // check that equivalent locales are either both in or both out
    408                 for (String[] row : equivalents) {
    409                     assertEquals(
    410                         type + " must be equivalent: " + Arrays.asList(row),
    411                         locales.contains(row[0]), locales.contains(row[1]));
    412                 }
    413                 // check that no rules contain 'within'
    414                 for (Count count : pluralInfo2.getCounts()) {
    415                     String rule = pluralInfo2.getRule(count);
    416                     if (rule == null) {
    417                         continue;
    418                     }
    419                     assertFalse(
    420                         "Rule '" + rule + "' for " + Arrays.asList(locales)
    421                             + " doesn't contain 'within'",
    422                         rule.contains("within"));
    423                 }
    424             }
    425         }
    426     }
    427 
    428     public void TestDigitPluralCases() {
    429         String[][] tests = {
    430             { "en", "one", "1", "1" },
    431             { "en", "one", "2", "" },
    432             { "en", "one", "3", "" },
    433             { "en", "one", "4", "" },
    434             { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, " },
    435             { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, " },
    436             { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, " },
    437             { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, " },
    438             { "hr", "one", "1", "1, 0.1, 2.10, 1.1, " },
    439             { "hr", "one", "2",
    440                 "21, 31, 41, 51, 61, 71, , 10.1, 12.10, 11.1, " },
    441             { "hr", "one", "3",
    442                 "101, 121, 131, 141, 151, 161, , 100.1, 102.10, 101.1, " },
    443             { "hr", "one", "4",
    444                 "1001, 1021, 1031, 1041, 1051, 1061, , 1000.1, 1002.10, 1001.1, " },
    445             { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, " },
    446             { "hr", "few", "2",
    447                 "22-24, 32-34, 42-44, , 10.2, 10.3, 10.4, " },
    448             { "hr", "few", "3",
    449                 "102-104, 122-124, 132-134, , 100.2, 100.3, 100.4, " },
    450             { "hr", "few", "4",
    451                 "1002-1004, 1022-1024, 1032-1034, , 1000.2, 1000.3, 1000.4, " },
    452             { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, " },
    453             { "hr", "other", "2",
    454                 "10-20, 25-30, 35-40, , 10.0, 10.5, 10.6, " },
    455             { "hr", "other", "3",
    456                 "100, 105-120, 125-130, 135-140, , 100.0, 100.5, 100.6, " },
    457             { "hr", "other", "4",
    458                 "1000, 1005-1020, 1025-1030, 1035-1040, , 1000.0, 1000.5, 1000.6, " }, };
    459         for (String[] row : tests) {
    460             PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
    461             SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]),
    462                 Integer.parseInt(row[2]));
    463             assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3],
    464                 uset.toString());
    465         }
    466     }
    467 
    468     public void TestDigitPluralCompleteness() {
    469         String[][] exceptionStrings = {
    470             // defaults
    471             { "*", "zero", "0,00,000,0000" }, { "*", "one", "0" },
    472             { "*", "two", "0,00,000,0000" },
    473             { "*", "few", "0,00,000,0000" },
    474             { "*", "many", "0,00,000,0000" },
    475             { "*", "other", "0,00,000,0000" },
    476             // others
    477             { "mo", "other", "00,000,0000" }, //
    478             { "ro", "other", "00,000,0000" }, //
    479             { "cs", "few", "0" }, // j in 2..4
    480             { "sk", "few", "0" }, // j in 2..4
    481             { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2
    482             { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1
    483             { "sv", "one", "0" }, // j is 1
    484             { "he", "two", "0" }, // j is 2
    485             { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
    486             // is not 11
    487             { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
    488             // is not 11
    489             { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
    490             // is not 11 or f mod 10 is
    491             // 1 and f mod 100 is not 11
    492             { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
    493             // is not 11 or f mod 10 is
    494             // 1 and f mod 100 is not 11
    495             { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
    496             // is not 11 or f mod 10 is
    497             // 1 and f mod 100 is not 11
    498             { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
    499             // is not 11 or f mod 10 is
    500             // 1 and f mod 100 is not 11
    501             { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10
    502             // is 1
    503             { "sl", "one", "0,000,0000" }, // j mod 100 is 1
    504             { "sl", "two", "0,000,0000" }, // j mod 100 is 2
    505             { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10
    506             // is 0
    507             { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99
    508             { "gd", "one", "0,00" }, // n in 1,11
    509             { "gd", "two", "0,00" }, // n in 2,12
    510             { "shi", "few", "0,00" }, // n in 2..10
    511             { "gd", "few", "0,00" }, // n in 3..10,13..19
    512             { "ga", "few", "0" }, // n in 3..6
    513             { "ga", "many", "0,00" }, // n in 7..10
    514             { "ar", "zero", "0" }, // n is 0
    515             { "cy", "zero", "0" }, // n is 0
    516             { "ksh", "zero", "0" }, // n is 0
    517             { "lag", "zero", "0" }, // n is 0
    518             { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1
    519             { "pt_PT", "one", "0" }, // n = 1 and v = 0
    520             { "ar", "two", "0" }, // n is 2
    521             { "cy", "two", "0" }, // n is 2
    522             { "ga", "two", "0" }, // n is 2
    523             { "iu", "two", "0" }, // n is 2
    524             { "kw", "two", "0" }, // n is 2
    525             { "naq", "two", "0" }, // n is 2
    526             { "se", "two", "0" }, // n is 2
    527             { "sma", "two", "0" }, // n is 2
    528             { "smi", "two", "0" }, // n is 2
    529             { "smj", "two", "0" }, // n is 2
    530             { "smn", "two", "0" }, // n is 2
    531             { "sms", "two", "0" }, // n is 2
    532             { "cy", "few", "0" }, // n is 3
    533             { "cy", "many", "0" }, // n is 6
    534             { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0
    535             { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1
    536             { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
    537             // is not 11
    538             { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
    539             // is not 11 or v is 2 and f
    540             // mod 10 is 1 and f mod 100
    541             // is not 11 or v is not 2
    542             // and f mod 10 is 1
    543             { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
    544             // not in 11,71,91
    545             { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
    546             // not in 11..19
    547             { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
    548             // 0 and i % 10 != 4,6,9 or
    549             // v != 0 and f % 10 !=
    550             // 4,6,9
    551             { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
    552             // 0 and i % 10 != 4,6,9 or
    553             // v != 0 and f % 10 !=
    554             // 4,6,9
    555             { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f
    556             // % 100 = 1
    557         };
    558         // parse out the exceptions
    559         Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<PluralInfo, Relation<Count, Integer>>();
    560         Relation<Count, Integer> fallback = Relation.of(
    561             new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class);
    562         for (String[] row : exceptionStrings) {
    563             Relation<Count, Integer> countToDigits;
    564             if (row[0].equals("*")) {
    565                 countToDigits = fallback;
    566             } else {
    567                 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
    568                 countToDigits = exceptions.get(plurals);
    569                 if (countToDigits == null) {
    570                     exceptions.put(
    571                         plurals,
    572                         countToDigits = Relation.of(
    573                             new EnumMap<Count, Set<Integer>>(
    574                                 Count.class),
    575                             TreeSet.class));
    576                 }
    577             }
    578             Count c = Count.valueOf(row[1]);
    579             for (String digit : row[2].split(",")) {
    580                 // "99" is special, just to have the result be non-empty
    581                 countToDigits.put(c, digit.length());
    582             }
    583         }
    584         Set<PluralInfo> seen = new HashSet<PluralInfo>();
    585         Set<String> sorted = new TreeSet<String>(
    586             SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
    587         Relation<String, String> ruleToExceptions = Relation.of(
    588             new TreeMap<String, Set<String>>(), TreeSet.class);
    589 
    590         for (String locale : sorted) {
    591             PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale);
    592             if (seen.contains(plurals)) { // skip identicals
    593                 continue;
    594             }
    595             Relation<Count, Integer> countToDigits = exceptions.get(plurals);
    596             if (countToDigits == null) {
    597                 countToDigits = fallback;
    598             }
    599             for (Count c : plurals.getCounts()) {
    600                 List<String> compose = new ArrayList<String>();
    601                 boolean needLine = false;
    602                 Set<Integer> digitSet = countToDigits.get(c);
    603                 if (digitSet == null) {
    604                     digitSet = fallback.get(c);
    605                 }
    606                 for (int digits = 1; digits < 5; ++digits) {
    607                     boolean expected = digitSet.contains(digits);
    608                     boolean hasSamples = plurals.hasSamples(c, digits);
    609                     if (hasSamples) {
    610                         compose.add(Utility.repeat("0", digits));
    611                     }
    612                     if (!assertEquals(locale + ", " + digits + ", " + c,
    613                         expected, hasSamples)) {
    614                         needLine = true;
    615                     }
    616                 }
    617                 if (needLine) {
    618                     String countRules = plurals.getPluralRules().getRules(
    619                         c.toString());
    620                     ruleToExceptions.put(countRules == null ? "" : countRules,
    621                         "{\"" + locale + "\", \"" + c + "\", \""
    622                             + CollectionUtilities.join(compose, ",")
    623                             + "\"},");
    624                 }
    625             }
    626         }
    627         if (!ruleToExceptions.isEmpty()) {
    628             System.out
    629                 .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness");
    630             for (Entry<String, String> entry : ruleToExceptions.entrySet()) {
    631                 System.out.println(entry.getValue() + "\t// " + entry.getKey());
    632             }
    633         }
    634     }
    635 
    636     public void TestLikelyCode() {
    637         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
    638         String[][] tests = { { "it_AQ", "it_Latn_AQ" },
    639             { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, };
    640         for (String[] pair : tests) {
    641             String newMax = LikelySubtags.maximize(pair[0], likely);
    642             assertEquals("Likely", pair[1], newMax);
    643         }
    644 
    645     }
    646 
    647     public void TestLikelySubtagCompleteness() {
    648         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
    649 
    650         for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) {
    651             if (!likely.containsKey(language)) {
    652                 logln("WARNING: No likely subtag for CLDR language code ("
    653                     + language + ")");
    654             }
    655         }
    656         for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) {
    657             if (!likely.containsKey("und_" + script)
    658                 && !script.equals("Latn")
    659                 && !script.equals("Zinh")
    660                 && !script.equals("Zyyy")
    661                 && ScriptMetadata.getInfo(script) != null
    662                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION
    663                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) {
    664                 errln("No likely subtag for CLDR script code (und_" + script
    665                     + ")");
    666             }
    667         }
    668 
    669     }
    670 
    671     public void TestEquivalentLocales() {
    672         Set<Set<String>> seen = new HashSet<Set<String>>();
    673         Set<String> toTest = new TreeSet<String>(testInfo.getCldrFactory()
    674             .getAvailable());
    675         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet());
    676         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values());
    677         toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales());
    678         LanguageTagParser ltp = new LanguageTagParser();
    679         main: for (String locale : toTest) {
    680             if (locale.startsWith("und") || locale.equals("root")) {
    681                 continue;
    682             }
    683             Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale);
    684             if (seen.contains(s)) {
    685                 continue;
    686             }
    687             // System.out.println(s + " => " + VettingViewer.gatherCodes(s));
    688 
    689             List<String> ss = new ArrayList<String>(s);
    690             String last = ss.get(ss.size() - 1);
    691             ltp.set(last);
    692             if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) {
    693                 continue; // skip variants for now.
    694             }
    695             String language = ltp.getLanguage();
    696             String script = ltp.getScript();
    697             String region = ltp.getRegion();
    698             if (!script.isEmpty() && !region.isEmpty()) {
    699                 String noScript = ltp.setScript("").toString();
    700                 String noRegion = ltp.setScript(script).setRegion("")
    701                     .toString();
    702                 switch (s.size()) {
    703                 case 1: // ok if already maximized and strange script/country,
    704                     // eg it_Arab_JA
    705                     continue main;
    706                 case 2: // ok if adds default country/script, eg {en_Cyrl,
    707                     // en_Cyrl_US} or {en_GB, en_Latn_GB}
    708                     String first = ss.get(0);
    709                     if (first.equals(noScript) || first.equals(noRegion)) {
    710                         continue main;
    711                     }
    712                     break;
    713                 case 3: // ok if different script in different country, eg
    714                     // {az_IR, az_Arab, az_Arab_IR}
    715                     if (noScript.equals(ss.get(0))
    716                         && noRegion.equals(ss.get(1))) {
    717                         continue main;
    718                     }
    719                     break;
    720                 case 4: // ok if all combinations, eg {en, en_US, en_Latn,
    721                     // en_Latn_US}
    722                     if (language.equals(ss.get(0))
    723                         && noScript.equals(ss.get(1))
    724                         && noRegion.equals(ss.get(2))) {
    725                         continue main;
    726                     }
    727                     break;
    728                 }
    729             }
    730             errln("Strange size or composition:\t" + s + " \t"
    731                 + showLocaleParts(s));
    732             seen.add(s);
    733         }
    734     }
    735 
    736     private String showLocaleParts(Set<String> s) {
    737         LanguageTagParser ltp = new LanguageTagParser();
    738         Set<String> b = new LinkedHashSet<String>();
    739         for (String ss : s) {
    740             ltp.set(ss);
    741             addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b);
    742             addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b);
    743             addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b);
    744         }
    745         return CollectionUtilities.join(b, "; ");
    746     }
    747 
    748     private void addName(int languageName, String code, Set<String> b) {
    749         if (code.isEmpty()) {
    750             return;
    751         }
    752         String name = testInfo.getEnglish().getName(languageName, code);
    753         if (!code.equals(name)) {
    754             b.add(code + "=" + name);
    755         }
    756     }
    757 
    758     public void TestDefaultScriptCompleteness() {
    759         Relation<String, String> scriptToBase = Relation.of(
    760             new LinkedHashMap<String, Set<String>>(), TreeSet.class);
    761         main: for (String locale : testInfo.getCldrFactory()
    762             .getAvailableLanguages()) {
    763             if (!locale.contains("_") && !"root".equals(locale)) {
    764                 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale);
    765                 if (defaultScript != null) {
    766                     continue;
    767                 }
    768                 CLDRFile cldrFile = testInfo.getCLDRFile(locale,
    769                     false);
    770                 UnicodeSet set = cldrFile.getExemplarSet("",
    771                     WinningChoice.NORMAL);
    772                 for (String s : set) {
    773                     int script = UScript.getScript(s.codePointAt(0));
    774                     if (script != UScript.UNKNOWN && script != UScript.COMMON
    775                         && script != UScript.INHERITED) {
    776                         scriptToBase.put(UScript.getShortName(script), locale);
    777                         continue main;
    778                     }
    779                 }
    780                 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale);
    781             }
    782         }
    783         if (scriptToBase.size() != 0) {
    784             for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) {
    785                 errln("Default Scripts missing:\t" + entry.getKey() + "\t"
    786                     + entry.getValue());
    787             }
    788         }
    789     }
    790 
    791     public void TestTimeData() {
    792         Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL
    793             .getTimeData();
    794         Set<String> regionsSoFar = new HashSet<String>();
    795         Set<String> current24only = new HashSet<String>();
    796         Set<String> current12preferred = new HashSet<String>();
    797 
    798         boolean haveWorld = false;
    799 
    800         ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k));
    801 
    802         for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) {
    803             String region = e.getKey();
    804             if (region.equals("001")) {
    805                 haveWorld = true;
    806             }
    807             regionsSoFar.add(region);
    808             PreferredAndAllowedHour preferredAndAllowedHour = e.getValue();
    809             assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred);
    810 
    811             // find first h or H
    812             HourStyle found = null;
    813 
    814             for (HourStyle item : preferredAndAllowedHour.allowed) {
    815                 if (oldSchool.contains(item)) {
    816                     found = item;
    817                     if (item != preferredAndAllowedHour.preferred) {
    818                         String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred
    819                             + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed;
    820                         if (!logKnownIssue("cldrbug:11448", message)) {
    821                             errln(message);
    822                         }
    823                     }
    824                     break;
    825                 }
    826             }
    827             if (found == null) {
    828                 errln(region + ": preferred " + preferredAndAllowedHour.preferred
    829                     + " not in " + preferredAndAllowedHour.allowed);
    830             }
    831 //            final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next();
    832 //            if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h
    833 //                || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb
    834 //                || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) {
    835 //                errln(region + ": allowed " + preferredAndAllowedHour.allowed
    836 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
    837 //            } else if (isVerbose()) {
    838 //                logln(region + ": allowed " + preferredAndAllowedHour.allowed
    839 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
    840 //            }
    841             // for (HourStyle c : preferredAndAllowedHour.allowed) {
    842             // if (!PreferredAndAllowedHour.HOURS.contains(c)) {
    843             // errln(region + ": illegal character in " +
    844             // preferredAndAllowedHour.allowed + ". It contains " + c
    845             // + " which is not in " + PreferredAndAllowedHour.HOURS);
    846             // }
    847             // }
    848             if (!preferredAndAllowedHour.allowed.contains(HourStyle.h)
    849                 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) {
    850                 current24only.add(region);
    851             }
    852             if (preferredAndAllowedHour.preferred == HourStyle.h) {
    853                 current12preferred.add(region);
    854             }
    855         }
    856         Set<String> missing = new TreeSet<String>(
    857             STANDARD_CODES.getGoodAvailableCodes(CodeType.territory));
    858         missing.removeAll(regionsSoFar);
    859         for (Iterator<String> it = missing.iterator(); it.hasNext();) {
    860             if (!StandardCodes.isCountry(it.next())) {
    861                 it.remove();
    862             }
    863         }
    864 
    865         // if we don't have 001, then we can't miss any regions
    866         if (!missing.isEmpty()) {
    867             if (haveWorld) {
    868                 logln("Implicit regions: " + missing);
    869             } else {
    870                 errln("Missing regions: " + missing);
    871             }
    872         }
    873 
    874         // The feedback gathered from our translators is that the following use
    875         // 24 hour time ONLY:
    876         Set<String> only24lang = new TreeSet<String>(
    877             Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, "
    878                 + "fr, gl, he, is, id, it, nb, pt, ro, ru, sr, sk, sl, sv, tr, hy")
    879                     .split(",\\s*")));
    880         // With the new preferences, this is changed
    881         Set<String> only24region = new TreeSet<String>();
    882         Set<String> either24or12region = new TreeSet<String>();
    883 
    884         // get all countries where official or de-facto official
    885         // add them two one of two lists, based on the above list of languages
    886         for (String language : SUPPLEMENTAL
    887             .getLanguagesForTerritoriesPopulationData()) {
    888             boolean a24lang = only24lang.contains(language);
    889             for (String region : SUPPLEMENTAL
    890                 .getTerritoriesForPopulationData(language)) {
    891                 PopulationData pop = SUPPLEMENTAL
    892                     .getLanguageAndTerritoryPopulationData(language, region);
    893                 if (pop.getOfficialStatus().compareTo(
    894                     OfficialStatus.de_facto_official) < 0) {
    895                     continue;
    896                 }
    897                 if (a24lang) {
    898                     only24region.add(region);
    899                 } else {
    900                     either24or12region.add(region);
    901                 }
    902             }
    903         }
    904         // if we have a case like CA, where en uses 12/24 but fr uses 24, remove
    905         // it for safety
    906         only24region.removeAll(either24or12region);
    907         // There are always exceptions... Remove VA (Vatican), since it allows 12/24
    908         // but the de facto langauge is Italian.
    909         only24region.remove("VA");
    910         // also remove all the regions where 'h' is preferred
    911         only24region.removeAll(current12preferred);
    912         // now verify
    913         if (!current24only.containsAll(only24region)) {
    914             Set<String> missing24only = new TreeSet<String>(only24region);
    915             missing24only.removeAll(current24only);
    916 
    917             errln("24-hour-only doesn't include needed items:\n"
    918                 + " add "
    919                 + CldrUtility.join(missing24only, " ")
    920                 + "\n\t\t"
    921                 + CldrUtility.join(missing24only, "\n\t\t",
    922                     new NameCodeTransform(testInfo.getEnglish(),
    923                         CLDRFile.TERRITORY_NAME)));
    924         }
    925     }
    926 
    927     public static class NameCodeTransform implements StringTransform {
    928         private final CLDRFile file;
    929         private final int codeType;
    930 
    931         public NameCodeTransform(CLDRFile file, int code) {
    932             this.file = file;
    933             this.codeType = code;
    934         }
    935 
    936         @Override
    937         public String transform(String code) {
    938             return file.getName(codeType, code) + " [" + code + "]";
    939         }
    940     }
    941 
    942     public void TestAliases() {
    943         testInfo.getStandardCodes();
    944         Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes
    945             .getLStreg();
    946         Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL
    947             .getLocaleAliasInfo();
    948 
    949         for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases
    950             .entrySet()) {
    951             String type = typeMap.getKey();
    952             Map<String, R2<List<String>, String>> codeReplacement = typeMap
    953                 .getValue();
    954 
    955             Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data
    956                 .get(type.equals("territory") ? "region" : type);
    957             if (bcp47DataTypeData == null) {
    958                 logln("skipping BCP47 test for " + type);
    959             } else {
    960                 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData
    961                     .entrySet()) {
    962                     String code = codeData.getKey();
    963                     if (codeReplacement.containsKey(code)
    964                         || codeReplacement.containsKey(code
    965                             .toUpperCase(Locale.ENGLISH))) {
    966                         continue;
    967                         // TODO, check the value
    968                     }
    969                     Map<String, String> data = codeData.getValue();
    970                     if (data.containsKey("Deprecated")
    971                         && SUPPLEMENTAL.getCLDRLanguageCodes().contains(
    972                             code)) {
    973                         errln("supplementalMetadata.xml: alias is missing <languageAlias type=\""
    974                             + code + "\" ... /> " + "\t" + data);
    975                     }
    976                 }
    977             }
    978 
    979             Set<R3<String, List<String>, List<String>>> failures = new TreeSet<R3<String, List<String>, List<String>>>();
    980             Set<String> nullReplacements = new TreeSet<String>();
    981             for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement
    982                 .entrySet()) {
    983                 String code = codeRep.getKey();
    984                 List<String> replacements = codeRep.getValue().get0();
    985                 if (replacements == null) {
    986                     nullReplacements.add(code);
    987                     continue;
    988                 }
    989                 Set<String> fixedReplacements = new LinkedHashSet<String>();
    990                 for (String replacement : replacements) {
    991                     R2<List<String>, String> newReplacement = codeReplacement
    992                         .get(replacement);
    993                     if (newReplacement != null) {
    994                         List<String> list = newReplacement.get0();
    995                         if (list != null) {
    996                             fixedReplacements.addAll(list);
    997                         }
    998                     } else {
    999                         fixedReplacements.add(replacement);
   1000                     }
   1001                 }
   1002                 List<String> fixedList = new ArrayList<String>(
   1003                     fixedReplacements);
   1004                 if (!replacements.equals(fixedList)) {
   1005                     R3<String, List<String>, List<String>> row = Row.of(code,
   1006                         replacements, fixedList);
   1007                     System.out.println(row.toString());
   1008                     failures.add(row);
   1009                 }
   1010             }
   1011 
   1012             if (failures.size() != 0) {
   1013                 for (R3<String, List<String>, List<String>> item : failures) {
   1014                     String code = item.get0();
   1015                     List<String> oldReplacement = item.get1();
   1016                     List<String> newReplacement = item.get2();
   1017 
   1018                     errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t"
   1019                         + "<" + type + "Alias type=\"" + code
   1020                         + "\" replacement=\""
   1021                         + CollectionUtilities.join(newReplacement, " ")
   1022                         + "\" reason=\"XXX\"/> <!-- YYY -->\n");
   1023                 }
   1024             }
   1025             if (nullReplacements.size() != 0) {
   1026                 logln("No Replacements\t" + type + "\t" + nullReplacements);
   1027             }
   1028         }
   1029     }
   1030 
   1031     static final List<String> oldRegions = Arrays
   1032         .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU"
   1033             .split(", "));
   1034 
   1035     public void TestTerritoryContainment() {
   1036         Relation<String, String> map = SUPPLEMENTAL
   1037             .getTerritoryToContained(ContainmentStyle.all);
   1038         Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore();
   1039         Set<String> mapItems = new LinkedHashSet<String>();
   1040         // get all the items
   1041         for (String item : map.keySet()) {
   1042             mapItems.add(item);
   1043             mapItems.addAll(map.getAll(item));
   1044         }
   1045         Map<String, Map<String, String>> bcp47RegionData = StandardCodes
   1046             .getLStreg().get("region");
   1047 
   1048         // verify that all regions are covered
   1049         Set<String> bcp47Regions = new LinkedHashSet<String>(
   1050             bcp47RegionData.keySet());
   1051         bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the
   1052         // unknown region...
   1053         for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) {
   1054             String region = it.next();
   1055             Map<String, String> data = bcp47RegionData.get(region);
   1056             if (data.containsKey("Deprecated")) {
   1057                 logln("Removing deprecated " + region);
   1058                 it.remove();
   1059             }
   1060             if ("Private use".equals(data.get("Description"))) {
   1061                 it.remove();
   1062             }
   1063         }
   1064 
   1065         if (!mapItems.equals(bcp47Regions)) {
   1066             mapItems.removeAll(oldRegions);
   1067             errlnDiff("containment items not in bcp47 regions: ", mapItems,
   1068                 bcp47Regions);
   1069             errlnDiff("bcp47 regions not in containment items: ", bcp47Regions,
   1070                 mapItems);
   1071         }
   1072 
   1073         // verify that everything in the containment core can be reached
   1074         // downwards from 001.
   1075 
   1076         Map<String, Integer> from001 = getRecursiveContainment("001", map,
   1077             new LinkedHashMap<String, Integer>(), 1);
   1078         from001.put("001", 0);
   1079         Set<String> keySet = from001.keySet();
   1080         for (String region : keySet) {
   1081             logln(Utility.repeat("\t", from001.get(region)) + "\t" + region
   1082                 + "\t" + getRegionName(region));
   1083         }
   1084 
   1085         // Populate mapItems with the core containment
   1086         mapItems.clear();
   1087         for (String item : mapCore.keySet()) {
   1088             mapItems.add(item);
   1089             mapItems.addAll(mapCore.getAll(item));
   1090         }
   1091 
   1092         if (!mapItems.equals(keySet)) {
   1093             errlnDiff(
   1094                 "containment core items that can't be reached from 001: ",
   1095                 mapItems, keySet);
   1096         }
   1097     }
   1098 
   1099     private void errlnDiff(String title, Set<String> mapItems,
   1100         Set<String> keySet) {
   1101         Set<String> diff = new LinkedHashSet<String>(mapItems);
   1102         diff.removeAll(keySet);
   1103         if (diff.size() != 0) {
   1104             errln(title + diff);
   1105         }
   1106     }
   1107 
   1108     private String getRegionName(String region) {
   1109         return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region);
   1110     }
   1111 
   1112     private Map<String, Integer> getRecursiveContainment(String region,
   1113         Relation<String, String> map, Map<String, Integer> result, int depth) {
   1114         Set<String> contained = map.getAll(region);
   1115         if (contained == null) {
   1116             return result;
   1117         }
   1118         for (String item : contained) {
   1119             if (result.containsKey(item)) {
   1120                 logln("Duplicate containment " + item + "\t"
   1121                     + getRegionName(item));
   1122                 continue;
   1123             }
   1124             result.put(item, depth);
   1125             getRecursiveContainment(item, map, result, depth + 1);
   1126         }
   1127         return result;
   1128     }
   1129 
   1130     public void TestMacrolanguages() {
   1131         Set<String> languageCodes = STANDARD_CODES
   1132             .getAvailableCodes("language");
   1133         Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL
   1134             .getLocaleAliasInfo();
   1135         Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement
   1136             .get("language");
   1137 
   1138         Relation<String, String> replacementToReplaced = Relation.of(
   1139             new TreeMap<String, Set<String>>(), TreeSet.class);
   1140         for (String language : tagToReplacement.keySet()) {
   1141             List<String> replacements = tagToReplacement.get(language).get0();
   1142             if (replacements != null) {
   1143                 replacementToReplaced.putAll(replacements, language);
   1144             }
   1145         }
   1146         replacementToReplaced.freeze();
   1147 
   1148         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes
   1149             .getLStreg();
   1150         Map<String, Map<String, String>> lstregLanguageInfo = lstreg
   1151             .get("language");
   1152 
   1153         Relation<Scope, String> scopeToCodes = Relation.of(
   1154             new TreeMap<Scope, Set<String>>(), TreeSet.class);
   1155         // the invariant is that every macrolanguage has exactly 1 encompassed
   1156         // language that maps to it
   1157 
   1158         main: for (String language : Builder.with(new TreeSet<String>())
   1159             .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) {
   1160             if (language.equals("no") || language.equals("sh"))
   1161                 continue; // special cases
   1162             Scope languageScope = getScope(language, lstregLanguageInfo);
   1163             if (languageScope == Scope.Macrolanguage) {
   1164                 if (Iso639Data.getHeirarchy(language) != null) {
   1165                     continue main; // is real family
   1166                 }
   1167                 Set<String> replacements = replacementToReplaced
   1168                     .getAll(language);
   1169                 if (replacements == null || replacements.size() == 0) {
   1170                     scopeToCodes.put(languageScope, language);
   1171                 } else {
   1172                     // it still might be bad, if we don't have a mapping to a
   1173                     // regular language
   1174                     for (String replacement : replacements) {
   1175                         Scope replacementScope = getScope(replacement,
   1176                             lstregLanguageInfo);
   1177                         if (replacementScope == Scope.Individual) {
   1178                             continue main;
   1179                         }
   1180                     }
   1181                     scopeToCodes.put(languageScope, language);
   1182                 }
   1183             }
   1184         }
   1185         // now show the items we found
   1186         for (Scope scope : scopeToCodes.keySet()) {
   1187             for (String language : scopeToCodes.getAll(scope)) {
   1188                 String name = testInfo.getEnglish().getName(language);
   1189                 if (name == null || name.equals(language)) {
   1190                     Set<String> set = Iso639Data.getNames(language);
   1191                     if (set != null) {
   1192                         name = set.iterator().next();
   1193                     } else {
   1194                         Map<String, String> languageInfo = lstregLanguageInfo
   1195                             .get(language);
   1196                         if (languageInfo != null) {
   1197                             name = languageInfo.get("Description");
   1198                         }
   1199                     }
   1200                 }
   1201                 errln(scope + "\t" + language + "\t" + name + "\t"
   1202                     + Iso639Data.getType(language));
   1203             }
   1204         }
   1205     }
   1206 
   1207     private Scope getScope(String language,
   1208         Map<String, Map<String, String>> lstregLanguageInfo) {
   1209         Scope languageScope = Iso639Data.getScope(language);
   1210         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
   1211         if (languageInfo == null) {
   1212             // System.out.println("Couldn't get lstreg info for " + language);
   1213         } else {
   1214             String lstregScope = languageInfo.get("Scope");
   1215             if (lstregScope != null) {
   1216                 Scope scope2 = Scope.fromString(lstregScope);
   1217                 if (languageScope != scope2) {
   1218                     // System.out.println("Mismatch in scope between LSTR and ISO 639:\t"
   1219                     // + scope2 + "\t" +
   1220                     // languageScope);
   1221                     languageScope = scope2;
   1222                 }
   1223             }
   1224         }
   1225         return languageScope;
   1226     }
   1227 
            // Switch read by TestPopulation and Test9924. When true, TestPopulation
            // asserts that every population-data language is already in canonical
            // form and skips its base/script invariant checks, and Test9924 looks up
            // "zh" rather than "zh_Hans".
   1228     static final boolean LOCALES_FIXED = true;
   1229 
   1230     public void TestPopulation() {
   1231         Set<String> languages = SUPPLEMENTAL
   1232             .getLanguagesForTerritoriesPopulationData();
   1233         Relation<String, String> baseToLanguages = Relation.of(
   1234             new TreeMap<String, Set<String>>(), TreeSet.class);
   1235         LanguageTagParser ltp = new LanguageTagParser();
   1236         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false);
   1237 
   1238         for (String language : languages) {
   1239             if (LOCALES_FIXED) {
   1240                 String canonicalForm = ltc.transform(language);
   1241                 if (!assertEquals("Canonical form", canonicalForm, language)) {
   1242                     int debug = 0;
   1243                 }
   1244             }
   1245 
   1246             String base = ltp.set(language).getLanguage();
   1247             String script = ltp.getScript();
   1248             baseToLanguages.put(base, language);
   1249 
   1250             // add basic data, basically just for wo!
   1251             // if there are primary scripts, they must include script (if not
   1252             // empty)
   1253             Set<String> primaryScripts = Collections.emptySet();
   1254             Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL
   1255                 .getBasicLanguageDataMap(base);
   1256             if (basicData != null) {
   1257                 BasicLanguageData s = basicData
   1258                     .get(BasicLanguageData.Type.primary);
   1259                 if (s != null) {
   1260                     primaryScripts = s.getScripts();
   1261                 }
   1262             }
   1263 
   1264             // do some consistency tests; if there is a script, it must be in
   1265             // primaryScripts
   1266             if (!script.isEmpty() && !primaryScripts.contains(script)) {
   1267                 errln(base + ": Script found in territory data (" + script
   1268                     + ") is not in primary scripts :\t" + primaryScripts);
   1269             }
   1270 
   1271             // if there are multiple primary scripts, they will be in
   1272             // baseToLanguages
   1273             if (primaryScripts.size() > 1) {
   1274                 for (String script2 : primaryScripts) {
   1275                     baseToLanguages.put(base, base + "_" + script2);
   1276                 }
   1277             }
   1278         }
   1279 
   1280         if (!LOCALES_FIXED) {
   1281             // the invariants are that if we have a base, we must not have a script.
   1282             // and if we don't have a base, we must have two items
   1283             for (String base : baseToLanguages.keySet()) {
   1284                 Set<String> languagesForBase = baseToLanguages.getAll(base);
   1285                 if (languagesForBase.contains(base)) {
   1286                     if (languagesForBase.size() > 1) {
   1287                         errln("Cannot have base alone with other scripts:\t"
   1288                             + languagesForBase);
   1289                     }
   1290                 } else {
   1291                     if (languagesForBase.size() == 1) {
   1292                         errln("Cannot have only one script for language:\t"
   1293                             + languagesForBase);
   1294                     }
   1295                 }
   1296             }
   1297         }
   1298     }
   1299 
   1300     public void TestCompleteness() {
   1301         if (SUPPLEMENTAL.getSkippedElements().size() > 0) {
   1302             logln("SupplementalDataInfo API doesn't support: "
   1303                 + SUPPLEMENTAL.getSkippedElements().toString());
   1304         }
   1305     }
   1306 
   1307     // these are settings for exceptional cases we want to allow
            // Currency codes exempt from the "new"-in-name checks in TestCurrency.
   1308     private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<String>(
   1309         Arrays.asList("ILS", "NZD", "PGK", "TWD"));
   1310 
   1311     // ok since there is no problem with confusion
            // Non-modern currency codes that TestCurrency does not log for lacking
            // "old" or a date range in their English name.
   1312     private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<String>(
   1313         Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM",
   1314             "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG",
   1315             "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN",
   1316             "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD",
   1317             "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI",
   1318             "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD",
   1319             "YUN", "ZRZ", "GWE"));
   1320 
            // Cutoff used by TestCurrency: a modern currency with "new" in its name
            // is an error only if its recorded first-valid date is before this date
            // (five years before the year in which the class was initialized).
            // NOTE(review): java.util.Date months are 0-based, so the month argument
            // 1 means February 1, not January 1 - confirm which was intended.
   1321     private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(
   1322         new Date().getYear() - 5, 1, 1);
            // Reference instant, captured once at class initialization, against which
            // TestCurrency classifies currency date ranges as modern or non-modern.
   1323     private static final Date NOW = new Date();
            // Finds the whole word "old" or a "(yyyy-yyyy)" year range, ignoring case.
            // The matcher is re-targeted with reset(name) before each use.
   1324     private Matcher oldMatcher = Pattern.compile(
   1325         "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE)
   1326         .matcher("");
            // Finds the whole word "new", ignoring case; also re-targeted with reset(name).
   1327     private Matcher newMatcher = Pattern.compile("\\bnew\\b",
   1328         Pattern.CASE_INSENSITIVE).matcher("");
   1329 
   1330     /**
   1331      * Test that access to currency info in supplemental data is ok. At this
   1332      * point just a simple test.
   1333      *
   1334      * @param args
   1335      */
   1336     public void TestCurrency() {
   1337         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
   1338         Set<String> currencyCodes = STANDARD_CODES
   1339             .getGoodAvailableCodes("currency");
   1340         Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation
   1341             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
   1342                 TreeSet.class);
   1343         Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation
   1344             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
   1345                 TreeSet.class);
   1346         Set<String> territoriesWithoutModernCurrencies = new TreeSet<String>(
   1347             STANDARD_CODES.getGoodAvailableCodes("territory"));
   1348         Map<String, Date> currencyFirstValid = new TreeMap<String, Date>();
   1349         Map<String, Date> currencyLastValid = new TreeMap<String, Date>();
   1350         territoriesWithoutModernCurrencies.remove("ZZ");
   1351 
   1352         for (String territory : STANDARD_CODES
   1353             .getGoodAvailableCodes("territory")) {
   1354             /* "EU" behaves like a country for purposes of this test */
   1355             if ((SUPPLEMENTAL.getContained(territory) != null)
   1356                 && !territory.equals("EU")) {
   1357                 territoriesWithoutModernCurrencies.remove(territory);
   1358                 continue;
   1359             }
   1360             Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
   1361                 .getCurrencyDateInfo(territory);
   1362             if (currencyInfo == null) {
   1363                 continue; // error, but will pick up below.
   1364             }
   1365             for (CurrencyDateInfo dateInfo : currencyInfo) {
   1366                 final String currency = dateInfo.getCurrency();
   1367                 final Date start = dateInfo.getStart();
   1368                 final Date end = dateInfo.getEnd();
   1369                 if (dateInfo.getErrors().length() != 0) {
   1370                     logln("parsing " + territory + "\t" + dateInfo.toString()
   1371                         + "\t" + dateInfo.getErrors());
   1372                 }
   1373                 Date firstValue = currencyFirstValid.get(currency);
   1374                 if (firstValue == null || firstValue.compareTo(start) < 0) {
   1375                     currencyFirstValid.put(currency, start);
   1376                 }
   1377                 Date lastValue = currencyLastValid.get(currency);
   1378                 if (lastValue == null || lastValue.compareTo(end) > 0) {
   1379                     currencyLastValid.put(currency, end);
   1380                 }
   1381                 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender
   1382                     // is
   1383                     // OK...
   1384                     modernCurrencyCodes.put(currency,
   1385                         new Pair<String, CurrencyDateInfo>(territory,
   1386                             dateInfo));
   1387                     territoriesWithoutModernCurrencies.remove(territory);
   1388                 } else {
   1389                     nonModernCurrencyCodes.put(currency,
   1390                         new Pair<String, CurrencyDateInfo>(territory,
   1391                             dateInfo));
   1392                 }
   1393                 logln(territory
   1394                     + "\t"
   1395                     + dateInfo.toString()
   1396                     + "\t"
   1397                     + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME,
   1398                         currency));
   1399             }
   1400         }
   1401         // fix up
   1402         nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet());
   1403         Relation<String, String> isoCurrenciesToCountries = Relation.of(
   1404             new TreeMap<String, Set<String>>(), TreeSet.class)
   1405             .addAllInverted(isoCodes.getCountryToCodes());
   1406         // now print error messages
   1407         logln("Modern Codes: " + modernCurrencyCodes.size() + "\t"
   1408             + modernCurrencyCodes);
   1409         Set<String> missing = new TreeSet<String>(
   1410             isoCurrenciesToCountries.keySet());
   1411         missing.removeAll(modernCurrencyCodes.keySet());
   1412         if (missing.size() != 0) {
   1413             errln("Missing codes compared to ISO: " + missing.toString());
   1414         }
   1415 
   1416         for (String currency : modernCurrencyCodes.keySet()) {
   1417             Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes
   1418                 .getAll(currency);
   1419             final String name = testInfo.getEnglish().getName(
   1420                 CLDRFile.CURRENCY_NAME, currency);
   1421 
   1422             Set<String> isoCountries = isoCurrenciesToCountries
   1423                 .getAll(currency);
   1424             if (isoCountries == null) {
   1425                 isoCountries = new TreeSet<String>();
   1426             }
   1427 
   1428             TreeSet<String> cldrCountries = new TreeSet<String>();
   1429             for (Pair<String, CurrencyDateInfo> x : data) {
   1430                 cldrCountries.add(x.getFirst());
   1431             }
   1432             if (!isoCountries.equals(cldrCountries)) {
   1433                 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) {
   1434 
   1435                     errln("Mismatch between ISO and Cldr modern currencies for "
   1436                         + currency + "\tISO:" + isoCountries + "\tCLDR:"
   1437                         + cldrCountries);
   1438                     showCountries("iso-cldr", isoCountries, cldrCountries, missing);
   1439                     showCountries("cldr-iso", cldrCountries, isoCountries, missing);
   1440                 }
   1441             }
   1442 
   1443             if (oldMatcher.reset(name).find()) {
   1444                 errln("Has 'old' in name but still used " + "\t" + currency
   1445                     + "\t" + name + "\t" + data);
   1446             }
   1447             if (newMatcher.reset(name).find()
   1448                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
   1449                 // find the first use. If older than 5 years, flag as error
   1450                 if (currencyFirstValid.get(currency).compareTo(
   1451                     LIMIT_FOR_NEW_CURRENCY) < 0) {
   1452                     errln("Has 'new' in name but used since "
   1453                         + CurrencyDateInfo.formatDate(currencyFirstValid
   1454                             .get(currency))
   1455                         + "\t" + currency + "\t"
   1456                         + name + "\t" + data);
   1457                 } else {
   1458                     logln("Has 'new' in name but used since "
   1459                         + CurrencyDateInfo.formatDate(currencyFirstValid
   1460                             .get(currency))
   1461                         + "\t" + currency + "\t"
   1462                         + name + "\t" + data);
   1463                 }
   1464             }
   1465         }
   1466         logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size()
   1467             + "\t" + nonModernCurrencyCodes);
   1468         for (String currency : nonModernCurrencyCodes.keySet()) {
   1469             final String name = testInfo.getEnglish().getName(
   1470                 CLDRFile.CURRENCY_NAME, currency);
   1471             if (newMatcher.reset(name).find()
   1472                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
   1473                 logln("Has 'new' in name but NOT used since "
   1474                     + CurrencyDateInfo.formatDate(currencyLastValid
   1475                         .get(currency))
   1476                     + "\t" + currency + "\t" + name
   1477                     + "\t" + nonModernCurrencyCodes.getAll(currency));
   1478             } else if (!oldMatcher.reset(name).find()
   1479                 && !OK_TO_NOT_HAVE_OLD.contains(currency)) {
   1480                 logln("Doesn't have 'old' or date range in name but NOT used since "
   1481                     + CurrencyDateInfo.formatDate(currencyLastValid
   1482                         .get(currency))
   1483                     + "\t"
   1484                     + currency
   1485                     + "\t"
   1486                     + name
   1487                     + "\t" + nonModernCurrencyCodes.getAll(currency));
   1488                 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes
   1489                     .getAll(currency)) {
   1490                     final String territory = pair.getFirst();
   1491                     Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
   1492                         .getCurrencyDateInfo(territory);
   1493                     for (CurrencyDateInfo dateInfo : currencyInfo) {
   1494                         if (dateInfo.getEnd().compareTo(NOW) < 0) {
   1495                             continue;
   1496                         }
   1497                         logln("\tCurrencies used instead: "
   1498                             + territory
   1499                             + "\t"
   1500                             + dateInfo
   1501                             + "\t"
   1502                             + testInfo.getEnglish().getName(
   1503                                 CLDRFile.CURRENCY_NAME,
   1504                                 dateInfo.getCurrency()));
   1505 
   1506                     }
   1507                 }
   1508 
   1509             }
   1510         }
   1511         Set<String> remainder = new TreeSet<String>();
   1512         remainder.addAll(currencyCodes);
   1513         remainder.removeAll(nonModernCurrencyCodes.keySet());
   1514         // TODO make this an error, except for allowed exceptions.
   1515         logln("Currencies without Territories: " + remainder);
   1516         if (territoriesWithoutModernCurrencies.size() != 0) {
   1517             errln("Modern territory missing currency: "
   1518                 + territoriesWithoutModernCurrencies);
   1519         }
   1520     }
   1521 
   1522     private void showCountries(final String title, Set<String> isoCountries,
   1523         Set<String> cldrCountries, Set<String> missing) {
   1524         missing.clear();
   1525         missing.addAll(isoCountries);
   1526         missing.removeAll(cldrCountries);
   1527         for (String country : missing) {
   1528             logln("\t\tExtra in " + title + "\t" + country + " - "
   1529                 + getRegionName(country));
   1530         }
   1531     }
   1532 
   1533     public void TestCurrencyDecimalPlaces() {
   1534         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
   1535         Relation<String, IsoCurrencyParser.Data> codeList = isoCodes
   1536             .getCodeList();
   1537         Set<String> currencyCodes = STANDARD_CODES
   1538             .getGoodAvailableCodes("currency");
   1539         for (String cc : currencyCodes) {
   1540             Set<IsoCurrencyParser.Data> d = codeList.get(cc);
   1541             if (d != null) {
   1542                 for (IsoCurrencyParser.Data x : d) {
   1543                     CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc);
   1544                     if (cni.digits != x.getMinorUnit()) {
   1545                         logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc +
   1546                             ". ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits);
   1547                     }
   1548                 }
   1549             }
   1550         }
   1551     }
   1552 
   1553     /**
   1554      * Verify that we have a default script for every CLDR base language
   1555      */
   1556     public void TestDefaultScripts() {
   1557         SupplementalDataInfo supp = SUPPLEMENTAL;
   1558         Map<String, String> likelyData = supp.getLikelySubtags();
   1559         Map<String, String> baseToDefaultContentScript = new HashMap<String, String>();
   1560         for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) {
   1561             String script = locale.getScript();
   1562             if (!script.isEmpty() && locale.getCountry().isEmpty()) {
   1563                 baseToDefaultContentScript.put(locale.getLanguage(), script);
   1564             }
   1565         }
   1566         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
   1567             if ("root".equals(locale)) {
   1568                 continue;
   1569             }
   1570             CLDRLocale loc = CLDRLocale.getInstance(locale);
   1571             String baseLanguage = loc.getLanguage();
   1572             String defaultScript = supp.getDefaultScript(baseLanguage);
   1573 
   1574             String defaultContentScript = baseToDefaultContentScript
   1575                 .get(baseLanguage);
   1576             if (defaultContentScript != null) {
   1577                 assertEquals(loc + " defaultContentScript = default",
   1578                     defaultScript, defaultContentScript);
   1579             }
   1580             String likely = likelyData.get(baseLanguage);
   1581             String likelyScript = likely == null ? null : CLDRLocale
   1582                 .getInstance(likely).getScript();
   1583             Map<Type, BasicLanguageData> scriptInfo = supp
   1584                 .getBasicLanguageDataMap(baseLanguage);
   1585             if (scriptInfo == null) {
   1586                 errln(loc + ": has no BasicLanguageData");
   1587             } else {
   1588                 BasicLanguageData data = scriptInfo.get(Type.primary);
   1589                 if (data == null) {
   1590                     data = scriptInfo.get(Type.secondary);
   1591                 }
   1592                 if (data == null) {
   1593                     errln(loc + ": has no scripts in BasicLanguageData");
   1594                 } else if (!data.getScripts().contains(defaultScript)) {
   1595                     errln(loc + ": " + defaultScript
   1596                         + " not in BasicLanguageData " + data.getScripts());
   1597                 }
   1598             }
   1599 
   1600             assertEquals(loc + " likely = default", defaultScript, likelyScript);
   1601 
   1602             assertNotNull(loc + ": needs default script", defaultScript);
   1603 
   1604             if (!loc.getScript().isEmpty()) {
   1605                 if (!loc.getScript().equals(defaultScript)) {
   1606                     assertNotEquals(locale
   1607                         + ": only include script if not default",
   1608                         loc.getScript(), defaultScript);
   1609                 }
   1610             }
   1611 
   1612         }
   1613     }
   1614 
            /**
             * Severity selector for {@code errOrLog}: {@code log} reports through
             * logln, {@code warn} through warnln, and {@code error} through errln,
             * unless a known-issue ticket is supplied, in which case the issue is
             * logged as known and the message is reported through warnln.
             */
   1615     enum CoverageIssue {
   1616         log, warn, error
   1617     }
   1618 
   1619     public void TestPluralCompleteness() {
   1620         // Set<String> cardinalLocales = new
   1621         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
   1622         // Set<String> ordinalLocales = new
   1623         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal));
   1624         // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals =
   1625         // PluralRulesFactory.getLocaleToSamplePatterns();
   1626         // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales();
   1627         // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale,
   1628         // type).keySet());
   1629         // Map<ULocale, PluralRules> overrideCardinals =
   1630         // PluralRulesFactory.getPluralOverrides();
   1631         // Set<ULocale> overrideCardinalLocales = new
   1632         // HashSet<ULocale>(overrideCardinals.keySet());
   1633 
   1634         Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales(
   1635             Organization.google, EnumSet.of(Level.MODERN));
   1636         Set<String> allLocales = testInfo.getCldrFactory().getAvailable();
   1637         LanguageTagParser ltp = new LanguageTagParser();
   1638         for (String locale : allLocales) {
   1639             // the only known case where plural rules depend on region or script
   1640             // is pt_PT
   1641             if (locale.equals("root")) {
   1642                 continue;
   1643             }
   1644             ltp.set(locale);
   1645             if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) {
   1646                 continue;
   1647             }
   1648             CoverageIssue needsCoverage = testLocales.contains(locale)
   1649                 ? CoverageIssue.error
   1650                 : CoverageIssue.log;
   1651             CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage;
   1652 
   1653             //            if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) {
   1654             //                if (locale.equals("be") || locale.equals("ga")) {
   1655             //                    needsCoverage = CoverageIssue.warn;
   1656             //                }
   1657             //            }
   1658             PluralRulesFactory prf = PluralRulesFactory
   1659                 .getInstance(CLDRConfig.getInstance()
   1660                     .getSupplementalDataInfo());
   1661 
   1662             for (PluralType type : PluralType.values()) {
   1663                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale,
   1664                     false);
   1665                 if (pluralInfo == null) {
   1666                     errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales");
   1667                     continue;
   1668                 }
   1669                 Set<Count> counts = pluralInfo.getCounts();
   1670                 // if (counts.size() == 1) {
   1671                 // continue; // skip checking samples
   1672                 // }
   1673                 HashSet<String> samples = new HashSet<String>();
   1674                 EnumSet<Count> countsWithNoSamples = EnumSet
   1675                     .noneOf(Count.class);
   1676                 Relation<String, Count> samplesToCounts = Relation.of(
   1677                     new HashMap(), LinkedHashSet.class);
   1678                 Set<Count> countsFound = prf.getSampleCounts(locale,
   1679                     type.standardType);
   1680                 StringBuilder failureCases = new StringBuilder();
   1681                 for (Count count : counts) {
   1682                     String pattern = prf.getSamplePattern(locale, type.standardType, count);
   1683                     final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
   1684                     failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
   1685                     if (countsFound == null || !countsFound.contains(count)) {
   1686                         countsWithNoSamples.add(count);
   1687                     } else {
   1688                         samplesToCounts.put(pattern, count);
   1689                         logln(locale + "\t" + type + "\t" + count + "\t"
   1690                             + pattern);
   1691                     }
   1692                 }
   1693                 if (!countsWithNoSamples.isEmpty()) {
   1694                     errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
   1695                         "cldrbug:7075", "Missing ordinal minimal pairs");
   1696                     errOrLog(needsCoverage2, failureCases.toString());
   1697                 }
   1698                 for (Entry<String, Set<Count>> entry : samplesToCounts
   1699                     .keyValuesSet()) {
   1700                     if (entry.getValue().size() != 1) {
   1701                         errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
   1702                             + " => " + entry.getKey() + "", "cldrbug:7119", "Some duplicate minimal pairs");
   1703                         errOrLog(needsCoverage2, failureCases.toString());
   1704                     }
   1705                 }
   1706             }
   1707         }
   1708     }
   1709 
   1710     public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
   1711         switch (causeError) {
   1712         case error:
   1713             if (logTicket == null) {
   1714                 errln(message);
   1715                 break;
   1716             }
   1717             logKnownIssue(logTicket, logComment);
   1718             // fall through
   1719         case warn:
   1720             warnln(message);
   1721             break;
   1722         case log:
   1723             logln(message);
   1724             break;
   1725         }
   1726     }
   1727 
            /**
             * Convenience overload of the four-argument errOrLog that passes null
             * for both the known-issue ticket and its comment.
             *
             * @param causeError severity: error, warn or log
             * @param message the text to report
             */
   1728     public void errOrLog(CoverageIssue causeError, String message) {
   1729         errOrLog(causeError, message, null, null);
   1730     }
   1731 
   1732     public void TestNumberingSystemDigits() {
   1733 
   1734         // Don't worry about digits from supplemental planes yet ( ICU can't
   1735         // handle them anyways )
   1736         // hanidec is the only known non codepoint order numbering system
   1737         // TODO: Fix so that it works properly on non-BMP digit strings.
   1738         String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd",
   1739             "sora", "takr" };
   1740         List<String> knownExceptionList = Arrays.asList(knownExceptions);
   1741         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
   1742             if (knownExceptionList.contains(ns)) {
   1743                 continue;
   1744             }
   1745             String digits = SUPPLEMENTAL.getDigits(ns);
   1746             int previousChar = 0;
   1747             int ch;
   1748 
   1749             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
   1750                 ch = digits.codePointAt(i);
   1751                 if (i > 0 && ch != previousChar + 1) {
   1752                     errln("Digits for numbering system "
   1753                         + ns
   1754                         + " are not in code point order. Previous char = U+"
   1755                         + Utility.hex(previousChar, 4)
   1756                         + " Current char = U+" + Utility.hex(ch, 4));
   1757                     break;
   1758                 }
   1759                 previousChar = ch;
   1760             }
   1761         }
   1762     }
   1763 
   1764     public void TestNumberingSystemDigitCompleteness() {
   1765         List<Integer> unicodeDigits = new ArrayList<Integer>();
   1766         for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
   1767             if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
   1768                 unicodeDigits.add(Integer.valueOf(cp));
   1769             }
   1770         }
   1771 
   1772         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
   1773             String digits = SUPPLEMENTAL.getDigits(ns);
   1774             int ch;
   1775 
   1776             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
   1777                 ch = digits.codePointAt(i);
   1778                 unicodeDigits.remove(Integer.valueOf(ch));
   1779             }
   1780         }
   1781 
   1782         if (unicodeDigits.size() > 0) {
   1783             for (Integer i : unicodeDigits) {
   1784                 errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
   1785                     + UScript.getShortName(UScript.getScript(i)));
   1786             }
   1787         }
   1788     }
   1789 
   1790     public void TestMetazones() {
   1791         Date goalMin = new Date(70, 0, 1);
   1792         Date goalMax = new Date(300, 0, 2);
   1793         ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
   1794         for (String timezoneRaw : TimeZone.getAvailableIDs()) {
   1795             String timezone = TimeZone.getCanonicalID(timezoneRaw);
   1796             String region = TimeZone.getRegion(timezone);
   1797             if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
   1798                 continue;
   1799             }
   1800             if (knownTZWithoutMetazone.contains(timezone)) {
   1801                 continue;
   1802             }
   1803             final Set<MetaZoneRange> ranges = SUPPLEMENTAL
   1804                 .getMetaZoneRanges(timezone);
   1805 
   1806             if (assertNotNull("metazones for " + timezone, ranges)) {
   1807                 long min = Long.MAX_VALUE;
   1808                 long max = Long.MIN_VALUE;
   1809                 for (MetaZoneRange range : ranges) {
   1810                     if (range.dateRange.from != DateRange.START_OF_TIME) {
   1811                         min = Math.min(min, range.dateRange.from);
   1812                     }
   1813                     if (range.dateRange.to != DateRange.END_OF_TIME) {
   1814                         max = Math.max(max, range.dateRange.to);
   1815                     }
   1816                 }
   1817                 assertRelation(timezone + " has metazone before 1970?", true,
   1818                     goalMin, LEQ, new Date(min));
   1819                 assertRelation(timezone
   1820                     + " has metazone until way in the future?", true,
   1821                     goalMax, GEQ, new Date(max));
   1822             }
   1823         }
   1824         com.google.common.collect.Interners i;
   1825     }
   1826 
   1827     public void Test9924() {
   1828         PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(LOCALES_FIXED ? "zh" : "zh_Hans", "CN");
   1829         PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
   1830         assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
   1831     }
   1832 
   1833     public void Test10765() { //
   1834         Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
   1835         Set<String> mainLanguages = new TreeSet<>();
   1836         LanguageTagParser ltp = new LanguageTagParser();
   1837         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
   1838             mainLanguages.add(ltp.set(locale).getLanguage());
   1839         }
   1840         // add special codes we want to see anyway
   1841         mainLanguages.add("und");
   1842         mainLanguages.add("mul");
   1843         mainLanguages.add("zxx");
   1844 
   1845         if (!mainLanguages.containsAll(surveyToolLanguages)) {
   1846             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
   1847             Set<String> temp = new TreeSet<>(surveyToolLanguages);
   1848             temp.removeAll(mainLanguages);
   1849             Set<String> modern = new TreeSet<>();
   1850             Set<String> comprehensive = new TreeSet<>();
   1851             for (String lang : temp) {
   1852                 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
   1853                 if (level.compareTo(Level.MODERN) <= 0) {
   1854                     modern.add(lang);
   1855                 } else {
   1856                     comprehensive.add(lang);
   1857                 }
   1858             }
   1859             warnln("Modern Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
   1860             logln("Comprehensive Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
   1861         }
   1862         if (!surveyToolLanguages.containsAll(mainLanguages)) {
   1863             mainLanguages.removeAll(surveyToolLanguages);
   1864             assertEquals("No main/* languages are missing from Survey Tool:language names (eg <variable id='$language' type='choice'>) ",
   1865                 Collections.EMPTY_SET, mainLanguages);
   1866         }
   1867     }
   1868 
   1869     private Set<String> getNames(Set<String> temp) {
   1870         Set<String> tempNames = new TreeSet<>();
   1871         for (String langCode : temp) {
   1872             tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
   1873         }
   1874         return tempNames;
   1875     }
   1876 }
   1877