Home | History | Annotate | Download | only in test
      1 /*
      2  ******************************************************************************
      3  * Copyright (C) 2004, International Business Machines Corporation and        *
      4  * others. All Rights Reserved.                                               *
      5  ******************************************************************************
      6  */
      7 package org.unicode.cldr.test;
      8 
      9 import java.io.File;
     10 import java.io.IOException;
     11 import java.io.PrintWriter;
     12 import java.util.Arrays;
     13 import java.util.Calendar;
     14 import java.util.Collection;
     15 import java.util.Date;
     16 import java.util.HashMap;
     17 import java.util.HashSet;
     18 import java.util.Iterator;
     19 import java.util.LinkedHashSet;
     20 import java.util.List;
     21 import java.util.Map;
     22 import java.util.Set;
     23 import java.util.TreeMap;
     24 import java.util.TreeSet;
     25 
     26 import org.unicode.cldr.draft.FileUtilities;
     27 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
     28 import org.unicode.cldr.util.CLDRFile;
     29 import org.unicode.cldr.util.CLDRPaths;
     30 import org.unicode.cldr.util.CldrUtility;
     31 import org.unicode.cldr.util.Factory;
     32 import org.unicode.cldr.util.LanguageTagParser;
     33 import org.unicode.cldr.util.SimpleFactory;
     34 import org.unicode.cldr.util.StandardCodes;
     35 import org.unicode.cldr.util.TimezoneFormatter;
     36 import org.unicode.cldr.util.XPathParts;
     37 import org.xml.sax.SAXException;
     38 
     39 import com.ibm.icu.dev.test.TestFmwk;
     40 import com.ibm.icu.text.BreakIterator;
     41 import com.ibm.icu.text.DecimalFormat;
     42 import com.ibm.icu.text.NumberFormat;
     43 import com.ibm.icu.text.UTF16;
     44 import com.ibm.icu.text.UnicodeSet;
     45 import com.ibm.icu.util.ULocale;
     46 
     47 /**
     48  * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
     49  * <blockquote>-nothrow</blockquote>
     50  * To run a particular set of tests, include their names, like
     51  * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote>
     52  * To show more information (logln), add -verbose
     53  * <p>
     54  * There are some Java system properties (-D options) that can be used with the test. <br>
     55  * -DSHOW_FILES=<anything> shows all create/open of files. <br>
     56  * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br>
     57  * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (e.g. not cldr/common/main). For example,
     58  * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this
     59  * can be used to check that directory. <br>
     60  * -DXML_SKIP_DRAFT=<anything> skips draft locales; the property only needs to be set, its value is not examined
     61  */
     62 public class CLDRTest extends TestFmwk {
     63     /**
     64      * privates
     65      */
     66     private static String MATCH;
     67     private static String MAIN_DIR;
     68     private static boolean SKIP_DRAFT;
     69     private Set<String> locales;
     70     private Set<String> languageLocales;
     71     private Factory cldrFactory;
     72     private CLDRFile resolvedRoot;
     73     private CLDRFile resolvedEnglish;
     74     private final UnicodeSet commonAndInherited = new UnicodeSet(
     75         "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
     76     private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" };
     77     private static final String[] MONTHORDAYS = { "day", "month" };
     78     private Map<String, String> localeNameCache = new HashMap<String, String>();
     79     private CLDRFile english = null;
     80 
     81     private Set<String> surveyInfo = new TreeSet<String>();
     82 
     83     /**
     84      * TestFmwk boilerplate
     85      */
     86     public static void main(String[] args) throws Exception {
     87         MATCH = System.getProperty("XML_MATCH");
     88         if (MATCH == null)
     89             MATCH = ".*";
     90         else
     91             System.out.println("Resetting MATCH:" + MATCH);
     92         MAIN_DIR = System.getProperty("XML_MAIN_DIR");
     93         if (MAIN_DIR == null)
     94             MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
     95         else
     96             System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
     97         SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
     98         if (SKIP_DRAFT) System.out.println("Skipping Draft locales");
     99 
    100         double deltaTime = System.currentTimeMillis();
    101         new CLDRTest().run(args);
    102         deltaTime = System.currentTimeMillis() - deltaTime;
    103         System.out.println("Seconds: " + deltaTime / 1000);
    104 
    105     }
    106 
    107     public void TestZZZZHack() throws IOException {
    108         // hack to get file written at the end of run.
    109         PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt");
    110         for (String s : surveyInfo) {
    111             surveyFile.println(s);
    112         }
    113         surveyFile.close();
    114     }
    115 
    /**
     * Sets up the shared test state (TestFmwk boilerplate): creates the factory from MAIN_DIR and
     * MATCH, records the available locales and languages, and loads "root" and "en" with the
     * second argument of make() set to true.
     */
    public CLDRTest() throws SAXException, IOException {
        // TODO parameterize the directory and filter
        this.cldrFactory = Factory.make(MAIN_DIR, MATCH);

        this.locales = this.cldrFactory.getAvailable();
        this.languageLocales = this.cldrFactory.getAvailableLanguages();

        this.resolvedRoot = this.cldrFactory.make("root", true);
        this.resolvedEnglish = this.cldrFactory.make("en", true);
    }
    134 
    135     /**
    136      * Check to make sure that the currency formats are kosher.
    137      */
    138     public void TestCurrencyFormats() {
    139         // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
    140         // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
    141         for (String locale : locales) {
    142             boolean isPOSIX = locale.indexOf("POSIX") >= 0;
    143             logln("Testing: " + locale);
    144             CLDRFile item = cldrFactory.make(locale, false);
    145             for (String xpath : item) {
    146                 NumericType type = NumericType.getNumericType(xpath);
    147                 if (type == NumericType.NOT_NUMERIC) continue;
    148                 String value = item.getStringValue(xpath);
    149                 // at this point, we only have currency formats
    150                 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
    151                 if (!pattern.equals(value)) {
    152                     String draft = "";
    153                     if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
    154                     assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value);
    155                 }
    156             }
    157         }
    158     }
    159 
    160     /**
    161      * Internal class
    162      */
    163     private static class ValueCount {
    164         int count = 1;
    165         String value;
    166         String fullxpath;
    167     }
    168 
    169     /**
    170      * Verify that if all the children of a language locale do not have the same value for the same key.
    171      */
    172     public void TestCommonChildren() {
    173         if (disableUntilLater("TestCommonChildren")) return;
    174 
    175         Map<String, ValueCount> currentValues = new TreeMap<String, ValueCount>();
    176         Set<String> okValues = new TreeSet<String>();
    177 
    178         for (String parent : languageLocales) {
    179             logln("Testing: " + parent);
    180             currentValues.clear();
    181             okValues.clear();
    182             Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
    183             for (String locale : availableWithParent) {
    184                 logln("\tTesting: " + locale);
    185                 CLDRFile item = cldrFactory.make(locale, false);
    186                 // Walk through all the xpaths, adding to currentValues
    187                 // Whenever two values for the same xpath are different, we remove from currentValues, and add to
    188                 // okValues
    189                 for (String xpath : item) {
    190                     if (okValues.contains(xpath)) continue;
    191                     if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
    192                     String v = item.getStringValue(xpath);
    193                     ValueCount last = currentValues.get(xpath);
    194                     if (last == null) {
    195                         ValueCount vc = new ValueCount();
    196                         vc.value = v;
    197                         vc.fullxpath = item.getFullXPath(xpath);
    198                         currentValues.put(xpath, vc);
    199                     } else if (v.equals(last.value)) {
    200                         last.count++;
    201                     } else {
    202                         okValues.add(xpath);
    203                         currentValues.remove(xpath);
    204                     }
    205                 }
    206                 // at the end, only the keys left in currentValues are (possibly) faulty
    207                 // they are actually bad IFF either
    208                 // (a) the count is equal to the total (thus all children are the same), or
    209                 // (b) their value is the same as the parent's resolved value (thus all children are the same or the
    210                 // same
    211                 // as the inherited parent value).
    212             }
    213             if (currentValues.size() == 0) continue;
    214             int size = availableWithParent.size();
    215             CLDRFile parentCLDR = cldrFactory.make(parent, true);
    216             for (String xpath : currentValues.keySet()) {
    217                 ValueCount vc = currentValues.get(xpath);
    218                 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath))
    219                     && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) {
    220                     String draft = "";
    221                     if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
    222                     String count = (vc.count == size ? "" : vc.count + "/") + size;
    223                     warnln(getLocaleAndName(parent) + draft +
    224                         "\tall children (" + count + ") have same value for:\t"
    225                         + xpath + ";\t" + vc.value);
    226                 }
    227             }
    228         }
    229     }
    230 
    231     static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" };
    232 
    233     /**
    234      * Check that the exemplars include all characters in the data.
    235      */
    236     public void TestThatExemplarsContainAll() {
    237         UnicodeSet allExemplars = new UnicodeSet();
    238         if (disableUntilLater("TestThatExemplarsContainAll")) return;
    239         Set<String> counts = new TreeSet<String>();
    240         int totalCount = 0;
    241         UnicodeSet localeMissing = new UnicodeSet();
    242         for (String locale : locales) {
    243             if (locale.equals("root")) continue;
    244             CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER
    245             UnicodeSet exemplars = getFixedExemplarSet(locale, resolved);
    246             CLDRFile plain = cldrFactory.make(locale, false);
    247             int count = 0;
    248             localeMissing.clear();
    249             file: for (String xpath : plain) {
    250                 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) {
    251                     if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items.
    252                 }
    253                 if (SKIP_DRAFT) {
    254                     String fullxpath = plain.getFullXPath(xpath);
    255                     if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
    256                 }
    257                 if (xpath.startsWith("//ldml/posix/messages")) continue;
    258                 String value = plain.getStringValue(xpath);
    259                 allExemplars.addAll(value);
    260                 if (!exemplars.containsAll(value)) {
    261                     count++;
    262                     UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
    263                     localeMissing.addAll(missing);
    264                     logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing
    265                         + ", not in exemplars");
    266                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters "
    267                         + missing.toPattern(false) + ", which are not in exemplars");
    268                 }
    269             }
    270             NumberFormat nf = new DecimalFormat("000");
    271             if (count != 0) {
    272                 totalCount += count;
    273                 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
    274             }
    275             if (localeMissing.size() != 0) {
    276                 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
    277             }
    278         }
    279         for (String c : counts) {
    280             logln(c);
    281         }
    282         logln("Total Count: " + totalCount);
    283         System.out.println("All exemplars: " + allExemplars.toPattern(true));
    284     }
    285 
    286     // Get Date-Time in milliseconds
    287     private static long getDateTimeinMillis(int year, int month, int date) {
    288         Calendar cal = Calendar.getInstance();
    289         cal.set(year, month, date);
    290         return cal.getTimeInMillis();
    291     }
    292 
    293     static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);
    294 
    295     /**
    296      *
    297      */
    298     private boolean disableUntilLater(String string) {
    299         if (new Date().getTime() >= disableDate) return false;
    300         warnln("Disabling " + string + " until " + new Date(disableDate));
    301         return true;
    302     }
    303 
    304     /**
    305      * Internal
    306      */
    307     private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
    308         UnicodeSet exemplars = getExemplarSet(cldrfile, "");
    309         if (exemplars.size() == 0) {
    310             errln(getLocaleAndName(locale) + " has empty exemplar set");
    311         }
    312         exemplars.addAll(getExemplarSet(cldrfile, "standard"));
    313         UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
    314         if (exemplars.containsSome(auxiliary)) {
    315             errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " +
    316                 new UnicodeSet(exemplars).retainAll(auxiliary) +
    317                 ": change auxiliary to " + auxiliary.removeAll(exemplars));
    318         }
    319         exemplars.addAll(auxiliary);
    320         exemplars.addAll(commonAndInherited);
    321         return exemplars;
    322     }
    323 
    324     /**
    325      * @return Gets an exemplar set. Also verifies that the set contains no properties.
    326      */
    327     public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
    328         if (type.length() != 0) type = "[@type=\"" + type + "\"]";
    329         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
    330         if (v == null) return new UnicodeSet();
    331         String pattern = v;
    332         if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) {
    333             errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern);
    334         }
    335         try {
    336             UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
    337             result.remove(0x20);
    338             return result;
    339         } catch (RuntimeException e) {
    340             e.printStackTrace();
    341             errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">");
    342             return new UnicodeSet();
    343         }
    344         // if (type.length() != 0) System.out.println("fetched set for " + type);
    345     }
    346 
    347     public String getLocaleAndName(String locale) {
    348         return locale + " (" + getLocaleName(locale) + ")";
    349     }
    350 
    351     /**
    352      * @return the ID plus its localization (for language, script, and territory IDs only)
    353      */
    354     public String getIDAndLocalization(String id) {
    355         return id + " " + getLocalization(id);
    356     }
    357 
    358     /**
    359      * @return the localization (for language, script, and territory IDs only)
    360      */
    361     public String getLocalization(String id) {
    362         if (english == null) english = cldrFactory.make("en", true);
    363         if (id.length() == 0) return "?";
    364         // pick on basis of case
    365         char ch = id.charAt(0);
    366         if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
    367         if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id);
    368         return getName(english, "territories/territory", id);
    369     }
    370 
    371     /**
    372      * Internal
    373      */
    374     private String getIDAndLocalization(Set<String> missing) {
    375         StringBuffer buffer = new StringBuffer();
    376         for (String next : missing) {
    377             if (buffer.length() != 0) buffer.append("; ");
    378             buffer.append(getIDAndLocalization(next));
    379         }
    380         return buffer.toString();
    381     }
    382 
    383     public String getLocaleName(String locale) {
    384         String name = localeNameCache.get(locale);
    385         if (name != null) return name;
    386         if (english == null) english = cldrFactory.make("en", true);
    387         String result = english.getName(locale);
    388         /*
    389          * Collection c = Utility.splitList(locale, '_', false, null);
    390          * String[] pieces = new String[c.size()];
    391          * c.toArray(pieces);
    392          * int i = 0;
    393          * String result = getName(english, "languages/language", pieces[i++]);
    394          * if (pieces[i].length() == 0) return result;
    395          * if (pieces[i].length() == 4) {
    396          * result += " " + getName(english, "scripts/script", pieces[i++]);
    397          * }
    398          * if (pieces[i].length() == 0) return result;
    399          * result += " " + getName(english, "territories/territory", pieces[i++]);
    400          * if (pieces[i].length() == 0) return result;
    401          * result += " " + getName(english, "variant/variants", pieces[i++]);
    402          */
    403         localeNameCache.put(locale, result);
    404         return result;
    405     }
    406 
    407     /**
    408      * Internal
    409      */
    410     private String getName(CLDRFile english, String kind, String type) {
    411         String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
    412         if (v == null) return "<" + type + ">";
    413         return v;
    414     }
    415 
    416     /**
    417      * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed)
    418      * or ISO 4217
    419      *
    420      * @throws IOException
    421      */
    422     public void TestForIllegalAttributeValues() {
    423         // check for illegal attribute values that are not in the DTD
    424         Map<String, Set<String>> result = new TreeMap<String, Set<String>>();
    425         Map<String, Set<String>> totalResult = new TreeMap<String, Set<String>>();
    426         for (String locale : locales) {
    427             logln("Testing: " + locale);
    428             CLDRFile item = cldrFactory.make(locale, false);
    429             result.clear();
    430             Set<String> xpathFailures = null; // don't collect
    431             // XPathParts parts;
    432             // String xpath;
    433             // CLDRFile.StringValue value;
    434             // String element;
    435             // Map attributes;
    436             checkAttributeValidity(item, result, xpathFailures);
    437 
    438             // now show
    439             //String localeName = getLocaleAndName(locale);
    440             for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) {
    441                 String code = it3.next();
    442                 Set<String> avalues = result.get(code);
    443                 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues));
    444                 Set<String> totalvalues = totalResult.get(code);
    445                 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<String>());
    446                 totalvalues.addAll(avalues);
    447             }
    448         }
    449         for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) {
    450             String code = it3.next();
    451             Set<String> avalues = totalResult.get(code);
    452             errln("All illegal attribute values for " + code + ", value:\t" + show(avalues));
    453         }
    454     }
    455 
    456     /**
    457      * Tests whether the display names have any collisions, e.g. if in the fully resolved
    458      * locale $ is used for both USD and UAD.
    459      *
    460      */
    461     public void TestDisplayNameCollisions() {
    462         if (disableUntilLater("TestDisplayNameCollisions")) return;
    463 
    464         Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
    465         for (int i = 0; i < maps.length; ++i)
    466             maps[i] = new HashMap<String, String>();
    467         Set<String> collisions = new TreeSet<String>();
    468         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
    469             String locale = it.next();
    470             CLDRFile item = cldrFactory.make(locale, true);
    471             for (int i = 0; i < maps.length; ++i)
    472                 maps[i].clear();
    473             collisions.clear();
    474 
    475             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
    476                 String xpath = it2.next();
    477                 int nameType = CLDRFile.getNameType(xpath);
    478                 if (nameType < 0) continue;
    479                 String value = item.getStringValue(xpath);
    480                 String xpath2 = maps[nameType].get(value);
    481                 if (xpath2 == null) {
    482                     maps[nameType].put(value, xpath);
    483                     continue;
    484                 }
    485                 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2);
    486                 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2);
    487             }
    488             String name = getLocaleAndName(locale) + "\t";
    489             for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) {
    490                 errln(name + it2.next());
    491             }
    492         }
    493     }
    494 
    495     /**
    496      * Checks the validity of attributes, based on StandardCodes.
    497      * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures.
    498      *
    499      * @param item
    500      * @param badCodes
    501      * @param xpathFailures
    502      */
    503     public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
    504         XPathParts parts = new XPathParts(null, null);
    505         for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
    506             String xpath = it2.next();
    507             parts.set(item.getFullXPath(xpath));
    508             for (int i = 0; i < parts.size(); ++i) {
    509                 if (parts.getAttributeCount(i) == 0) continue;
    510                 String element = parts.getElement(i);
    511                 Map<String, String> attributes = parts.getAttributes(i);
    512                 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) {
    513                     String attribute = it3.next();
    514                     String avalue = attributes.get(attribute);
    515                     checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures);
    516                 }
    517             }
    518         }
    519     }
    520 
    521     /**
    522      * Internal
    523      */
    524     private String show(Collection<String> avalues) {
    525         StringBuffer result = new StringBuffer("{");
    526         boolean first = true;
    527         for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) {
    528             if (first)
    529                 first = false;
    530             else
    531                 result.append(", ");
    532             result.append(it3.next().toString());
    533         }
    534         result.append("}");
    535         return result.toString();
    536     }
    537 
    538     /**
    539      * Internal function
    540      */
    541     private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results,
    542         Set<String> xpathsFailing) {
    543         StandardCodes codes = StandardCodes.make();
    544         if (attribute.equals("type")) {
    545             boolean checkReplacements = xpath.indexOf("/identity") < 0;
    546             if (element.equals("currency"))
    547                 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements);
    548             else if (element.equals("script"))
    549                 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements);
    550             else if (element.equals("territory"))
    551                 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements);
    552             else if (element.equals("language"))
    553                 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements);
    554             else if (element.equals("zone"))
    555                 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements);
    556         }
    557     }
    558 
    559     /**
    560      * Internal function
    561      *
    562      * @param checkReplacements
    563      *            TODO
    564      */
    565     private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results,
    566         Set<String> xpathFailures, boolean checkReplacements) {
    567         // ok if code is found AND it has no replacement
    568         if (codes.getData(code, avalue) != null
    569             && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;
    570 
    571         if (xpathFailures != null) xpathFailures.add(xpath);
    572         if (results == null) return;
    573         Set<String> s = results.get(code);
    574         if (s == null) {
    575             s = new TreeSet<String>();
    576             results.put(code, s);
    577         }
    578         s.add(avalue);
    579     }
    580 
    581     /**
    582      * Verify that a small set of locales (currently just English) has everything translated.
    583      *
    584      * @throws IOException
    585      */
    586     public void TestCompleteLocales() {
    587         // just test English for now
    588         if (english == null) english = cldrFactory.make("en", true);
    589         checkTranslatedCodes(english);
    590     }
    591 
    592     /**
    593      * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency.
    594      */
    595     private void checkTranslatedCodes(CLDRFile cldrfile) {
    596         StandardCodes codes = StandardCodes.make();
    597         checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
    598         // can't check timezones for English.
    599         // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
    600         checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
    601         checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
    602         checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", "");
    603         checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
    604     }
    605 
    606     /**
    607      * @param codes
    608      * @param type
    609      * @param prefix
    610      * @param postfix
    611      *            TODO
    612      */
    613     private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
    614 
    615         // TODO, expand to other languages
    616         Map<String, Set<String>> completionExceptions = new HashMap<String, Set<String>>();
    617         Set<String> scriptExceptions = new HashSet<String>();
    618         scriptExceptions.add("Cham");
    619         scriptExceptions.add("Thai");
    620         completionExceptions.put("script", scriptExceptions);
    621 
    622         Set<String> codeItems = codes.getGoodAvailableCodes(type);
    623         int count = 0;
    624         Set<String> exceptions = completionExceptions.get(type);
    625         for (String code : codeItems) {
    626             String rfcname = codes.getData(type, code);
    627             // if (rfcname.equals("ZZ")) continue;
    628             ++count;
    629             if (rfcname.equals("PRIVATE USE")) continue;
    630             String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
    631             String v = cldrfile.getStringValue(fullFragment);
    632             if (v == null) {
    633                 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
    634                 continue;
    635             }
    636             String translation = v;
    637             if (translation.equals(code)) {
    638                 if (exceptions != null && exceptions.contains(code)) continue;
    639                 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
    640                 continue;
    641             }
    642             if (false && !translation.equalsIgnoreCase(rfcname)) {
    643                 warnln(type + " translation differs from RFC, check: " + code + "\trfc: " + rfcname + "\tcldr: "
    644                     + translation);
    645             }
    646         }
    647         logln("Total " + type + ":\t" + count);
    648     }
    649 
    650     // <territoryContainment><group type="001" contains="002 009 019 142 150"/>
    651     // <languageData><language type="af" scripts="Latn" territories="ZA"/>
    652     void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories,
    653         Map<String, Set<String>> group_territory,
    654         Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) {
    655         boolean SHOW = false;
    656         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
    657         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
    658         XPathParts parts = new XPathParts(new UTF16.StringComparator(), null);
    659         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
    660             String path = it.next();
    661             try {
    662                 parts.set(supp.getFullXPath(path));
    663                 Map<String, String> m;
    664                 String type = "";
    665                 if (aliases != null && parts.findElement("alias") >= 0) {
    666                     m = parts.findAttributes(type = "languageAlias");
    667                     if (m == null) m = parts.findAttributes(type = "territoryAlias");
    668                     if (m != null) {
    669                         Map top = aliases.get(type);
    670                         if (top == null) aliases.put(type, top = new TreeMap());
    671                         top.put(m.get("type"), m.get("replacement"));
    672                     }
    673                 }
    674                 if (territory_currencies != null) {
    675                     m = parts.findAttributes("region");
    676                     if (m != null) {
    677                         String region = m.get("iso3166");
    678                         Set s = territory_currencies.get(region);
    679                         if (s == null) territory_currencies.put(region, s = new LinkedHashSet());
    680                         m = parts.findAttributes("currency");
    681                         if (m == null) {
    682                             warnln("missing currency for region: " + path);
    683                             continue;
    684                         }
    685                         String currency = m.get("iso4217");
    686                         s.add(currency);
    687                         m = parts.findAttributes("alternate");
    688                         String alternate = m == null ? null : (String) m.get("iso4217");
    689                         if (alternate != null) s.add(alternate);
    690                         continue;
    691                     }
    692                 }
    693                 m = parts.findAttributes("group");
    694                 if (m != null) {
    695                     if (group_territory == null) continue;
    696                     type = m.get("type");
    697                     String contains = m.get("contains");
    698                     group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true)));
    699                     continue;
    700                 }
    701                 m = parts.findAttributes("language");
    702                 if (m == null) continue;
    703                 String language = m.get("type");
    704                 String scripts = m.get("scripts");
    705                 if (scripts == null)
    706                     language_scripts.put(language, new TreeSet<String>());
    707                 else {
    708                     language_scripts.put(language, new TreeSet<String>(CldrUtility.splitList(scripts, ' ', true)));
    709                     if (SHOW)
    710                         System.out.println(getIDAndLocalization(language) + "\t\t"
    711                             + getIDAndLocalization(language_scripts.get(language)));
    712                 }
    713                 String territories = m.get("territories");
    714                 if (territories == null)
    715                     language_territories.put(language, new TreeSet<String>());
    716                 else {
    717                     language_territories.put(language, new TreeSet<String>(CldrUtility.splitList(territories, ' ', true)));
    718                     if (SHOW)
    719                         System.out.println(getIDAndLocalization(language) + "\t\t"
    720                             + getIDAndLocalization(language_territories.get(language)));
    721                 }
    722             } catch (RuntimeException e) {
    723                 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e);
    724             }
    725         }
    726     }
    727 
    728     /**
    729      * Verify that the minimal localizations are present.
    730      */
    731     public void TestMinimalLocalization() throws IOException {
    732         if (disableUntilLater("TestMinimalLocalization")) return;
    733 
    734         boolean testDraft = false;
    735         Map<String, Set<String>> language_scripts = new HashMap<String, Set<String>>();
    736         Map<String, Set<String>> language_territories = new HashMap<String, Set<String>>();
    737         getSupplementalData(language_scripts, language_territories, null, null, null);
    738         LanguageTagParser localIDParser = new LanguageTagParser();
    739         // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
    740         int[] failureCount = new int[1];
    741         int[] warningCount = new int[1];
    742         for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) {
    743             String locale = it.next();
    744             if (locale.equals("root")) continue;
    745             // if (!locale.equals("zh_Hant")) continue;
    746 
    747             CLDRFile item = cldrFactory.make(locale, true);
    748             if (!testDraft && item.isDraft()) {
    749                 logln(getLocaleAndName(locale) + "\tskipping draft");
    750                 continue;
    751             }
    752             UnicodeSet exemplars = getFixedExemplarSet(locale, item);
    753             CLDRFile missing = SimpleFactory.makeFile(locale);
    754             failureCount[0] = 0;
    755             warningCount[0] = 0;
    756             localIDParser.set(locale);
    757             String language = localIDParser.getLanguage();
    758             logln("Testing: " + locale);
    759             // languages
    760             Set<String> languages = new TreeSet<String>(CldrUtility.MINIMUM_LANGUAGES);
    761             languages.add(language);
    762             // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
    763             // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6
    764 
    765             checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);
    766 
    767             /*
    768              * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
    769              * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
    770              * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
    771              */
    772 
    773             Set<String> scripts = new TreeSet<String>();
    774             scripts.add("Latn");
    775             Set<String> others = language_scripts.get(language);
    776             if (others != null) scripts.addAll(others);
    777             checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);
    778 
    779             Set<String> countries = new TreeSet<String>(CldrUtility.MINIMUM_TERRITORIES);
    780             others = language_territories.get(language);
    781             if (others != null) countries.addAll(others);
    782             checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);
    783 
    784             Set<String> currencies = new TreeSet<String>();
    785             StandardCodes sc = StandardCodes.make();
    786             for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) {
    787                 String country = it2.next();
    788                 Set<String> countryCurrencies = sc.getMainCurrencies(country);
    789                 if (countryCurrencies == null) {
    790                     errln("Internal Error: no currencies for " + country + ", locale: " + locale);
    791                 } else {
    792                     currencies.addAll(countryCurrencies);
    793                 }
    794             }
    795             checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
    796             checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);
    797 
    798             // context=format and width=wide; context=stand-alone & width=abbreviated
    799             Set<String> months = new TreeSet<String>();
    800             for (int i = 1; i <= 12; ++i)
    801                 months.add(i + "");
    802             Set<String> days = new TreeSet<String>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }));
    803             for (int i = -7; i < 0; ++i) {
    804                 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
    805             }
    806 
    807             String filename = "missing_" + locale + ".xml";
    808             if (failureCount[0] > 0 || warningCount[0] > 0) {
    809                 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename);
    810                 missing.write(out);
    811                 out.close();
    812                 // String s = getIDAndLocalization(missing);
    813                 String message = "missing localizations, creating file"
    814                     + new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).getCanonicalPath();
    815                 if (failureCount[0] > 0)
    816                     warnln(getLocaleAndName(locale) + "\t" + message);
    817                 else
    818                     logln(getLocaleAndName(locale) + "\tpossibly " + message);
    819             } else {
    820                 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
    821             }
    822         }
    823     }
    824 
    825     /**
    826      * Internal
    827      */
    828     private String getDateKey(String monthOrDay, String width, String code) {
    829         // String context = width.equals("narrow") ? "format" : "stand-alone";
    830         return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
    831             + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/"
    832             + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay
    833             + "[@type=\"" + code + "\"]";
    834     }
    835 
    836     /**
    837      * Internal
    838      */
    839     private String getDateKey(int type, String code) {
    840         // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow
    841         int monthOrDayType = 0, widthType = type;
    842         if (type >= 4) {
    843             monthOrDayType = 1;
    844             widthType -= 4;
    845         }
    846         return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
    847     }
    848 
    849     /**
    850      * @param item
    851      * @param codes
    852      * @param missing
    853      * @param exemplarTest
    854      *            TODO
    855      *            TODO
    856      */
    857     private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[],
    858         UnicodeSet exemplarTest) {
    859         // check codes
    860         for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) {
    861             String code = it2.next();
    862             String key;
    863             if (type >= 0) {
    864                 key = CLDRFile.getKey(type, code);
    865             } else {
    866                 key = getDateKey(-type - 1, code);
    867             }
    868             String v = item.getStringValue(key);
    869             String rootValue = resolvedRoot.getStringValue(key);
    870             if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) {
    871                 String englishValue = resolvedEnglish.getStringValue(key);
    872                 String transValue;
    873                 if (englishValue != null) {
    874                     transValue = englishValue;
    875                 } else {
    876                     transValue = code;
    877                 }
    878                 missing.add(key, "TODO " + transValue);
    879                 failureCount[0]++;
    880             } else {
    881                 logln("\t" + code + "\t" + v);
    882             }
    883         }
    884     }
    885 
    886     /*
    887      * void showTestStr() {
    888      * LocaleIDParser lparser = new LocaleIDParser();
    889      * Collection s = split(teststr,',', true, new ArrayList());
    890      * for (Iterator it = s.iterator(); it.hasNext();) {
    891      * String item = (String)it.next();
    892      * lparser.set(item.replace('?', '_'));
    893      * String region = lparser.getRegion();
    894      * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
    895      * //System.out.print(getLocalization(region) + ", ");
    896      * }
    897      * }
    898      * static String teststr =
    899      * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
    900      * ;
    901      */
    902 
    903     CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() {
    904         public Object transform(Object source) {
    905             // TODO Auto-generated method stub
    906             return getLocalization(source.toString()) + " (" + source + ")";
    907         }
    908     };
    909 
    910     CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() {
    911         public Object transform(Object source) {
    912             if (english == null) english = cldrFactory.make("en", true);
    913             return english.getName("currency", source.toString()) + " (" + source + ")";
    914         }
    915     };
    916 
    917     /**
    918      * Tests that the supplemental data is well-formed.
    919      *
    920      */
    921     public void TestSupplementalData() {
    922         Map<String, Set<String>> language_scripts = new TreeMap<String, Set<String>>();
    923         Map<String, Set<String>> language_territories = new TreeMap<String, Set<String>>();
    924         Map<String, Set<String>> groups = new TreeMap<String, Set<String>>();
    925         Map<String, Set<String>> territory_currencies = new TreeMap<String, Set<String>>();
    926         Map<String, Map<String, String>> aliases = new TreeMap<String, Map<String, String>>();
    927         getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases);
    928         Set<String> sTerritories = new TreeSet<String>();
    929         for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) {
    930             sTerritories.addAll(it.next());
    931         }
    932         StandardCodes sc = StandardCodes.make();
    933         Set<String> fullTerritories = sc.getAvailableCodes("territory");
    934         Set<String> fullLanguages = sc.getAvailableCodes("language");
    935 
    936         Set<String> allLanguages = new TreeSet<String>(language_scripts.keySet());
    937         allLanguages.addAll(language_territories.keySet());
    938         for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) {
    939             Object language = it.next();
    940             Set<String> scripts = language_scripts.get(language);
    941             Set<String> territories = language_territories.get(language);
    942             logln(EnglishName.transform(language)
    943                 + " scripts: " + EnglishName.transform(scripts)
    944                 + " territories: " + EnglishName.transform(territories));
    945         }
    946 
    947         Map<String, String> changedLanguage = new TreeMap<String, String>();
    948         for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) {
    949             String code = it.next();
    950             List<String> data = sc.getFullData("language", code);
    951             if (data.size() < 3) {
    952                 System.out.println("data problem: " + data);
    953                 continue;
    954             }
    955             String replacement = data.get(2);
    956             if (!replacement.equals("")) {
    957                 if (!replacement.equals("--")) changedLanguage.put(code, replacement);
    958                 continue;
    959             }
    960         }
    961 
    962         // remove private use, deprecated, groups
    963         Set<String> standardTerritories = new TreeSet<String>();
    964         Map<String, String> changedTerritory = new TreeMap<String, String>();
    965         for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) {
    966             String code = it.next();
    967             if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ")
    968             List<String> data = sc.getFullData("territory", code);
    969             if (data.get(0).equals("PRIVATE USE")) continue;
    970             if (!data.get(2).equals("")) {
    971                 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2));
    972                 continue;
    973             }
    974             standardTerritories.add(code);
    975         }
    976         standardTerritories.removeAll(groups.keySet());
    977 
    978         if (!standardTerritories.containsAll(sTerritories)) {
    979             TreeSet<String> extras = new TreeSet<String>(sTerritories);
    980             extras.removeAll(standardTerritories);
    981             errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras));
    982         }
    983         if (!sTerritories.containsAll(standardTerritories)) {
    984             TreeSet<String> extras = new TreeSet<String>(standardTerritories);
    985             extras.removeAll(sTerritories);
    986             warnln("Missing Language Territories: " + EnglishName.transform(extras));
    987         }
    988 
    989         // now test currencies
    990         logln("Check that no illegal territories are used");
    991         if (!standardTerritories.containsAll(territory_currencies.keySet())) {
    992             TreeSet<String> extras = new TreeSet<String>(territory_currencies.keySet());
    993             extras.removeAll(fullTerritories);
    994             if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras));
    995             extras = new TreeSet<String>(territory_currencies.keySet());
    996             extras.retainAll(fullTerritories);
    997             extras.removeAll(standardTerritories);
    998             if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras));
    999         }
   1000         logln("Check that no territories are missing");
   1001         if (!territory_currencies.keySet().containsAll(standardTerritories)) {
   1002             TreeSet<String> extras = new TreeSet<String>(standardTerritories);
   1003             extras.removeAll(territory_currencies.keySet());
   1004             errln("Currency info -- Missing Territories: " + EnglishName.transform(extras));
   1005         }
   1006         Set<String> currencies = new TreeSet<String>();
   1007         for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) {
   1008             currencies.addAll(it.next());
   1009         }
   1010         logln("Check that no illegal currencies are used");
   1011         Set<String> legalCurrencies = new TreeSet<String>(sc.getAvailableCodes("currency"));
   1012         // first remove non-ISO
   1013         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
   1014             String code = it.next();
   1015             List<String> data = sc.getFullData("currency", code);
   1016             if ("X".equals(data.get(3))) it.remove();
   1017         }
   1018         if (!legalCurrencies.containsAll(currencies)) {
   1019             TreeSet<String> extras = new TreeSet<String>(currencies);
   1020             extras.removeAll(legalCurrencies);
   1021             errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras));
   1022         }
   1023         logln("Check that there are no missing currencies");
   1024         if (!currencies.containsAll(legalCurrencies)) {
   1025             TreeSet<String> extras = new TreeSet<String>(legalCurrencies);
   1026             extras.removeAll(currencies);
   1027             Map<String, Set<String>> failures = new TreeMap<String, Set<String>>();
   1028             for (Iterator<String> it = extras.iterator(); it.hasNext();) {
   1029                 String code = it.next();
   1030                 List<String> data = sc.getFullData("currency", code);
   1031                 if (data.get(1).equals("ZZ")) continue;
   1032                 String type = data.get(3) + "/" + data.get(1);
   1033                 Set<String> s = failures.get(type);
   1034                 if (s == null) failures.put(type, s = new TreeSet<String>());
   1035                 s.add(code);
   1036             }
   1037             for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) {
   1038                 String type = it.next();
   1039                 Set<String> s = failures.get(type);
   1040                 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s));
   1041             }
   1042         }
   1043         logln("Missing English currency names");
   1044         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
   1045             String currency = it.next();
   1046             String name = english.getName("currency", currency);
   1047             if (name == null) {
   1048                 String standardName = sc.getFullData("currency", currency).get(0);
   1049                 logln("\t\t\t<currency type=\"" + currency + "\">");
   1050                 logln("\t\t\t\t<displayName>" + standardName + "</displayName>");
   1051                 logln("\t\t\t</currency>");
   1052             }
   1053         }
   1054         logln("Check Aliases");
   1055         for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) {
   1056             // the first part of the mapping had better not be in the standardTerritories
   1057             String key = it.next();
   1058             Map<String, String> submap = aliases.get(key);
   1059             if (key.equals("territoryAlias")) {
   1060                 checkEqual(key, submap, changedTerritory);
   1061             } else if (key.equals("languageAlias")) {
   1062                 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) {
   1063                     String k = it2.next();
   1064                     String value = submap.get(k);
   1065                     if (value.indexOf("_") >= 0) it2.remove();
   1066                 }
   1067                 checkEqual(key, submap, changedLanguage);
   1068             }
   1069         }
   1070     }
   1071 
   1072     /**
   1073      *
   1074      */
   1075     private void checkEqual(String title, Map map1, Map map2) {
   1076         Set foo = new TreeSet(map1.keySet());
   1077         foo.removeAll(map2.keySet());
   1078         if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo);
   1079         foo = new TreeSet(map2.keySet());
   1080         foo.removeAll(map1.keySet());
   1081         if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo);
   1082         foo = map2.keySet();
   1083         foo.retainAll(map1.keySet());
   1084         for (Iterator it = foo.iterator(); it.hasNext();) {
   1085             Object key = it.next();
   1086             Object result1 = map1.get(key);
   1087             Object result2 = map2.get(key);
   1088             if (!result1.equals(result2))
   1089                 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2);
   1090         }
   1091     }
   1092 
   1093     /**
   1094      * Test that the zone ids are well-formed.
   1095      *
   1096      */
   1097     public void TestZones() {
   1098         StandardCodes sc = StandardCodes.make();
   1099 
   1100         Map<String, String> defaultNames = new TreeMap();
   1101         Map<String, String> old_new = sc.getZoneLinkold_new();
   1102         Set<String> core = sc.getZoneData().keySet();
   1103         logln("Checking for collisions with last field");
   1104         for (Iterator<String> it = core.iterator(); it.hasNext();) {
   1105             String currentItem = it.next();
   1106             String defaultName = TimezoneFormatter.getFallbackName(currentItem);
   1107             String fullName = defaultNames.get(defaultName);
   1108             if (fullName == null)
   1109                 defaultNames.put(defaultName, currentItem);
   1110             else {
   1111                 errln("Collision between: " + currentItem + " AND " + fullName);
   1112             }
   1113         }
   1114 
   1115         logln("Checking that all links are TO canonical zones");
   1116         Set<String> s = new TreeSet<String>(old_new.values());
   1117         s.removeAll(core);
   1118         if (s.size() != 0) {
   1119             errln("Links go TO zones that are not canonical! " + s);
   1120         }
   1121 
   1122         logln("Checking that no links are FROM canonical zones");
   1123         s = new TreeSet<String>(core);
   1124         s.retainAll(old_new.keySet());
   1125         if (s.size() != 0) {
   1126             errln("Links go FROM zones that are canonical! " + s);
   1127         }
   1128 
   1129         logln("Checking that the zones with rule data are all canonical");
   1130         Set<String> zonesWithRules = sc.getZone_rules().keySet();
   1131         s.clear();
   1132         s.addAll(zonesWithRules);
   1133         s.removeAll(core);
   1134         if (s.size() != 0) logln("Zones with rules that are not canonical: " + s);
   1135 
   1136         logln("Checking that the rule data are all canonical");
   1137         s.clear();
   1138         s.addAll(core);
   1139         s.removeAll(zonesWithRules);
   1140         s.removeAll(old_new.keySet());
   1141         if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s);
   1142 
   1143         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
   1144             String oldItem = it.next();
   1145             logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem));
   1146         }
   1147         Map<String, Set<String>> new_old = new TreeMap<String, Set<String>>();
   1148         for (Iterator<String> it = core.iterator(); it.hasNext();) {
   1149             new_old.put(it.next(), new TreeSet<String>());
   1150         }
   1151         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
   1152             String oldItem = it.next();
   1153             String newItem = old_new.get(oldItem);
   1154             Set<String> oldItems = new_old.get(newItem);
   1155             if (oldItems == null) { // try recursing
   1156                 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem);
   1157                 continue;
   1158                 // new_old.put(oldOne, oldItems = new TreeSet());
   1159             }
   1160             oldItems.add(oldItem);
   1161         }
   1162         for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) {
   1163             String newOne = it.next();
   1164             Set<String> oldItems = new_old.get(newOne);
   1165             logln(newOne + "\t" + oldItems);
   1166         }
   1167     }
   1168 
   1169     public void TestNarrowForms() {
   1170         if (disableUntilLater("TestMinimalLocalization")) return;
   1171 
   1172         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
   1173             String locale = it.next();
   1174             logln("Testing: " + getLocaleAndName(locale));
   1175             BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale));
   1176             CLDRFile item = cldrFactory.make(locale, false);
   1177             // Walk through all the xpaths, adding to currentValues
   1178             // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues
   1179             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
   1180                 String xpath = it2.next();
   1181                 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) {
   1182                     String value = item.getStringValue(xpath);
   1183                     // logln("\tTesting: " + value + "\t path: " + xpath);
   1184                     int end = getXGraphemeClusterBoundary(bi, value, 0);
   1185                     if (end == value.length()) continue;
   1186                     errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath);
   1187                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value.");
   1188                 }
   1189             }
   1190         }
   1191     }
   1192 
   1193     static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]");
   1194     static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]");
   1195 
   1196     private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) {
   1197         if (value.length() <= 1) return 1;
   1198 
   1199         bi.setText(value);
   1200         if (start != 0) bi.preceding(start + 1); // backup one
   1201         int current = bi.next();
   1202         int cp = 0;
   1203         // link any digits
   1204         if (DIGIT.contains(UTF16.charAt(value, current - 1))) {
   1205             current = DIGIT.findIn(value, current, true);
   1206         }
   1207         // continue collecting any additional characters that are M or grapheme extend
   1208         return XGRAPHEME.findIn(value, current, true);
   1209     }
   1210 }
   1211 
   1212 /*
   1213  * private static final int
   1214  * HELP1 = 0,
   1215  * HELP2 = 1,
   1216  * SOURCEDIR = 2,
   1217  * DESTDIR = 3,
   1218  * MATCH = 4,
   1219  * SKIP = 5,
   1220  * TZADIR = 6,
   1221  * NONVALIDATING = 7,
   1222  * SHOW_DTD = 8,
   1223  * TRANSLIT = 9;
   1224  * options[SOURCEDIR].value
   1225  *
   1226  * private static final UOption[] options = {
   1227  * UOption.HELP_H(),
   1228  * UOption.HELP_QUESTION_MARK(),
   1229  * UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\main\\"),
   1230  * UOption.DESTDIR().setDefault("C:\\DATA\\GEN\\cldr\\mainCheck\\"),
   1231  * UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
   1232  * UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
   1233  * UOption.create("tzadir", 't',
   1234  * UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
   1235  * UOption.create("nonvalidating", 'n', UOption.NO_ARG),
   1236  * UOption.create("dtd", 'w', UOption.NO_ARG),
   1237  * UOption.create("transliterate", 'y', UOption.NO_ARG), };
   1238  *
   1239  * private static String timeZoneAliasDir = null;
   1240  * /
   1241  *
   1242  * public static void main(String[] args) throws SAXException, IOException {
   1243  * UOption.parseArgs(args, options);
   1244  * localeList = getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value);
   1245  * /*
   1246  * log = FileUtilities.openUTF8Writer(options[DESTDIR].value, "log.txt");
   1247  * try {
   1248  * for (Iterator it = getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value).iterator(); it.hasNext();) {
   1249  * String name = (String) it.next();
   1250  * for (int i = 0; i <= 1; ++i) {
   1251  * boolean resolved = i == 1;
   1252  * CLDRKey key = make(name, resolved);
   1253  *
   1254  * PrintWriter pw = FileUtilities.openUTF8Writer(options[DESTDIR].value, name + (resolved ? "_r" : "") + ".txt");
   1255  * write(pw, key);
   1256  * pw.close();
   1257  *
   1258  * }
   1259  * }
   1260  * } finally {
   1261  * log.close();
   1262  * System.out.println("Done");
   1263  * }
   1264  *
   1265  *
   1266  * <language type="in">Indonesian</language>
   1267  * <language type="iw">Hebrew</language>
   1268  * <script type="Bali">Balinese</script>
   1269  * <script type="Batk">Batak</script>
   1270  * <script type="Blis">Blissymbols</script>
   1271  * <script type="Brah">Brahmi</script>
   1272  * <script type="Bugi">Buginese</script>
   1273  * <script type="Cham">Cham</script>
   1274  * <script type="Cirt">Cirth</script>
   1275  * <script type="Cyrs">Cyrillic (Old Church Slavonic variant)</script>
   1276  * <script type="Egyd">Egyptian demotic</script>
   1277  * <script type="Egyh">Egyptian hieratic</script>
   1278  * <script type="Egyp">Egyptian hieroglyphs</script>
   1279  * <script type="Glag">Glagolitic</script>
   1280  * <script type="Hmng">Pahawh Hmong</script>
   1281  * <script type="Hung">Old Hungarian</script>
   1282  * <script type="Inds">Indus (Harappan)</script>
   1283  * <script type="Java">Javanese</script>
   1284  * <script type="Kali">Kayah Li</script>
   1285  * <script type="Khar">Kharoshthi</script>
   1286  * <script type="Latf">Latin (Fraktur variant)</script>
   1287  * <script type="Latg">Latin (Gaelic variant)</script>
   1288  * <script type="Lepc">Lepcha (Rong)</script>
   1289  * <script type="Lina">Linear A</script>
   1290  * <script type="Mand">Mandaean</script>
   1291  * <script type="Maya">Mayan hieroglyphs</script>
   1292  * <script type="Mero">Meroitic</script>
   1293  * <script type="Orkh">Orkhon</script>
   1294  * <script type="Perm">Old Permic</script>
   1295  * <script type="Phag">Phags-pa</script>
   1296  * <script type="Phnx">Phoenician</script>
   1297  * <script type="Plrd">Pollard Phonetic</script>
   1298  * <script type="Roro">Rongorongo</script>
   1299  * <script type="Sara">Sarati</script>
   1300  * <script type="Sylo">Syloti Nagri</script>
   1301  * <script type="Syre">Syriac (Estrangelo variant)</script>
   1302  * <script type="Syrj">Syriac (Western variant)</script>
   1303  * <script type="Syrn">Syriac (Eastern variant)</script>
   1304  * <script type="Talu">Tai Lue</script>
   1305  * <script type="Teng">Tengwar</script>
   1306  * <script type="Tfng">Tifinagh (Berber)</script>
   1307  * <script type="Thai">Thai</script>
   1308  * <script type="Vaii">Vai</script>
   1309  * <script type="Visp">Visible Speech</script>
   1310  * <script type="Xpeo">Old Persian</script>
   1311  * <script type="Xsux">Cuneiform, Sumero-Akkadian</script>
   1312  * <script type="Zxxx">Code for unwritten languages</script>
   1313  * <script type="Zzzz">Code for uncoded script</script>
   1314  * <territory type="001">World</territory>
   1315  * <territory type="002">Africa</territory>
   1316  * <territory type="003">North America</territory>
   1317  * <territory type="005">South America</territory>
   1318  * <territory type="009">Oceania</territory>
   1319  * <territory type="011">Western Africa</territory>
   1320  * <territory type="013">Central America</territory>
   1321  * <territory type="014">Eastern Africa</territory>
   1322  * <territory type="015">Northern Africa</territory>
   1323  * <territory type="017">Middle Africa</territory>
   1324  * <territory type="018">Southern Africa</territory>
   1325  * <territory type="019">Americas</territory>
   1326  * <territory type="021">Northern America</territory>
   1327  * <territory type="029">Caribbean</territory>
   1328  * <territory type="030">Eastern Asia</territory>
   1329  * <territory type="035">South-eastern Asia</territory>
   1330  * <territory type="039">Southern Europe</territory>
   1331  * <territory type="053">Australia and New Zealand</territory>
   1332  * <territory type="054">Melanesia</territory>
   1333  * <territory type="057">Micronesia</territory>
   1334  * <territory type="061">Polynesia</territory>
   1335  * <territory type="062">South-central Asia</territory>
   1336  * <territory type="AX">Aland Islands</territory>
   1337  * <territory type="BQ">British Antarctic Territory</territory>
   1338  * <territory type="BU">Myanmar</territory>
   1339  * <territory type="CS">Czechoslovakia</territory>
   1340  * <territory type="CT">Canton and Enderbury Islands</territory>
   1341  * <territory type="DD">East Germany</territory>
   1342  * <territory type="DY">Benin</territory>
   1343  * <territory type="FQ">French Southern and Antarctic Territories</territory>
   1344  * <territory type="FX">Metropolitan France</territory>
   1345  * <territory type="HV">Burkina Faso</territory>
   1346  * <territory type="JT">Johnston Island</territory>
   1347  * <territory type="MI">Midway Islands</territory>
   1348  * <territory type="NH">Vanuatu</territory>
   1349  * <territory type="NQ">Dronning Maud Land</territory>
   1350  * <territory type="NT">Neutral Zone</territory>
   1351  * <territory type="PC">Pacific Islands Trust Territory</territory>
   1352  * <territory type="PU">U.S. Miscellaneous Pacific Islands</territory>
   1353  * <territory type="PZ">Panama Canal Zone</territory>
   1354  * <territory type="RH">Zimbabwe</territory>
   1355  * <territory type="SU">Union of Soviet Socialist Republics</territory>
   1356  * <territory type="TP">Timor-Leste</territory>
   1357  * <territory type="VD">North Vietnam</territory>
   1358  * <territory type="WK">Wake Island</territory>
   1359  * <territory type="YD">People's Democratic Republic of Yemen</territory>
   1360  * <territory type="ZR">Congo, The Democratic Republic of the</territory>
   1361  * <variant type="1901">Traditional German orthography</variant>
   1362  * <variant type="1996">German orthography of 1996</variant>
   1363  * <variant type="boont">Boontling</variant>
   1364  * <variant type="gaulish">Gaulish</variant>
   1365  * <variant type="guoyu">Mandarin or Standard Chinese</variant>
   1366  * <variant type="hakka">Hakka</variant>
   1367  * <variant type="lojban">Lojban</variant>
   1368  * <variant type="nedis">Natisone dialect</variant>
   1369  * <variant type="rozaj">Resian</variant>
   1370  * <variant type="scouse">Scouse</variant>
   1371  * <variant type="xiang">Xiang or Hunanese</variant>
   1372  *
   1373  *
   1374  * <currency type="CFP"><displayName>???</displayName></currency>
   1375  * <currency type="DDR"><displayName>???</displayName></currency>
   1376  * <currency type="EQE"><displayName>???</displayName></currency>
   1377  * <currency type="ESA"><displayName>???</displayName></currency>
   1378  * <currency type="ESB"><displayName>???</displayName></currency>
   1379  * <currency type="JAN"><displayName>???</displayName></currency>
   1380  * <currency type="LSM"><displayName>???</displayName></currency>
   1381  * <currency type="LUC"><displayName>???</displayName></currency>
   1382  * <currency type="LUL"><displayName>???</displayName></currency>
   1383  * <currency type="NAM"><displayName>???</displayName></currency>
   1384  * <currency type="NEW"><displayName>???</displayName></currency>
   1385  * <currency type="RHD"><displayName>???</displayName></currency>
   1386  * <currency type="SAN"><displayName>???</displayName></currency>
   1387  * <currency type="SDR"><displayName>???</displayName></currency>
   1388  * <currency type="SEE"><displayName>???</displayName></currency>
   1389  * <currency type="SRI"><displayName>???</displayName></currency>
   1390  * <currency type="UAE"><displayName>???</displayName></currency>
   1391  * <currency type="UDI"><displayName>???</displayName></currency>
   1392  * <currency type="UIC"><displayName>???</displayName></currency>
   1393  * <currency type="XAG"><displayName>???</displayName></currency>
   1394  * <currency type="XPD"><displayName>???</displayName></currency>
   1395  * <currency type="XPT"><displayName>???</displayName></currency>
   1396  * <currency type="XRE"><displayName>???</displayName></currency>
   1397  * <currency type="XTS"><displayName>???</displayName></currency>
   1398  * <currency type="XXX"><displayName>???</displayName></currency>
   1399  */
   1400