Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.io.File;
      4 import java.io.IOException;
      5 import java.util.Arrays;
      6 import java.util.EnumSet;
      7 import java.util.HashSet;
      8 import java.util.Iterator;
      9 import java.util.LinkedHashSet;
     10 import java.util.Map;
     11 import java.util.Set;
     12 
     13 import javax.xml.xpath.XPathException;
     14 
     15 import org.unicode.cldr.test.ExampleGenerator;
     16 import org.unicode.cldr.util.AttributeValueValidity;
     17 import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern;
     18 import org.unicode.cldr.util.CLDRConfig;
     19 import org.unicode.cldr.util.CLDRFile;
     20 import org.unicode.cldr.util.CLDRPaths;
     21 import org.unicode.cldr.util.Iso639Data;
     22 import org.unicode.cldr.util.Iso639Data.Scope;
     23 import org.unicode.cldr.util.Iso639Data.Type;
     24 import org.unicode.cldr.util.LanguageTagCanonicalizer;
     25 import org.unicode.cldr.util.LanguageTagParser;
     26 import org.unicode.cldr.util.SimpleXMLSource;
     27 import org.unicode.cldr.util.StandardCodes.CodeType;
     28 import org.unicode.cldr.util.SupplementalDataInfo;
     29 import org.unicode.cldr.util.XPathExpressionParser;
     30 import org.w3c.dom.Element;
     31 import org.w3c.dom.Node;
     32 
     33 public class TestLocale extends TestFmwkPlus {
     34     static CLDRConfig testInfo = CLDRConfig.getInstance();
     35 
     36     public static void main(String[] args) {
     37         new TestLocale().run(args);
     38     }
     39 
     40     static Set<Type> ALLOWED_LANGUAGE_TYPES = EnumSet.of(Type.Ancient,
     41         Type.Living, Type.Constructed, Type.Historical, Type.Extinct, Type.Special);
     42     static Set<Scope> ALLOWED_LANGUAGE_SCOPES = EnumSet.of(Scope.Individual,
     43         Scope.Macrolanguage, Scope.Special); // , Special, Collection, PrivateUse, Unknown
     44     static Set<String> ALLOWED_SCRIPTS = testInfo.getStandardCodes()
     45         .getGoodAvailableCodes(CodeType.script);
     46     static Set<String> ALLOWED_REGIONS = testInfo.getStandardCodes()
     47         .getGoodAvailableCodes(CodeType.territory);
     48 
     49     /**
     50      * XPath expression that will find all alias tags
     51      */
     52     static String XPATH_ALIAS_STRING = "//alias";
     53 
     54     public void TestLanguageRegions() {
     55         Set<String> missingLanguageRegion = new LinkedHashSet<String>();
     56         // TODO This should be derived from metadata: https://unicode.org/cldr/trac/ticket/11224
     57         Set<String> knownMultiScriptLanguages = new HashSet<String>(Arrays.asList("az", "ff", "bs", "pa", "shi", "sr", "vai", "uz", "yue", "zh"));
     58         Set<String> available = testInfo.getCldrFactory().getAvailable();
     59         LanguageTagParser ltp = new LanguageTagParser();
     60         Set<String> defaultContents = testInfo.getSupplementalDataInfo()
     61             .getDefaultContentLocales();
     62         for (String locale : available) {
     63             String base = ltp.set(locale).getLanguage();
     64             String script = ltp.getScript();
     65             String region = ltp.getRegion();
     66             if (script.isEmpty()) {
     67                 continue;
     68             }
     69             ltp.setRegion("");
     70             String baseScript = ltp.toString();
     71             if (!knownMultiScriptLanguages.contains(base)) {
     72                 assertFalse("Should NOT have " + locale,
     73                     defaultContents.contains(baseScript));
     74             }
     75             if (region.isEmpty()) {
     76                 continue;
     77             }
     78             ltp.setScript("");
     79             ltp.setRegion(region);
     80             String baseRegion = ltp.toString();
     81             if (knownMultiScriptLanguages.contains(base)) {
     82                 continue;
     83             }
     84             if (!missingLanguageRegion.contains(baseRegion)
     85                 && !assertTrue("Should have " + baseRegion,
     86                     available.contains(baseRegion))) {
     87                 missingLanguageRegion.add(baseRegion);
     88             }
     89         }
     90     }
     91 
     92     /**
     93      * Determine whether the file should be checked for aliases; this is
     94      * currently not done for Keyboard definitions or DTD's
     95      *
     96      * @param f
     97      *            the file to check
     98      * @return
     99      */
    100     protected boolean shouldCheckForAliases(File f) {
    101         if (!f.canRead()) {
    102             return false;
    103         }
    104         String absPath = f.getAbsolutePath();
    105         return absPath.endsWith("xml") && !absPath.contains("dtd")
    106             && !absPath.contains("keyboard")
    107             && !absPath.contains("Keyboard");
    108     }
    109 
    110     /**
    111      * Check a single file for aliases, on a content level, the only check that
    112      * is done is that the one for readability.
    113      *
    114      * @param localeName
    115      *            - the localename
    116      * @param file
    117      *            - the file to check
    118      * @param localesWithAliases
    119      *            - a set of locale strings the files of which contain aliases
    120      */
    121     private void checkForAliases(final String localeName, File file,
    122         final Set<String> localesWithAliases) {
    123         try {
    124             if (file.canRead()) {
    125                 XPathExpressionParser parser = new XPathExpressionParser(file);
    126                 parser.iterateThroughNodeSet(XPATH_ALIAS_STRING,
    127                     new XPathExpressionParser.NodeHandlingInterface() {
    128 
    129                         // Handle gets called for every node of the node set
    130                         @Override
    131                         public void handle(Node result) {
    132                             if (result instanceof Element) {
    133                                 Element el = (Element) result;
    134                                 // this node likely has an attribute source
    135                                 if (el.hasAttributes()) {
    136                                     String sourceAttr = el
    137                                         .getAttribute("source");
    138                                     if (sourceAttr != null
    139                                         && !sourceAttr.isEmpty()) {
    140                                         localesWithAliases.add(localeName);
    141                                     }
    142                                 }
    143                             }
    144                         }
    145                     });
    146             }
    147         } catch (IOException e) {
    148             // TODO Auto-generated catch block
    149             e.printStackTrace();
    150         } catch (XPathException e) {
    151             // TODO Auto-generated catch block
    152             e.printStackTrace();
    153         }
    154     }
    155 
    156     /**
    157      * Tests the validity of the file names and of the English localeDisplayName
    158      * types. Also tests for aliases outside root
    159      */
    160     public void TestLocalePartsValidity() {
    161         LanguageTagParser ltp = new LanguageTagParser();
    162         final Set<String> localesWithAliases = new HashSet<>();
    163         for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(
    164             ".xml")) {
    165             String parent = file.getParent();
    166             if (parent.contains("transform")
    167                 || parent.contains("bcp47")
    168                 || parent.contains("supplemental")
    169                 || parent.contains("validity")) {
    170                 continue;
    171             }
    172             String localeName = file.getName();
    173             localeName = localeName.substring(0, localeName.length() - 4); // remove
    174             // .xml
    175             if (localeName.equals("root") || localeName.equals("_platform")) {
    176                 continue;
    177             }
    178             String fileString = file.toString();
    179             checkLocale(fileString, localeName, ltp);
    180             // check for aliases
    181             if (shouldCheckForAliases(file)) {
    182                 checkForAliases(localeName, file, localesWithAliases);
    183             }
    184         }
    185         // we ran through all of them
    186         if (!localesWithAliases.isEmpty()) {
    187             StringBuilder sb = new StringBuilder();
    188             sb.append("\r\n");
    189             sb.append("The following locales have aliases, but must not: ");
    190             Iterator<String> lIter = localesWithAliases.iterator();
    191             while (lIter.hasNext()) {
    192                 sb.append(lIter.next());
    193                 sb.append(" ");
    194             }
    195             System.out.println(sb.toString());
    196         }
    197         // now check English-resolved
    198         CLDRFile english = testInfo.getEnglish();
    199         for (String xpath : english) {
    200             if (!xpath.startsWith("//ldml/localeDisplayNames/")) {
    201                 continue;
    202             }
    203             switch (CLDRFile.getNameType(xpath)) {
    204             case 0:
    205                 checkLocale("English xpath", CLDRFile.getCode(xpath), ltp);
    206                 break;
    207             case 1:
    208                 checkScript("English xpath", CLDRFile.getCode(xpath));
    209                 break;
    210             case 2:
    211                 checkRegion("English xpath", CLDRFile.getCode(xpath));
    212                 break;
    213             }
    214         }
    215     }
    216 
    217     public void checkLocale(String fileString, String localeName,
    218         LanguageTagParser ltp) {
    219         ltp.set(localeName);
    220         checkLanguage(fileString, ltp.getLanguage());
    221         checkScript(fileString, ltp.getScript());
    222         checkRegion(fileString, ltp.getRegion());
    223     }
    224 
    225     public void checkRegion(String file, String region) {
    226         if (!region.isEmpty() && !region.equals("AN")
    227             && !region.equals("XA") && !region.equals("XB")) {
    228             assertRelation("Region ok? " + region + " in " + file, true,
    229                 ALLOWED_REGIONS, TestFmwkPlus.CONTAINS, region);
    230         }
    231     }
    232 
    233     final MatcherPattern SCRIPT_NON_UNICODE = AttributeValueValidity.getMatcherPattern("$scriptNonUnicode");
    234 
    235     public void checkScript(String file, String script) {
    236         if (!script.isEmpty()) {
    237             if (!ALLOWED_SCRIPTS.contains(script) && SCRIPT_NON_UNICODE.matches(script, null)) {
    238                 return;
    239             }
    240             assertRelation("Script ok? " + script + " in " + file, true,
    241                 ALLOWED_SCRIPTS, TestFmwkPlus.CONTAINS, script);
    242         }
    243     }
    244 
    245     public void checkLanguage(String file, String language) {
    246         if (!language.equals("root")) {
    247             Scope scope = Iso639Data.getScope(language);
    248             if (assertRelation("Language ok? " + language + " in " + file,
    249                 true, ALLOWED_LANGUAGE_SCOPES, TestFmwkPlus.CONTAINS, scope)) {
    250                 Type type = Iso639Data.getType(language);
    251                 assertRelation("Language ok? " + language + " in " + file,
    252                     true, ALLOWED_LANGUAGE_TYPES, TestFmwkPlus.CONTAINS,
    253                     type);
    254             }
    255         }
    256     }
    257 
    258     public void TestConsistency() {
    259         LanguageTagParser ltp = new LanguageTagParser();
    260         SupplementalDataInfo supplementalDataInfo = testInfo
    261             .getSupplementalDataInfo();
    262         Set<String> defaultContentLocales = supplementalDataInfo
    263             .getDefaultContentLocales();
    264         Map<String, String> likelySubtags = supplementalDataInfo
    265             .getLikelySubtags();
    266 
    267         for (String locale : testInfo.getCldrFactory().getAvailable()) {
    268             if (locale.equals("root")) {
    269                 continue;
    270             }
    271             ltp.set(locale);
    272             boolean isDefaultContent = defaultContentLocales.contains(locale);
    273             boolean hasScript = !ltp.getScript().isEmpty();
    274             boolean hasRegion = !ltp.getRegion().isEmpty();
    275             String language = ltp.getLanguage();
    276             String maximized = likelySubtags.get(language);
    277             boolean hasLikelySubtag = maximized != null;
    278 
    279             // verify that the parent locales are consistent with the default
    280             // locales, for scripts
    281             // that is, if zh-Hant has a parent of root, then it is not the
    282             // default content locale, and vice versa
    283 
    284             if (hasScript && !hasRegion) {
    285                 boolean parentIsRoot = "root".equals(supplementalDataInfo
    286                     .getExplicitParentLocale(locale));
    287                 if (parentIsRoot == isDefaultContent) {
    288                     errln("Inconsistency between parentLocales and defaultContents: "
    289                         + locale
    290                         + (parentIsRoot ? " +" : " -")
    291                         + "parentIsRoot"
    292                         + (isDefaultContent ? " +" : " -")
    293                         + "isDefaultContent");
    294                 }
    295 
    296                 // we'd better have a separate likelySubtag
    297                 if (parentIsRoot && !hasLikelySubtag) {
    298                     errln("Missing likely subtags for: " + locale + " "
    299                         + TestInheritance.suggestLikelySubtagFor(locale));
    300                 }
    301             }
    302 
    303             // verify that likelySubtags has all the languages
    304 
    305             if (!hasScript && !hasRegion) {
    306                 if (!hasLikelySubtag) {
    307                     errln("Missing likely subtags for: " + locale + " "
    308                         + TestInheritance.suggestLikelySubtagFor(locale));
    309                 }
    310             }
    311         }
    312     }
    313 
    314     public void TestCanonicalizer() {
    315         LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer();
    316         String[][] tests = { { "iw", "he" }, { "no-YU", "nb_RS" },
    317             { "no", "nb" }, { "eng-833", "en_IM" }, { "mo", "ro_MD" },
    318             { "mo_Cyrl", "ro_Cyrl_MD" }, { "mo_US", "ro_US" },
    319             { "mo_Cyrl_US", "ro_Cyrl_US" }, { "sh", "sr_Latn" },
    320             { "sh_US", "sr_Latn_US" }, { "sh_Cyrl", "sr" },
    321             { "sh_Cyrl_US", "sr_US" }, { "hy_SU", "hy" },
    322             { "hy_AM", "hy" }, { "en_SU", "en_RU" },
    323             { "rO-cYrl-aQ", "ro_Cyrl_AQ" }, };
    324         for (String[] pair : tests) {
    325             String actual = canonicalizer.transform(pair[0]);
    326             assertEquals("Canonical", pair[1], actual);
    327         }
    328     }
    329 
    330     public void TestBrackets() {
    331         String[][] tests = {
    332             {
    333                 "language",
    334                 "en",
    335                 "Anglish (abc)",
    336                 "en",
    337                 "Anglish [abc]",
    338                 "?Anglish [abc]? (U.S. [ghi])?Anglish [abc]? (Latine [def])?Anglish [abc]? (Latine [def], U.S. [ghi])Langue: ?Anglish (abc)?" },
    339             {
    340                 "script",
    341                 "Latn",
    342                 "Latine (def)",
    343                 "en_Latn",
    344                 "Anglish [abc] (Latine [def])",
    345                 "Anglish [abc] (?Latine [def]?)Anglish [abc] (?Latine [def]?, U.S. [ghi])Scripte: ?Latine (def)?" },
    346             {
    347                 "territory",
    348                 "US",
    349                 "U.S. (ghi)",
    350                 "en_Latn_US",
    351                 "Anglish [abc] (Latine [def], U.S. [ghi])",
    352                 "Anglish [abc] (?U.S. [ghi]?)Anglish [abc] (Latine [def], ?U.S. [ghi]?)Territorie: ?U.S. (ghi)?" },
    353             { null, null, null, "en_US", "Anglish [abc] (U.S. [ghi])", null },
    354             { "variant", "FOOBAR", "foo (jkl)", "en_foobar", "Anglish [abc] (foo [jkl])", null },
    355             { "key", "co", "sort (mno)", "en_foobar@co=FOO", "Anglish [abc] (foo [jkl], sort [mno]=foo)", null },
    356             { "key|type", "co|fii", "sortfii (mno)", "en_foobar@co=FII", "Anglish [abc] (foo [jkl], sortfii [mno])", null }, };
    357         // load up a dummy source
    358         SimpleXMLSource dxs = new SimpleXMLSource("xx");
    359         for (String[] row : tests) {
    360             if (row[0] == null) {
    361                 continue;
    362             }
    363             int typeCode = CLDRFile.typeNameToCode(row[0]);
    364             String path = CLDRFile.getKey(typeCode, row[1]);
    365             dxs.putValueAtDPath(path, row[2]);
    366         }
    367         // create a cldrfile from it and test
    368         SimpleXMLSource root = new SimpleXMLSource("root");
    369         root.putValueAtDPath(
    370             "//ldml/localeDisplayNames/localeDisplayPattern/localePattern",
    371             "{0} ({1})");
    372         root.putValueAtDPath(
    373             "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator",
    374             "{0}, {1}");
    375         root.putValueAtDPath(
    376             "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"language\"]",
    377             "Langue: {0}");
    378         root.putValueAtDPath(
    379             "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"script\"]",
    380             "Scripte: {0}");
    381         root.putValueAtDPath(
    382             "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"territory\"]",
    383             "Territorie: {0}");
    384         CLDRFile f = new CLDRFile(dxs, root);
    385         ExampleGenerator eg = new ExampleGenerator(f, testInfo.getEnglish(),
    386             CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
    387         for (String[] row : tests) {
    388             if (row[0] != null) {
    389                 int typeCode = CLDRFile.typeNameToCode(row[0]);
    390                 String standAlone = f.getName(typeCode, row[1]);
    391                 if (!assertEquals("stand-alone " + row[3], row[2], standAlone)) {
    392                     typeCode = CLDRFile.typeNameToCode(row[0]);
    393                     standAlone = f.getName(typeCode, row[1]);
    394                 }
    395                 ;
    396                 if (row[5] != null) {
    397                     String path = CLDRFile.getKey(typeCode, row[1]);
    398                     String example = eg
    399                         .getExampleHtml(path, "?" + row[2] + "?");
    400                     assertEquals("example " + row[3], row[5],
    401                         ExampleGenerator.simplify(example));
    402                 }
    403             }
    404             String displayName = f.getName(row[3], true, "{0}={1}",
    405                 "{0} ({1})", "{0}, {1}");
    406             assertEquals("locale " + row[3], row[4], displayName);
    407         }
    408     }
    409 
    410     public void TestLocaleNamePattern() {
    411         assertEquals("Locale name", "Chinese",
    412             testInfo.getEnglish().getName("zh"));
    413         assertEquals("Locale name", "Chinese (United States)", testInfo
    414             .getEnglish().getName("zh-US"));
    415         assertEquals("Locale name", "Chinese (Arabic, United States)", testInfo
    416             .getEnglish().getName("zh-Arab-US"));
    417         CLDRFile japanese = testInfo.getCLDRFile("ja", true);
    418         assertEquals("Locale name", "", japanese.getName("zh"));
    419         assertEquals("Locale name", " ()", japanese.getName("zh-US"));
    420         assertEquals("Locale name", " (\u3001)",
    421             japanese.getName("zh-Arab-US"));
    422     }
    423 
    424     public void TestExtendedLanguage() {
    425         assertEquals("Extended language translation", "Simplified Chinese",
    426             testInfo.getEnglish().getName("zh_Hans"));
    427         assertEquals("Extended language translation",
    428             "Simplified Chinese (Singapore)", testInfo.getEnglish()
    429                 .getName("zh_Hans_SG"));
    430         assertEquals("Extended language translation", "American English",
    431             testInfo.getEnglish().getName("en-US"));
    432         assertEquals("Extended language translation",
    433             "American English (Arabic)",
    434             testInfo.getEnglish().getName("en-Arab-US"));
    435     }
    436 }
    437