1 package org.unicode.cldr.unittest; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.util.Arrays; 6 import java.util.EnumSet; 7 import java.util.HashSet; 8 import java.util.Iterator; 9 import java.util.LinkedHashSet; 10 import java.util.Map; 11 import java.util.Set; 12 13 import javax.xml.xpath.XPathException; 14 15 import org.unicode.cldr.test.ExampleGenerator; 16 import org.unicode.cldr.util.AttributeValueValidity; 17 import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern; 18 import org.unicode.cldr.util.CLDRConfig; 19 import org.unicode.cldr.util.CLDRFile; 20 import org.unicode.cldr.util.CLDRPaths; 21 import org.unicode.cldr.util.Iso639Data; 22 import org.unicode.cldr.util.Iso639Data.Scope; 23 import org.unicode.cldr.util.Iso639Data.Type; 24 import org.unicode.cldr.util.LanguageTagCanonicalizer; 25 import org.unicode.cldr.util.LanguageTagParser; 26 import org.unicode.cldr.util.SimpleXMLSource; 27 import org.unicode.cldr.util.StandardCodes.CodeType; 28 import org.unicode.cldr.util.SupplementalDataInfo; 29 import org.unicode.cldr.util.XPathExpressionParser; 30 import org.w3c.dom.Element; 31 import org.w3c.dom.Node; 32 33 public class TestLocale extends TestFmwkPlus { 34 static CLDRConfig testInfo = CLDRConfig.getInstance(); 35 36 public static void main(String[] args) { 37 new TestLocale().run(args); 38 } 39 40 static Set<Type> ALLOWED_LANGUAGE_TYPES = EnumSet.of(Type.Ancient, 41 Type.Living, Type.Constructed, Type.Historical, Type.Extinct, Type.Special); 42 static Set<Scope> ALLOWED_LANGUAGE_SCOPES = EnumSet.of(Scope.Individual, 43 Scope.Macrolanguage, Scope.Special); // , Special, Collection, PrivateUse, Unknown 44 static Set<String> ALLOWED_SCRIPTS = testInfo.getStandardCodes() 45 .getGoodAvailableCodes(CodeType.script); 46 static Set<String> ALLOWED_REGIONS = testInfo.getStandardCodes() 47 .getGoodAvailableCodes(CodeType.territory); 48 49 /** 50 * XPath expression that will find all alias tags 51 */ 52 static String XPATH_ALIAS_STRING = "//alias"; 53 54 public void TestLanguageRegions() { 55 Set<String> missingLanguageRegion = new LinkedHashSet<String>(); 56 // TODO This should be derived from metadata: https://unicode.org/cldr/trac/ticket/11224 57 Set<String> knownMultiScriptLanguages = new HashSet<String>(Arrays.asList("az", "ff", "bs", "pa", "shi", "sr", "vai", "uz", "yue", "zh")); 58 Set<String> available = testInfo.getCldrFactory().getAvailable(); 59 LanguageTagParser ltp = new LanguageTagParser(); 60 Set<String> defaultContents = testInfo.getSupplementalDataInfo() 61 .getDefaultContentLocales(); 62 for (String locale : available) { 63 String base = ltp.set(locale).getLanguage(); 64 String script = ltp.getScript(); 65 String region = ltp.getRegion(); 66 if (script.isEmpty()) { 67 continue; 68 } 69 ltp.setRegion(""); 70 String baseScript = ltp.toString(); 71 if (!knownMultiScriptLanguages.contains(base)) { 72 assertFalse("Should NOT have " + locale, 73 defaultContents.contains(baseScript)); 74 } 75 if (region.isEmpty()) { 76 continue; 77 } 78 ltp.setScript(""); 79 ltp.setRegion(region); 80 String baseRegion = ltp.toString(); 81 if (knownMultiScriptLanguages.contains(base)) { 82 continue; 83 } 84 if (!missingLanguageRegion.contains(baseRegion) 85 && !assertTrue("Should have " + baseRegion, 86 available.contains(baseRegion))) { 87 missingLanguageRegion.add(baseRegion); 88 } 89 } 90 } 91 92 /** 93 * Determine whether the file should be checked for aliases; this is 94 * currently not done for Keyboard definitions or DTD's 95 * 96 * @param f 97 * the file to check 98 * @return 99 */ 100 protected boolean shouldCheckForAliases(File f) { 101 if (!f.canRead()) { 102 return false; 103 } 104 String absPath = f.getAbsolutePath(); 105 return absPath.endsWith("xml") && !absPath.contains("dtd") 106 && !absPath.contains("keyboard") 107 && !absPath.contains("Keyboard"); 108 } 109 110 /** 111 * Check a single file for aliases, on a content level, the only check that 112 * is done is that the one for readability. 113 * 114 * @param localeName 115 * - the localename 116 * @param file 117 * - the file to check 118 * @param localesWithAliases 119 * - a set of locale strings the files of which contain aliases 120 */ 121 private void checkForAliases(final String localeName, File file, 122 final Set<String> localesWithAliases) { 123 try { 124 if (file.canRead()) { 125 XPathExpressionParser parser = new XPathExpressionParser(file); 126 parser.iterateThroughNodeSet(XPATH_ALIAS_STRING, 127 new XPathExpressionParser.NodeHandlingInterface() { 128 129 // Handle gets called for every node of the node set 130 @Override 131 public void handle(Node result) { 132 if (result instanceof Element) { 133 Element el = (Element) result; 134 // this node likely has an attribute source 135 if (el.hasAttributes()) { 136 String sourceAttr = el 137 .getAttribute("source"); 138 if (sourceAttr != null 139 && !sourceAttr.isEmpty()) { 140 localesWithAliases.add(localeName); 141 } 142 } 143 } 144 } 145 }); 146 } 147 } catch (IOException e) { 148 // TODO Auto-generated catch block 149 e.printStackTrace(); 150 } catch (XPathException e) { 151 // TODO Auto-generated catch block 152 e.printStackTrace(); 153 } 154 } 155 156 /** 157 * Tests the validity of the file names and of the English localeDisplayName 158 * types. Also tests for aliases outside root 159 */ 160 public void TestLocalePartsValidity() { 161 LanguageTagParser ltp = new LanguageTagParser(); 162 final Set<String> localesWithAliases = new HashSet<>(); 163 for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith( 164 ".xml")) { 165 String parent = file.getParent(); 166 if (parent.contains("transform") 167 || parent.contains("bcp47") 168 || parent.contains("supplemental") 169 || parent.contains("validity")) { 170 continue; 171 } 172 String localeName = file.getName(); 173 localeName = localeName.substring(0, localeName.length() - 4); // remove 174 // .xml 175 if (localeName.equals("root") || localeName.equals("_platform")) { 176 continue; 177 } 178 String fileString = file.toString(); 179 checkLocale(fileString, localeName, ltp); 180 // check for aliases 181 if (shouldCheckForAliases(file)) { 182 checkForAliases(localeName, file, localesWithAliases); 183 } 184 } 185 // we ran through all of them 186 if (!localesWithAliases.isEmpty()) { 187 StringBuilder sb = new StringBuilder(); 188 sb.append("\r\n"); 189 sb.append("The following locales have aliases, but must not: "); 190 Iterator<String> lIter = localesWithAliases.iterator(); 191 while (lIter.hasNext()) { 192 sb.append(lIter.next()); 193 sb.append(" "); 194 } 195 System.out.println(sb.toString()); 196 } 197 // now check English-resolved 198 CLDRFile english = testInfo.getEnglish(); 199 for (String xpath : english) { 200 if (!xpath.startsWith("//ldml/localeDisplayNames/")) { 201 continue; 202 } 203 switch (CLDRFile.getNameType(xpath)) { 204 case 0: 205 checkLocale("English xpath", CLDRFile.getCode(xpath), ltp); 206 break; 207 case 1: 208 checkScript("English xpath", CLDRFile.getCode(xpath)); 209 break; 210 case 2: 211 checkRegion("English xpath", CLDRFile.getCode(xpath)); 212 break; 213 } 214 } 215 } 216 217 public void checkLocale(String fileString, String localeName, 218 LanguageTagParser ltp) { 219 ltp.set(localeName); 220 checkLanguage(fileString, ltp.getLanguage()); 221 checkScript(fileString, ltp.getScript()); 222 checkRegion(fileString, ltp.getRegion()); 223 } 224 225 public void checkRegion(String file, String region) { 226 if (!region.isEmpty() && !region.equals("AN") 227 && !region.equals("XA") && !region.equals("XB")) { 228 assertRelation("Region ok? " + region + " in " + file, true, 229 ALLOWED_REGIONS, TestFmwkPlus.CONTAINS, region); 230 } 231 } 232 233 final MatcherPattern SCRIPT_NON_UNICODE = AttributeValueValidity.getMatcherPattern("$scriptNonUnicode"); 234 235 public void checkScript(String file, String script) { 236 if (!script.isEmpty()) { 237 if (!ALLOWED_SCRIPTS.contains(script) && SCRIPT_NON_UNICODE.matches(script, null)) { 238 return; 239 } 240 assertRelation("Script ok? " + script + " in " + file, true, 241 ALLOWED_SCRIPTS, TestFmwkPlus.CONTAINS, script); 242 } 243 } 244 245 public void checkLanguage(String file, String language) { 246 if (!language.equals("root")) { 247 Scope scope = Iso639Data.getScope(language); 248 if (assertRelation("Language ok? " + language + " in " + file, 249 true, ALLOWED_LANGUAGE_SCOPES, TestFmwkPlus.CONTAINS, scope)) { 250 Type type = Iso639Data.getType(language); 251 assertRelation("Language ok? " + language + " in " + file, 252 true, ALLOWED_LANGUAGE_TYPES, TestFmwkPlus.CONTAINS, 253 type); 254 } 255 } 256 } 257 258 public void TestConsistency() { 259 LanguageTagParser ltp = new LanguageTagParser(); 260 SupplementalDataInfo supplementalDataInfo = testInfo 261 .getSupplementalDataInfo(); 262 Set<String> defaultContentLocales = supplementalDataInfo 263 .getDefaultContentLocales(); 264 Map<String, String> likelySubtags = supplementalDataInfo 265 .getLikelySubtags(); 266 267 for (String locale : testInfo.getCldrFactory().getAvailable()) { 268 if (locale.equals("root")) { 269 continue; 270 } 271 ltp.set(locale); 272 boolean isDefaultContent = defaultContentLocales.contains(locale); 273 boolean hasScript = !ltp.getScript().isEmpty(); 274 boolean hasRegion = !ltp.getRegion().isEmpty(); 275 String language = ltp.getLanguage(); 276 String maximized = likelySubtags.get(language); 277 boolean hasLikelySubtag = maximized != null; 278 279 // verify that the parent locales are consistent with the default 280 // locales, for scripts 281 // that is, if zh-Hant has a parent of root, then it is not the 282 // default content locale, and vice versa 283 284 if (hasScript && !hasRegion) { 285 boolean parentIsRoot = "root".equals(supplementalDataInfo 286 .getExplicitParentLocale(locale)); 287 if (parentIsRoot == isDefaultContent) { 288 errln("Inconsistency between parentLocales and defaultContents: " 289 + locale 290 + (parentIsRoot ? " +" : " -") 291 + "parentIsRoot" 292 + (isDefaultContent ? " +" : " -") 293 + "isDefaultContent"); 294 } 295 296 // we'd better have a separate likelySubtag 297 if (parentIsRoot && !hasLikelySubtag) { 298 errln("Missing likely subtags for: " + locale + " " 299 + TestInheritance.suggestLikelySubtagFor(locale)); 300 } 301 } 302 303 // verify that likelySubtags has all the languages 304 305 if (!hasScript && !hasRegion) { 306 if (!hasLikelySubtag) { 307 errln("Missing likely subtags for: " + locale + " " 308 + TestInheritance.suggestLikelySubtagFor(locale)); 309 } 310 } 311 } 312 } 313 314 public void TestCanonicalizer() { 315 LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer(); 316 String[][] tests = { { "iw", "he" }, { "no-YU", "nb_RS" }, 317 { "no", "nb" }, { "eng-833", "en_IM" }, { "mo", "ro_MD" }, 318 { "mo_Cyrl", "ro_Cyrl_MD" }, { "mo_US", "ro_US" }, 319 { "mo_Cyrl_US", "ro_Cyrl_US" }, { "sh", "sr_Latn" }, 320 { "sh_US", "sr_Latn_US" }, { "sh_Cyrl", "sr" }, 321 { "sh_Cyrl_US", "sr_US" }, { "hy_SU", "hy" }, 322 { "hy_AM", "hy" }, { "en_SU", "en_RU" }, 323 { "rO-cYrl-aQ", "ro_Cyrl_AQ" }, }; 324 for (String[] pair : tests) { 325 String actual = canonicalizer.transform(pair[0]); 326 assertEquals("Canonical", pair[1], actual); 327 } 328 } 329 330 public void TestBrackets() { 331 String[][] tests = { 332 { 333 "language", 334 "en", 335 "Anglish (abc)", 336 "en", 337 "Anglish [abc]", 338 "?Anglish [abc]? (U.S. [ghi])?Anglish [abc]? (Latine [def])?Anglish [abc]? (Latine [def], U.S. [ghi])Langue: ?Anglish (abc)?" }, 339 { 340 "script", 341 "Latn", 342 "Latine (def)", 343 "en_Latn", 344 "Anglish [abc] (Latine [def])", 345 "Anglish [abc] (?Latine [def]?)Anglish [abc] (?Latine [def]?, U.S. [ghi])Scripte: ?Latine (def)?" }, 346 { 347 "territory", 348 "US", 349 "U.S. (ghi)", 350 "en_Latn_US", 351 "Anglish [abc] (Latine [def], U.S. [ghi])", 352 "Anglish [abc] (?U.S. [ghi]?)Anglish [abc] (Latine [def], ?U.S. [ghi]?)Territorie: ?U.S. (ghi)?" }, 353 { null, null, null, "en_US", "Anglish [abc] (U.S. [ghi])", null }, 354 { "variant", "FOOBAR", "foo (jkl)", "en_foobar", "Anglish [abc] (foo [jkl])", null }, 355 { "key", "co", "sort (mno)", "en_foobar@co=FOO", "Anglish [abc] (foo [jkl], sort [mno]=foo)", null }, 356 { "key|type", "co|fii", "sortfii (mno)", "en_foobar@co=FII", "Anglish [abc] (foo [jkl], sortfii [mno])", null }, }; 357 // load up a dummy source 358 SimpleXMLSource dxs = new SimpleXMLSource("xx"); 359 for (String[] row : tests) { 360 if (row[0] == null) { 361 continue; 362 } 363 int typeCode = CLDRFile.typeNameToCode(row[0]); 364 String path = CLDRFile.getKey(typeCode, row[1]); 365 dxs.putValueAtDPath(path, row[2]); 366 } 367 // create a cldrfile from it and test 368 SimpleXMLSource root = new SimpleXMLSource("root"); 369 root.putValueAtDPath( 370 "//ldml/localeDisplayNames/localeDisplayPattern/localePattern", 371 "{0} ({1})"); 372 root.putValueAtDPath( 373 "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator", 374 "{0}, {1}"); 375 root.putValueAtDPath( 376 "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"language\"]", 377 "Langue: {0}"); 378 root.putValueAtDPath( 379 "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"script\"]", 380 "Scripte: {0}"); 381 root.putValueAtDPath( 382 "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"territory\"]", 383 "Territorie: {0}"); 384 CLDRFile f = new CLDRFile(dxs, root); 385 ExampleGenerator eg = new ExampleGenerator(f, testInfo.getEnglish(), 386 CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 387 for (String[] row : tests) { 388 if (row[0] != null) { 389 int typeCode = CLDRFile.typeNameToCode(row[0]); 390 String standAlone = f.getName(typeCode, row[1]); 391 if (!assertEquals("stand-alone " + row[3], row[2], standAlone)) { 392 typeCode = CLDRFile.typeNameToCode(row[0]); 393 standAlone = f.getName(typeCode, row[1]); 394 } 395 ; 396 if (row[5] != null) { 397 String path = CLDRFile.getKey(typeCode, row[1]); 398 String example = eg 399 .getExampleHtml(path, "?" + row[2] + "?"); 400 assertEquals("example " + row[3], row[5], 401 ExampleGenerator.simplify(example)); 402 } 403 } 404 String displayName = f.getName(row[3], true, "{0}={1}", 405 "{0} ({1})", "{0}, {1}"); 406 assertEquals("locale " + row[3], row[4], displayName); 407 } 408 } 409 410 public void TestLocaleNamePattern() { 411 assertEquals("Locale name", "Chinese", 412 testInfo.getEnglish().getName("zh")); 413 assertEquals("Locale name", "Chinese (United States)", testInfo 414 .getEnglish().getName("zh-US")); 415 assertEquals("Locale name", "Chinese (Arabic, United States)", testInfo 416 .getEnglish().getName("zh-Arab-US")); 417 CLDRFile japanese = testInfo.getCLDRFile("ja", true); 418 assertEquals("Locale name", "", japanese.getName("zh")); 419 assertEquals("Locale name", " ()", japanese.getName("zh-US")); 420 assertEquals("Locale name", " (\u3001)", 421 japanese.getName("zh-Arab-US")); 422 } 423 424 public void TestExtendedLanguage() { 425 assertEquals("Extended language translation", "Simplified Chinese", 426 testInfo.getEnglish().getName("zh_Hans")); 427 assertEquals("Extended language translation", 428 "Simplified Chinese (Singapore)", testInfo.getEnglish() 429 .getName("zh_Hans_SG")); 430 assertEquals("Extended language translation", "American English", 431 testInfo.getEnglish().getName("en-US")); 432 assertEquals("Extended language translation", 433 "American English (Arabic)", 434 testInfo.getEnglish().getName("en-Arab-US")); 435 } 436 } 437