package org.unicode.cldr.unittest;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.LanguageTagCanonicalizer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.text.UnicodeSet;

/**
 * Tests for the {@link Validity} code/status tables, plus basic checks of
 * {@link LanguageTagParser} and {@link LanguageTagCanonicalizer}.
 */
public class TestValidity extends TestFmwkPlus {

    /** When true, {@link #TestUnits()} dumps its abbreviation table to stdout. */
    private static final boolean DEBUG = false;

    /** Splits a code into its hyphen-separated subcodes. */
    private static final Splitter HYPHEN_SPLITTER = Splitter.on('-');

    /** Maximum subcode length accepted by {@link #TestUnits()} before it asks for a short alias. */
    private static final int MAX_SUBCODE_LENGTH = 8;

    public static void main(String[] args) {
        new TestValidity().run(args);
    }

    /** Current validity data, shared by all tests in this class. */
    Validity validity = Validity.getInstance();

    /** Codes that were deprecated in archived data and may legitimately have a different status now. */
    static final Set<String> ALLOWED_UNDELETIONS = ImmutableSet.of("ug331", "nlbq1", "nlbq2", "nlbq3", "no21", "no22");

    /** Codes present in archived data that may legitimately be absent from the current data. */
    static final Set<String> ALLOWED_MISSING = ImmutableSet.of("root", "POSIX", "REVISED", "SAAHO");

    /**
     * Spot-checks that selected codes are (or are not) listed under the expected status.
     *
     * <p>Each row is {@code {type, status, expectedContained, code...}}. A {@code null}
     * status means the check is applied against every {@link Status} value.
     */
    public void TestBasicValidity() {
        Object[][] tests = {
            { LstrType.language, Validity.Status.regular, true, "aa", "en" },
            { LstrType.language, null, false, "eng" }, // null means never found under any status
            { LstrType.language, null, false, "root" },
            { LstrType.language, Validity.Status.special, true, "mul" },
            { LstrType.language, Validity.Status.deprecated, true, "aju" },
            { LstrType.language, Validity.Status.private_use, true, "qaa" },
            { LstrType.language, Validity.Status.unknown, true, "und" },

            { LstrType.script, Validity.Status.regular, true, "Zyyy" },
            { LstrType.script, Validity.Status.special, true, "Zsye" },
            { LstrType.script, Validity.Status.unknown, true, "Zzzz" },

            { LstrType.region, Validity.Status.deprecated, true, "QU" },
            { LstrType.region, Validity.Status.macroregion, true, "EU" },
            { LstrType.region, Validity.Status.regular, true, "XK" },
            { LstrType.region, Validity.Status.macroregion, true, "001" },
            { LstrType.region, Validity.Status.private_use, true, "AA" },
            { LstrType.region, Validity.Status.unknown, true, "ZZ" },

            { LstrType.subdivision, Validity.Status.unknown, true, "kzzzzz" },
            { LstrType.subdivision, Validity.Status.regular, true, "usca" },
            { LstrType.subdivision, Validity.Status.deprecated, true, "albr" },

            { LstrType.currency, Validity.Status.regular, true, "USD" },
            { LstrType.currency, Validity.Status.unknown, true, "XXX" },
            { LstrType.currency, Validity.Status.deprecated, true, "ADP" },

            { LstrType.unit, Validity.Status.regular, true, "area-acre" },
        };
        for (Object[] test : tests) {
            LstrType lstr = (LstrType) test[0];
            Validity.Status subtypeRaw = (Validity.Status) test[1];
            Boolean desired = (Boolean) test[2];

            // Both lookups depend only on the row, not on the individual code.
            List<Status> subtypes = subtypeRaw == null
                ? Arrays.asList(Status.values())
                : Collections.singletonList(subtypeRaw);
            Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(lstr);
            if (statusToCodes == null) {
                errln("validity data unavailable: " + lstr);
                continue;
            }

            for (int i = 3; i < test.length; ++i) {
                String code = (String) test[i];
                for (Status subtype : subtypes) {
                    Set<String> actual = statusToCodes.get(subtype);
                    // A status with no codes at all is treated as an empty set.
                    assertRelation("Validity", desired, CldrUtility.ifNull(actual, Collections.EMPTY_SET), TestFmwkPlus.CONTAINS, code);
                }
            }
        }
        if (isVerbose()) {
            for (LstrType lstrType : LstrType.values()) {
                logln(lstrType.toString());
                final Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(lstrType);
                if (statusToCodes == null) {
                    logln("validity data unavailable: " + lstrType);
                    continue;
                }
                for (Entry<Validity.Status, Set<String>> entry2 : statusToCodes.entrySet()) {
                    logln("\t" + entry2.getKey());
                    logln("\t\t" + entry2.getValue());
                }
            }
        }
    }

    /**
     * Compares the validity data of every {@code cldr-*} directory under
     * {@link CLDRPaths#ARCHIVE_DIRECTORY} with the current data and reports
     * status changes that are not explicitly allowed.
     */
    public void TestCompatibility() {
        // Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY
        if (getInclusion() <= 5) {
            return;
        }
        Set<String> messages = new HashSet<>();
        File archive = new File(CLDRPaths.ARCHIVE_DIRECTORY);
        // listFiles() returns null when the path is not a readable directory.
        File[] cldrArchives = archive.listFiles();
        if (cldrArchives == null) {
            errln("Cannot list archive directory: " + archive);
            return;
        }
        for (File cldrArchive : cldrArchives) {
            if (!cldrArchive.getName().startsWith("cldr-")) {
                continue;
            }
            File oldValidityLocation = new File(cldrArchive, File.separator + "common" + File.separator + "validity" + File.separator);
            if (!oldValidityLocation.exists()) {
                logln("Skipping " + oldValidityLocation);
                continue;
            }
            logln("Checking " + oldValidityLocation.toString());
            Validity oldValidity = Validity.getInstance(oldValidityLocation.toString() + File.separator);

            for (LstrType type : LstrType.values()) {
                final Map<Status, Set<String>> statusToCodes = oldValidity.getStatusToCodes(type);
                if (statusToCodes == null) {
                    logln("validity data unavailable: " + type);
                    continue;
                }
                for (Entry<Status, Set<String>> e2 : statusToCodes.entrySet()) {
                    Status oldStatus = e2.getKey();
                    for (String code : e2.getValue()) {
                        checkStatusChange(messages, type, code, oldStatus, getNewStatus(type, code));
                    }
                }
            }
        }
    }

    /**
     * Classifies one old-status/new-status pair and reports it at most once.
     *
     * @param messages  messages already emitted, used to suppress duplicates
     * @param type      code type being compared
     * @param code      the code whose status is compared
     * @param oldStatus status in the archived data
     * @param newStatus status in the current data, or null if the code is absent
     */
    private void checkStatusChange(Set<String> messages, LstrType type, String code, Status oldStatus, Status newStatus) {
        if (oldStatus == newStatus) {
            return;
        }

        if (newStatus == null) {
            if (!ALLOWED_MISSING.contains(code)) {
                errln(messages, type + ":" + code + ":" + oldStatus + " => " + newStatus
                    + " missing in new data");
            }
            // A missing code is fully handled here; none of the transition rules below apply to it.
            return;
        }

        if (oldStatus == Status.private_use && newStatus == Status.special) {
            logln(messages, "OK: " + type + ":" + code + " was " + oldStatus + " => " + newStatus);
            return;
        }
        if (oldStatus == Status.special && newStatus == Status.unknown) {
            // Subdivision codes ending in "zzzz" are accepted silently.
            if (!(type == LstrType.subdivision && code.endsWith("zzzz"))) {
                logln(messages, "OK: " + type + ":" + code + " was " + oldStatus + " => " + newStatus);
            }
            return;
        }
        if (oldStatus == Status.regular) {
            if (newStatus != Status.deprecated) {
                errln(messages, type + ":" + code + ":" + oldStatus + " => " + newStatus
                    + " regular item changed, and didn't become deprecated");
            }
            return;
        }
        if (oldStatus == Status.deprecated) {
            if (!ALLOWED_UNDELETIONS.contains(code)) {
                errln(messages, type + ":" + code + ":" + oldStatus + " => " + newStatus
                    + " // add to exception list if really un-deprecated");
            }
            return;
        }
        errln(messages, type + ":" + code + " was " + oldStatus + " => " + newStatus);
    }

    /** Logs {@code string} unless the same text is already recorded in {@code messages}. */
    private void logln(Set<String> messages, String string) {
        if (messages.add(string)) {
            logln(string);
        }
    }

    /** Reports {@code string} as an error unless the same text is already recorded in {@code messages}. */
    private void errln(Set<String> messages, String string) {
        if (messages.add(string)) {
            errln(string);
        }
    }

    /**
     * Looks up the status of a code in the current validity data.
     *
     * @return the first status whose code set contains {@code code}, or null if
     *         there is no data for {@code type} or no status lists the code
     */
    private Status getNewStatus(LstrType type, String code) {
        Map<Status, Set<String>> info = validity.getStatusToCodes(type);
        if (info == null) {
            return null;
        }
        for (Entry<Status, Set<String>> e : info.entrySet()) {
            if (e.getValue().contains(code)) {
                return e.getKey();
            }
        }
        return null;
    }

    /**
     * Checks that the status-to-codes and code-to-status views returned by
     * {@link Validity} agree with each other for every type.
     */
    public void TestBothDirections() {
        for (LstrType type : LstrType.values()) {
            Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(type);
            Map<String, Status> codeToStatus = validity.getCodeToStatus(type);
            assertEquals("null at same time", statusToCodes == null, codeToStatus == null);
            // A mismatch was already reported by the assertion above; either way there is nothing to compare.
            if (statusToCodes == null || codeToStatus == null) {
                logln("validity data unavailable: " + type);
                continue;
            }
            for (Entry<Status, Set<String>> entry : statusToCodes.entrySet()) {
                Status status = entry.getKey();
                for (String code : entry.getValue()) {
                    assertEquals("Forward works", status, codeToStatus.get(code));
                }
            }
            for (Entry<String, Status> entry : codeToStatus.entrySet()) {
                final String code = entry.getKey();
                final Status status = entry.getValue();
                final Set<String> codes = statusToCodes.get(status);
                assertTrue("Reverse works: " + status, codes != null && codes.contains(code));
            }
        }
    }

    /**
     * Checks every code in the validity data: each hyphen-separated subcode must
     * consist of ASCII letters and digits only (an error otherwise), and a
     * warning is issued for codes containing a subcode longer than
     * {@value #MAX_SUBCODE_LENGTH} characters, together with a suggested short form.
     */
    public void TestUnits() {
        UnicodeSet allowed = new UnicodeSet("[a-z0-9A-Z]").freeze();
        Map<String, String> shortened = ImmutableMap.<String, String> builder()
            .put("acceleration", "accel")
            .put("revolution", "revol")
            .put("centimeter", "cmeter")
            .put("kilometer", "kmeter")
            .put("milligram", "mgram")
            .put("deciliter", "dliter")
            .put("millimole", "mmole")
            .put("consumption", "consumpt")
            .put("100kilometers", "100km")
            .put("microsecond", "microsec")
            .put("millisecond", "millisec")
            .put("nanosecond", "nanosec")
            .put("milliampere", "milliamp")
            .put("foodcalorie", "foodcal")
            .put("kilocalorie", "kilocal")
            .put("kilojoule", "kjoule")
            .put("frequency", "freq")
            .put("gigahertz", "gigahertz")
            .put("kilohertz", "khertz")
            .put("megahertz", "megahertz")
            .put("astronomical", "astro")
            .put("decimeter", "dmeter")
            .put("micrometer", "micmeter")
            .put("scandinavian", "scand")
            .put("millimeter", "mmeter")
            .put("nanometer", "nanomete")
            .put("picometer", "pmeter")
            .put("microgram", "migram")
            .put("horsepower", "horsep")
            .put("milliwatt", "mwatt")
            .put("hectopascal", "hpascal")
            .put("temperature", "temp")
            .put("fahrenheit", "fahren")
            .put("centiliter", "cliter")
            .put("hectoliter", "hliter")
            .put("megaliter", "megliter")
            .put("milliliter", "mliter")
            .put("tablespoon", "tblspoon")
            .build();

        for (Map<Status, Set<String>> statusToCodes : validity.getData().values()) {
            for (Set<String> codes : statusToCodes.values()) {
                for (String code : codes) {
                    StringBuilder fixed = new StringBuilder();
                    for (String subcode : HYPHEN_SPLITTER.split(code)) {
                        if (fixed.length() > 0) {
                            fixed.append('-');
                        }
                        if (!allowed.containsAll(subcode)) {
                            errln("subcode has illegal character: " + subcode + ", in " + code);
                            // Keep the subcode so that the illegal character is not
                            // also reported below as an "overlong subcode".
                            fixed.append(subcode);
                        } else if (subcode.length() > MAX_SUBCODE_LENGTH) {
                            fixed.append(shorten(subcode, shortened));
                        } else {
                            fixed.append(subcode);
                        }
                    }
                    String fixedCode = fixed.toString();
                    if (!fixedCode.equals(code)) {
                        warnln("code has overlong subcode: " + code + " should have short alias in bcp47 " + fixedCode);
                    }
                }
            }
        }

        if (DEBUG) {
            for (Entry<String, String> e : shortened.entrySet()) {
                System.out.println('"' + e.getKey() + "\", \"" + e.getValue() + "\",");
            }
        }
    }

    /**
     * Returns a short form for {@code subcode}.
     *
     * @param subcode   the subcode to abbreviate
     * @param shortened known abbreviations, consulted first
     * @return the mapped abbreviation if present; otherwise a built-in
     *         abbreviation for a few known words; otherwise the subcode cut to
     *         at most {@value #MAX_SUBCODE_LENGTH} characters
     */
    private String shorten(String subcode, Map<String, String> shortened) {
        String result = shortened.get(subcode);
        if (result != null) {
            return result;
        }

        switch (subcode) {
        case "temperature":
            return "temp";
        case "acceleration":
            return "accel";
        case "frequency":
            return "freq";
        default:
            // Guard the cut so that short inputs are returned unchanged instead of throwing.
            return subcode.length() > MAX_SUBCODE_LENGTH
                ? subcode.substring(0, MAX_SUBCODE_LENGTH)
                : subcode;
        }
    }

    /**
     * Checks the default and BCP 47 string forms produced by
     * {@link LanguageTagParser} for a few source tags.
     */
    public void TestLanguageTagParser() {
        // Each row: source tag, expected toString(), expected toString(BCP47).
        String[][] tests = {
            { "en-cyrl_ru_variant2_variant1", "en_Cyrl_RU_VARIANT1_VARIANT2", "en-Cyrl-RU-variant1-variant2" },
            { "EN-U-CO-PHONEBK-EM-EMOJI-T_RU", "en_t_ru_u_co_phonebk_em_emoji", "en-t-ru-u-co-phonebk-em-emoji" },
        };
        LanguageTagParser ltp = new LanguageTagParser();
        for (String[] test : tests) {
            String source = test[0];
            String expectedLanguageSubtagParserIcu = test[1];
            String expectedLanguageSubtagParserBCP = test[2];
            ltp.set(source);
            String actualLanguageSubtagParserIcu = ltp.toString();
            assertEquals("Language subtag (ICU) for " + source, expectedLanguageSubtagParserIcu, actualLanguageSubtagParserIcu);
            String actualLanguageSubtagParserBCP = ltp.toString(LanguageTagParser.OutputOption.BCP47);
            assertEquals("Language subtag (BCP47) for " + source, expectedLanguageSubtagParserBCP, actualLanguageSubtagParserBCP);
        }
    }

    /** Checks the output of {@link LanguageTagCanonicalizer#transform} for a few tags. */
    public void TestLanguageTagCanonicalizer() {
        // Each row: input tag, expected canonical form.
        String[][] tests = {
            { "de-fonipa", "de_FONIPA" },
            { "el-1901-polytoni-aaland", "el_AX_1901_POLYTON" },
            { "en-POLYTONI-WHATEVER-ANYTHING-AALAND", "en_AX_ANYTHING_POLYTON_WHATEVER" },
            { "eng-840", "en" },
            { "sh_ba", "sr_Latn_BA" },
            { "iw-arab-010", "he_Arab_AQ" },
            { "und", "und" },
            { "und_us", "und_US" },
            { "und_su", "und_RU" },
        };
        LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();
        for (String[] inputExpected : tests) {
            assertEquals("Canonicalize", inputExpected[1], canon.transform(inputExpected[0]));
        }
    }
}