Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.io.File;
      4 import java.util.Arrays;
      5 import java.util.Collections;
      6 import java.util.HashSet;
      7 import java.util.List;
      8 import java.util.Map;
      9 import java.util.Map.Entry;
     10 import java.util.Set;
     11 
     12 import org.unicode.cldr.util.CLDRPaths;
     13 import org.unicode.cldr.util.CldrUtility;
     14 import org.unicode.cldr.util.LanguageTagCanonicalizer;
     15 import org.unicode.cldr.util.LanguageTagParser;
     16 import org.unicode.cldr.util.StandardCodes.LstrType;
     17 import org.unicode.cldr.util.Validity;
     18 import org.unicode.cldr.util.Validity.Status;
     19 
     20 import com.google.common.base.Splitter;
     21 import com.google.common.collect.ImmutableMap;
     22 import com.google.common.collect.ImmutableSet;
     23 import com.ibm.icu.text.UnicodeSet;
     24 
     25 public class TestValidity extends TestFmwkPlus {
     26 
     27     private boolean DEBUG = false;
     28 
     29     public static void main(String[] args) {
     30         new TestValidity().run(args);
     31     }
     32 
     33     Validity validity = Validity.getInstance();
     34 
     35     public void TestBasicValidity() {
     36         Object[][] tests = {
     37             { LstrType.language, Validity.Status.regular, true, "aa", "en" },
     38             { LstrType.language, null, false, "eng" }, // null means never found under any status
     39             { LstrType.language, null, false, "root" },
     40             { LstrType.language, Validity.Status.special, true, "mul" },
     41             { LstrType.language, Validity.Status.deprecated, true, "aju" },
     42             { LstrType.language, Validity.Status.private_use, true, "qaa" },
     43             { LstrType.language, Validity.Status.unknown, true, "und" },
     44 
     45             { LstrType.script, Validity.Status.regular, true, "Zyyy" },
     46             { LstrType.script, Validity.Status.special, true, "Zsye" },
     47             { LstrType.script, Validity.Status.regular, true, "Zyyy" },
     48             { LstrType.script, Validity.Status.unknown, true, "Zzzz" },
     49 
     50             { LstrType.region, Validity.Status.deprecated, true, "QU" },
     51             { LstrType.region, Validity.Status.macroregion, true, "EU" },
     52             { LstrType.region, Validity.Status.regular, true, "XK" },
     53             { LstrType.region, Validity.Status.macroregion, true, "001" },
     54             { LstrType.region, Validity.Status.private_use, true, "AA" },
     55             { LstrType.region, Validity.Status.unknown, true, "ZZ" },
     56 
     57             { LstrType.subdivision, Validity.Status.unknown, true, "kzzzzz" },
     58             { LstrType.subdivision, Validity.Status.regular, true, "usca" },
     59             { LstrType.subdivision, Validity.Status.deprecated, true, "albr" },
     60 
     61             { LstrType.currency, Validity.Status.regular, true, "USD" },
     62             { LstrType.currency, Validity.Status.unknown, true, "XXX" },
     63             { LstrType.currency, Validity.Status.deprecated, true, "ADP" },
     64 
     65             { LstrType.unit, Validity.Status.regular, true, "area-acre" },
     66         };
     67         for (Object[] test : tests) {
     68             LstrType lstr = (LstrType) test[0];
     69             Validity.Status subtypeRaw = (Validity.Status) test[1];
     70             Boolean desired = (Boolean) test[2];
     71             for (int i = 3; i < test.length; ++i) {
     72                 String code = (String) test[i];
     73                 List<Status> subtypes = subtypeRaw == null ? Arrays.asList(Status.values()) : Collections.singletonList(subtypeRaw);
     74                 for (Status subtype : subtypes) {
     75                     Set<String> actual = validity.getStatusToCodes(lstr).get(subtype);
     76                     assertRelation("Validity", desired, CldrUtility.ifNull(actual, Collections.EMPTY_SET), TestFmwkPlus.CONTAINS, code);
     77                 }
     78             }
     79         }
     80         if (isVerbose()) {
     81 
     82             for (LstrType lstrType : LstrType.values()) {
     83                 logln(lstrType.toString());
     84                 final Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(lstrType);
     85                 for (Entry<Validity.Status, Set<String>> entry2 : statusToCodes.entrySet()) {
     86                     logln("\t" + entry2.getKey());
     87                     logln("\t\t" + entry2.getValue());
     88                 }
     89             }
     90         }
     91     }
     92 
     93     static final Set<String> ALLOWED_UNDELETIONS = ImmutableSet.of("ug331", "nlbq1", "nlbq2", "nlbq3", "no21", "no22");
     94     static final Set<String> ALLOWED_MISSING = ImmutableSet.of("root", "POSIX", "REVISED", "SAAHO");
     95 
     96     public void TestCompatibility() {
     97         // Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY
     98         if (getInclusion() <= 5) {
     99             return;
    100         }
    101         Set<String> messages = new HashSet<>();
    102         File archive = new File(CLDRPaths.ARCHIVE_DIRECTORY);
    103         for (File cldrArchive : archive.listFiles()) {
    104             if (!cldrArchive.getName().startsWith("cldr-")) {
    105                 continue;
    106             }
    107             File oldValidityLocation = new File(cldrArchive, File.separator + "common" + File.separator + "validity" + File.separator);
    108             if (!oldValidityLocation.exists()) {
    109                 logln("Skipping " + oldValidityLocation);
    110                 continue;
    111             }
    112             logln("Checking " + oldValidityLocation.toString());
    113 //            final String oldValidityLocation = CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + ToolConstants.PREVIOUS_CHART_VERSION +
    114 //                File.separator + "common" + File.separator + "validity" + File.separator;
    115             Validity oldValidity = Validity.getInstance(oldValidityLocation.toString() + File.separator);
    116 
    117             for (LstrType type : LstrType.values()) {
    118                 final Map<Status, Set<String>> statusToCodes = oldValidity.getStatusToCodes(type);
    119                 if (statusToCodes == null) {
    120                     logln("validity data unavailable: " + type);
    121                     continue;
    122                 }
    123                 for (Entry<Status, Set<String>> e2 : statusToCodes.entrySet()) {
    124                     Status oldStatus = e2.getKey();
    125                     for (String code : e2.getValue()) {
    126                         Status newStatus = getNewStatus(type, code);
    127                         if (oldStatus == newStatus) {
    128                             continue;
    129                         }
    130 
    131                         if (newStatus == null) {
    132                             if (ALLOWED_MISSING.contains(code)) {
    133                                 continue;
    134                             }
    135                             errln(messages, type + ":" + code + ":" + oldStatus + " => " + newStatus
    136                                 + "  missing in new data");
    137                         }
    138 
    139                         if (oldStatus == Status.private_use && newStatus == Status.special) {
    140                             logln(messages, "OK: " + type + ":" + code + " was " + oldStatus + " => " + newStatus);
    141                             continue;
    142                         }
    143                         if (oldStatus == Status.special && newStatus == Status.unknown) {
    144                             if (type == LstrType.subdivision && code.endsWith("zzzz")) {
    145                                 continue;
    146                             }
    147                             logln(messages, "OK: " + type + ":" + code + " was " + oldStatus + " => " + newStatus);
    148                             continue;
    149                         }
    150                         if (oldStatus == Status.regular) {
    151                             if (newStatus == Status.deprecated) {
    152 //                                logln(messages, "OK: " + type + ":" + code + " was " + oldStatus + " => " + newStatus);
    153                                 continue;
    154                             }
    155                             errln(messages, type + ":" + code + ":" + oldStatus + " => " + newStatus
    156                                 + "  regular item changed, and didn't become deprecated");
    157                         }
    158                         if (oldStatus == Status.deprecated) {
    159                             if (ALLOWED_UNDELETIONS.contains(code)) {
    160                                 continue;
    161                             }
    162                             errln(messages, type + ":" + code + ":" + oldStatus + " => " + newStatus
    163                                 + " // add to exception list if really un-deprecated");
    164                         } else {
    165                             errln(messages, type + ":" + code + " was " + oldStatus + " => " + newStatus);
    166                         }
    167                     }
    168                 }
    169             }
    170         }
    171     }
    172 
    173     private void logln(Set<String> messages, String string) {
    174         if (!messages.contains(string)) {
    175             logln(string);
    176             messages.add(string);
    177         }
    178     }
    179 
    180     private void errln(Set<String> messages, String string) {
    181         if (!messages.contains(string)) {
    182             errln(string);
    183             messages.add(string);
    184         }
    185     }
    186 
    187 
    188     private Status getNewStatus(LstrType type, String code) {
    189         Map<Status, Set<String>> info = validity.getStatusToCodes(type);
    190         for (Entry<Status, Set<String>> e : info.entrySet()) {
    191             if (e.getValue().contains(code)) {
    192                 return e.getKey();
    193             }
    194         }
    195         return null;
    196     }
    197 
    198     public void TestBothDirections() {
    199         for (LstrType type : LstrType.values()) {
    200             Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(type);
    201             Map<String, Status> codeToStatus = validity.getCodeToStatus(type);
    202             assertEquals("null at same time", statusToCodes == null, codeToStatus == null);
    203             if (statusToCodes == null) {
    204                 logln("validity data unavailable: " + type);
    205                 continue;
    206             }
    207             for (Entry<Status, Set<String>> entry : statusToCodes.entrySet()) {
    208                 Status status = entry.getKey();
    209                 for (String code : entry.getValue()) {
    210                     assertEquals("Forward works", status, codeToStatus.get(code));
    211                 }
    212             }
    213             for (Entry<String, Status> entry : codeToStatus.entrySet()) {
    214                 final String code = entry.getKey();
    215                 final Status status = entry.getValue();
    216                 assertTrue("Reverse works: " + status, statusToCodes.get(status).contains(code));
    217             }
    218         }
    219     }
    220 
    221     public void TestUnits() {
    222         Splitter HYPHEN_SPLITTER = Splitter.on('-');
    223         UnicodeSet allowed = new UnicodeSet("[a-z0-9A-Z]").freeze();
    224         Validity validity = Validity.getInstance();
    225         Map<String, String> shortened = ImmutableMap.<String, String> builder()
    226             .put("acceleration", "accel")
    227             .put("revolution", "revol")
    228             .put("centimeter", "cmeter")
    229             .put("kilometer", "kmeter")
    230             .put("milligram", "mgram")
    231             .put("deciliter", "dliter")
    232             .put("millimole", "mmole")
    233             .put("consumption", "consumpt")
    234             .put("100kilometers", "100km")
    235             .put("microsecond", "microsec")
    236             .put("millisecond", "millisec")
    237             .put("nanosecond", "nanosec")
    238             .put("milliampere", "milliamp")
    239             .put("foodcalorie", "foodcal")
    240             .put("kilocalorie", "kilocal")
    241             .put("kilojoule", "kjoule")
    242             .put("frequency", "freq")
    243             .put("gigahertz", "gigahertz")
    244             .put("kilohertz", "khertz")
    245             .put("megahertz", "megahertz")
    246             .put("astronomical", "astro")
    247             .put("decimeter", "dmeter")
    248             .put("micrometer", "micmeter")
    249             .put("scandinavian", "scand")
    250             .put("millimeter", "mmeter")
    251             .put("nanometer", "nanomete")
    252             .put("picometer", "pmeter")
    253             .put("microgram", "migram")
    254             .put("horsepower", "horsep")
    255             .put("milliwatt", "mwatt")
    256             .put("hectopascal", "hpascal")
    257             .put("temperature", "temp")
    258             .put("fahrenheit", "fahren")
    259             .put("centiliter", "cliter")
    260             .put("hectoliter", "hliter")
    261             .put("megaliter", "megliter")
    262             .put("milliliter", "mliter")
    263             .put("tablespoon", "tblspoon")
    264             .build();
    265 
    266         for (Entry<LstrType, Map<Status, Set<String>>> e1 : validity.getData().entrySet()) {
    267             LstrType lstrType = e1.getKey();
    268             for (Entry<Status, Set<String>> e2 : e1.getValue().entrySet()) {
    269                 Status status = e2.getKey();
    270                 for (String code : e2.getValue()) {
    271                     StringBuilder fixed = new StringBuilder();
    272                     for (String subcode : HYPHEN_SPLITTER.split(code)) {
    273                         if (fixed.length() > 0) {
    274                             fixed.append('-');
    275                         }
    276                         if (!allowed.containsAll(subcode)) {
    277                             errln("subcode has illegal character: " + subcode + ", in " + code);
    278                         } else if (subcode.length() > 8) {
    279                             fixed.append(shorten(subcode, shortened));
    280                         } else {
    281                             fixed.append(subcode);
    282                         }
    283                     }
    284                     String fixedCode = fixed.toString();
    285                     if (!fixedCode.equals(code)) {
    286                         warnln("code has overlong subcode: " + code + " should have short alias in bcp47 " + fixedCode);
    287                     }
    288                 }
    289             }
    290         }
    291 
    292         if (DEBUG) {
    293             for (Entry<String, String> e : shortened.entrySet()) {
    294                 System.out.println('"' + e.getKey() + "\", \"" + e.getValue() + "\",");
    295             }
    296         }
    297     }
    298 
    299     private String shorten(String subcode, Map<String, String> shortened) {
    300         String result = shortened.get(subcode);
    301         if (result != null) return result;
    302 
    303         switch (subcode) {
    304         case "temperature":
    305             result = "temp";
    306             break;
    307         case "acceleration":
    308             result = "accel";
    309             break;
    310         case "frequency":
    311             result = "freq";
    312             break;
    313         default:
    314             result = subcode.substring(0, 8);
    315             break;
    316         }
    317         // shortened.put(subcode, result);
    318         return result;
    319     }
    320 
    321     public void TestLanguageTagParser() {
    322         String[][] tests = {
    323             { "en-cyrl_ru_variant2_variant1", "en_Cyrl_RU_VARIANT1_VARIANT2", "en-Cyrl-RU-variant1-variant2" },
    324             { "EN-U-CO-PHONEBK-EM-EMOJI-T_RU", "en_t_ru_u_co_phonebk_em_emoji", "en-t-ru-u-co-phonebk-em-emoji" },
    325         };
    326         LanguageTagParser ltp = new LanguageTagParser();
    327         for (String[] test : tests) {
    328             String source = test[0];
    329             String expectedLanguageSubtagParserIcu = test[1];
    330             String expectedLanguageSubtagParserBCP = test[2];
    331             ltp.set(source);
    332             String actualLanguageSubtagParserIcu = ltp.toString();
    333             assertEquals("Language subtag (ICU) for " + source, expectedLanguageSubtagParserIcu, actualLanguageSubtagParserIcu);
    334             String actualLanguageSubtagParserBCP = ltp.toString(LanguageTagParser.OutputOption.BCP47);
    335             assertEquals("Language subtag (BCP47) for " + source, expectedLanguageSubtagParserBCP, actualLanguageSubtagParserBCP);
    336         }
    337     }
    338 
    339     public void TestLanguageTagCanonicalizer() {
    340         String[][] tests = {
    341             { "de-fonipa", "de_FONIPA" },
    342             { "el-1901-polytoni-aaland", "el_AX_1901_POLYTON" },
    343             { "en-POLYTONI-WHATEVER-ANYTHING-AALAND", "en_AX_ANYTHING_POLYTON_WHATEVER" },
    344             { "eng-840", "en" },
    345             { "sh_ba", "sr_Latn_BA" },
    346             { "iw-arab-010", "he_Arab_AQ" },
    347             { "und", "und" },
    348             { "und_us", "und_US" },
    349             { "und_su", "und_RU" },
    350         };
    351         LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();
    352         for (String[] inputExpected : tests) {
    353             assertEquals("Canonicalize", inputExpected[1], canon.transform(inputExpected[0]));
    354         }
    355     }
    356 }
    357