Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.util.Arrays;
      4 import java.util.Collection;
      5 import java.util.Collections;
      6 import java.util.HashSet;
      7 import java.util.LinkedHashSet;
      8 import java.util.Map;
      9 import java.util.Map.Entry;
     10 import java.util.Set;
     11 import java.util.TreeMap;
     12 import java.util.TreeSet;
     13 import java.util.regex.Pattern;
     14 
     15 import org.unicode.cldr.util.Annotations;
     16 import org.unicode.cldr.util.Annotations.AnnotationSet;
     17 import org.unicode.cldr.util.CLDRConfig;
     18 import org.unicode.cldr.util.CLDRFile;
     19 import org.unicode.cldr.util.CLDRPaths;
     20 import org.unicode.cldr.util.Emoji;
     21 import org.unicode.cldr.util.Factory;
     22 import org.unicode.cldr.util.SimpleFactory;
     23 import org.unicode.cldr.util.XListFormatter;
     24 import org.unicode.cldr.util.XListFormatter.ListTypeLength;
     25 
     26 import com.google.common.base.CharMatcher;
     27 import com.google.common.base.Splitter;
     28 import com.google.common.collect.ImmutableSet;
     29 import com.google.common.collect.ImmutableSortedSet;
     30 import com.google.common.collect.Multimap;
     31 import com.google.common.collect.TreeMultimap;
     32 import com.ibm.icu.dev.util.CollectionUtilities;
     33 import com.ibm.icu.dev.util.UnicodeMap;
     34 import com.ibm.icu.dev.util.UnicodeMap.EntryRange;
     35 import com.ibm.icu.text.UnicodeSet;
     36 
     37 public class TestAnnotations extends TestFmwkPlus {
     38     private static final boolean SHOW_LIST = false;
     39     private static final boolean SHOW_ENGLISH = false;
     40 
     41     public static void main(String[] args) {
     42         new TestAnnotations().run(args);
     43     }
     44 
     45     enum Containment {
     46         contains, empty, not_contains
     47     }
     48 
     49     public void TestBasic() {
     50         String[][] tests = {
     51             { "en", "[\u2650]", "contains", "sagitarius", "zodiac" },
     52             { "en", "[\u0020]", "empty" },
     53             { "en", "[\u2651]", "not_contains", "foobar" },
     54         };
     55         for (String[] test : tests) {
     56             UnicodeMap<Annotations> data = Annotations.getData(test[0]);
     57             UnicodeSet us = new UnicodeSet(test[1]);
     58             Set<String> annotations = new LinkedHashSet<>();
     59             Containment contains = Containment.valueOf(test[2]);
     60             for (int i = 3; i < test.length; ++i) {
     61                 annotations.add(test[i]);
     62             }
     63             for (String s : us) {
     64                 Set<String> set = data.get(s).getKeywords();
     65                 if (set == null) {
     66                     set = Collections.emptySet();
     67                 }
     68                 switch (contains) {
     69                 case contains:
     70                     if (Collections.disjoint(set, annotations)) {
     71                         LinkedHashSet<String> temp = new LinkedHashSet<>(annotations);
     72                         temp.removeAll(set);
     73                         assertEquals("Missing items", Collections.EMPTY_SET, temp);
     74                     }
     75                     break;
     76                 case not_contains:
     77                     if (!Collections.disjoint(set, annotations)) {
     78                         LinkedHashSet<String> temp = new LinkedHashSet<>(annotations);
     79                         temp.retainAll(set);
     80                         assertEquals("Extra items", Collections.EMPTY_SET, temp);
     81                     }
     82                     break;
     83                 case empty:
     84                     assertEquals("mismatch", Collections.emptySet(), set);
     85                     break;
     86                 }
     87             }
     88         }
     89     }
     90 
     91     public void TestList() {
     92         if (!SHOW_LIST) {
     93             return;
     94         }
     95         if (isVerbose()) {
     96             for (String locale : Annotations.getAvailable()) {
     97                 for (EntryRange<Annotations> s : Annotations.getData(locale).entryRanges()) {
     98                     logln(s.toString());
     99                 }
    100             }
    101         }
    102     }
    103 
    104     public void TestNames() {
    105         AnnotationSet eng = Annotations.getDataSet("en");
    106         String[][] tests = { // the expected value for keywords can use , as well as |.
    107             {"", "man: light skin tone", "adult | man | light skin tone"},
    108             {"", "man: blond hair", "blond | blond-haired man | man | man: blond hair"},
    109             {"", "man: light skin tone, blond hair", "blond, blond-haired man, man, man: blond hair, light skin tone, blond hair"},
    110             {"", "man: red hair", "adult | man | red hair"},
    111             { "", "man: light skin tone, red hair", "adult | man | light skin tone| red hair"},
    112             { "", "flag: European Union", "flag" },
    113             { "#", "keycap: #", "keycap" },
    114             { "9", "keycap: 9", "keycap" },
    115             { "", "kiss", "couple | kiss" },
    116             { "", "kiss: woman, woman", "couple | kiss | woman" },
    117             { "", "couple with heart", "couple | couple with heart | love" },
    118             { "", "couple with heart: woman, woman", "couple | couple with heart | love | woman" },
    119             { "", "family", "family" },
    120             { "", "family: woman, woman, girl", "family | woman | girl" },
    121             { "", "boy: light skin tone", "boy | young | light skin tone" },
    122             { "", "woman: dark skin tone", "adult | woman | dark skin tone" },
    123             { "", "man judge", "justice | man | man judge | scales" },
    124             { "", "man judge: dark skin tone", "justice | man | man judge | scales | dark skin tone" },
    125             { "", "woman judge", "judge | scales | woman" },
    126             { "", "woman judge: medium-light skin tone", "judge | scales | woman | medium-light skin tone" },
    127             { "", "police officer", "cop | officer | police" },
    128             { "", "police officer: dark skin tone", "cop | officer | police | dark skin tone" },
    129             { "", "man police officer", "cop | man | officer | police" },
    130             { "", "man police officer: medium-light skin tone", "cop | man | officer | police | medium-light skin tone" },
    131             { "", "woman police officer", "cop | officer | police | woman" },
    132             { "", "woman police officer: dark skin tone", "cop | officer | police | woman | dark skin tone" },
    133             { "", "person biking", "bicycle | biking | cyclist | person biking" },
    134             { "", "person biking: dark skin tone", "bicycle | biking | cyclist | person biking | dark skin tone" },
    135             { "", "man biking", "bicycle | biking | cyclist | man" },
    136             { "", "man biking: dark skin tone", "bicycle | biking | cyclist | man | dark skin tone" },
    137             { "", "woman biking", "bicycle | biking | cyclist | woman" },
    138             { "", "woman biking: dark skin tone", "bicycle | biking | cyclist | woman | dark skin tone" },
    139         };
    140 
    141         Splitter BAR = Splitter.on(CharMatcher.anyOf("|,")).trimResults();
    142         boolean ok = true;
    143         for (String[] test : tests) {
    144             String emoji = test[0];
    145             String expectedName = test[1];
    146             Set<String> expectedKeywords = new HashSet<>(BAR.splitToList(test[2]));
    147             final String shortName = eng.getShortName(emoji);
    148             final Set<String> keywords = eng.getKeywords(emoji);
    149             ok &= assertEquals("short name for " + emoji, expectedName, shortName);
    150             ok &= assertEquals("keywords for " + emoji, expectedKeywords, keywords);
    151         }
    152         if (!ok) {
    153             System.out.println("Possible replacement, but check");
    154             for (String[] test : tests) {
    155                 String emoji = test[0];
    156                 final String shortName = eng.getShortName(emoji);
    157                 final Set<String> keywords = eng.getKeywords(emoji);
    158                 System.out.println("{\"" + emoji
    159                     + "\",\"" + shortName
    160                     + "\",\"" + CollectionUtilities.join(keywords, " | ")
    161                     + "\"},");
    162             }
    163         }
    164 
    165     }
    166 
    167     // comment this out, since we now have console check for this.
    168     public void TestUniqueness() {
    169 //        if (logKnownIssue("cldrbug:10104", "Disable until the uniqueness problems are fixed")) {
    170 //            return;
    171 //        }
    172         Set<String> locales = new TreeSet<>();
    173 
    174         locales.add("en");
    175         locales.addAll(Annotations.getAvailable());
    176         locales.remove("root");
    177 //        if (getInclusion() < 6) {
    178 //            locales.retainAll(CLDRConfig.getInstance().getStandardCodes().getLocaleCoverageLocales(Organization.cldr));
    179 //        }
    180         //locales.remove("sr_Latn");
    181         Multimap<String, String> localeToNameToEmoji = TreeMultimap.create();
    182         Multimap<String, String> nameToEmoji = TreeMultimap.create();
    183         UnicodeMap<Annotations> english = Annotations.getData("en");
    184         AnnotationSet englishSet = Annotations.getDataSet("en");
    185         UnicodeSet englishKeys = getCurrent(english.keySet());
    186         Map<String, UnicodeSet> localeToMissing = new TreeMap<>();
    187 
    188         for (String locale : locales) {
    189             logln("uniqueness: " + locale);
    190             AnnotationSet data = Annotations.getDataSet(locale);
    191             nameToEmoji.clear();
    192             localeToMissing.put(locale, new UnicodeSet(englishKeys).removeAll(data.keySet()).freeze());
    193             for (String emoji : Emoji.getAllRgi()) { // Entry<String, Annotations> value : data.entrySet()) {
    194                 String name = data.getShortName(emoji);
    195                 if (name == null) {
    196                     continue;
    197                 }
    198                 nameToEmoji.put(name, emoji);
    199             }
    200             for (Entry<String, Collection<String>> entry : nameToEmoji.asMap().entrySet()) {
    201                 String name = entry.getKey();
    202                 Collection<String> emojis = entry.getValue();
    203                 if (emojis.size() > 1) {
    204                     errln("Duplicate name in " + locale + ": " + name + " for "
    205                         + CollectionUtilities.join(emojis, " & "));
    206                     localeToNameToEmoji.putAll(locale + "\t" + name, emojis);
    207                 }
    208             }
    209         }
    210         if (isVerbose() && !localeToNameToEmoji.isEmpty()) {
    211             System.out.println("\nCollisions");
    212             for (Entry<String, String> entry : localeToNameToEmoji.entries()) {
    213                 String locale = entry.getKey();
    214                 String emoji = entry.getValue();
    215                 System.out.println(locale
    216                     + "\t" + englishSet.getShortName(emoji)
    217                     + "\t" + emoji);
    218             }
    219         }
    220         if (SHOW_LIST && !localeToMissing.isEmpty()) {
    221             System.out.println("\nMissing");
    222             int count = 2;
    223             for (Entry<String, UnicodeSet> entry : localeToMissing.entrySet()) {
    224                 String locale = entry.getKey();
    225                 for (String emoji : entry.getValue()) {
    226                     System.out.println(locale
    227                         + "\t" + emoji
    228                         + "\t" + englishSet.getShortName(emoji)
    229                         + "\t" + "=GOOGLETRANSLATE(C" + count + ",\"en\",A" + count + ")"
    230                     // =GOOGLETRANSLATE(C2,"en",A2)
    231                     );
    232                     ++count;
    233                 }
    234             }
    235         }
    236 
    237     }
    238 
    239     private UnicodeSet getCurrent(UnicodeSet keySet) {
    240         UnicodeSet currentAge = new UnicodeSet("[:age=9.0:]");
    241         UnicodeSet result = new UnicodeSet();
    242         for (String s : keySet) {
    243             if (currentAge.containsAll(s)) {
    244                 result.add(s);
    245             }
    246         }
    247         return result.freeze();
    248     }
    249 
    250     public void testAnnotationPaths() {
    251         assertTrue("", Emoji.getNonConstructed().contains(""));
    252         Factory factoryAnnotations = SimpleFactory.make(CLDRPaths.ANNOTATIONS_DIRECTORY, ".*");
    253         for (String locale : Arrays.asList("en", "root")) {
    254             CLDRFile enAnnotations = factoryAnnotations.make(locale, false);
    255             //               //ldml/annotations/annotation[@cp=""][@type="tts"]
    256             Set<String> annotationPaths = enAnnotations.getPaths("//ldml/anno",
    257                 Pattern.compile("//ldml/annotations/annotation.*tts.*").matcher(""), new TreeSet<>());
    258             Set<String> annotationPathsExpected = Emoji.getNamePaths();
    259             checkAMinusBIsC(locale + ".xml - Emoji.getNamePaths", annotationPaths, annotationPathsExpected, Collections.<String> emptySet());
    260             checkAMinusBIsC("Emoji.getNamePaths - " + locale + ".xml", annotationPathsExpected, annotationPaths, Collections.<String> emptySet());
    261         }
    262     }
    263 
    264     public void testSuperfluousAnnotationPaths() {
    265         Factory factoryAnnotations = SimpleFactory.make(CLDRPaths.ANNOTATIONS_DIRECTORY, ".*");
    266         ImmutableSet<String> rootPaths = ImmutableSortedSet.copyOf(factoryAnnotations.make("root", false).iterator("//ldml/annotations/"));
    267 
    268         CLDRFile englishAnnotations = factoryAnnotations.make("en", false);
    269         ImmutableSet<String> englishPaths = ImmutableSortedSet.copyOf(englishAnnotations.iterator("//ldml/annotations/"));
    270 
    271         Set<String> superfluous2 = setDifference(rootPaths, englishPaths);
    272         assertTrue("en contains root", superfluous2.isEmpty());
    273         if (!superfluous2.isEmpty()) {
    274             for (String path : superfluous2) {
    275 //              XPathParts parts = XPathParts.getFrozenInstance(path);
    276 //              String emoji = parts.getAttributeValue(-1, "cp");
    277                 System.out.println("locale=en; action=add; path=" + path + "; value=XXX");
    278             }
    279         }
    280 
    281         Set<String> allSuperfluous = new TreeSet<>();
    282         for (String locale : factoryAnnotations.getAvailable()) {
    283             ImmutableSet<String> currentPaths = ImmutableSortedSet.copyOf(factoryAnnotations.make(locale, false).iterator("//ldml/annotations/"));
    284             Set<String> superfluous = setDifference(currentPaths, rootPaths);
    285             assertTrue("root contains " + locale, superfluous.isEmpty());
    286             allSuperfluous.addAll(superfluous);
    287             for (String s : currentPaths) {
    288                 if (s.contains("\uFE0F")) {
    289                     errln("Contains FE0F: " + s);
    290                     break;
    291                 }
    292             }
    293         }
    294         // get items to fix
    295         if (!allSuperfluous.isEmpty()) {
    296             for (String path : allSuperfluous) {
    297 //                XPathParts parts = XPathParts.getFrozenInstance(path);
    298 //                String emoji = parts.getAttributeValue(-1, "cp");
    299                 System.out.println("locale=/.*/; action=delete; path=" + path);
    300             }
    301         }
    302     }
    303 
    304     private Set<String> setDifference(ImmutableSet<String> a, ImmutableSet<String> b) {
    305         Set<String> superfluous = new LinkedHashSet<>(a);
    306         superfluous.removeAll(b);
    307         return superfluous;
    308     }
    309 
    310     private void checkAMinusBIsC(String title, Set<String> a, Set<String> b, Set<String> c) {
    311         Set<String> aMb = new TreeSet<>(a);
    312         aMb.removeAll(b);
    313         assertEquals(title, c, aMb);
    314     }
    315 
    316     public void testListFormatter() {
    317         Object[][] tests = {
    318             {"en", ListTypeLength.NORMAL, "ABC", "A, B, and C"},
    319             {"en", ListTypeLength.AND_SHORT, "ABC", "A, B, and C"},
    320             {"en", ListTypeLength.OR_WIDE, "ABC", "A, B, or C"}
    321         };
    322         Factory factory = CLDRConfig.getInstance().getCldrFactory();
    323         for (Object[] test : tests) {
    324             CLDRFile cldrFile = factory.make((String)(test[0]), true);
    325             ListTypeLength listTypeLength = (ListTypeLength)(test[1]);
    326             String expected = (String)test[3];
    327             XListFormatter xlistFormatter = new XListFormatter(cldrFile, listTypeLength);
    328             String source = (String)test[2];
    329             String actual = xlistFormatter.formatCodePoints(source);
    330             assertEquals(test[0] + ", " + listTypeLength + ", " + source, expected, actual);
    331         }
    332     }
    333 }
    334