package org.unicode.cldr.unittest;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.Annotations.AnnotationSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Emoji;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.XListFormatter;
import org.unicode.cldr.util.XListFormatter.ListTypeLength;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.dev.util.UnicodeMap.EntryRange;
import com.ibm.icu.text.UnicodeSet;

/**
 * Tests for the annotation data (short names and keywords) exposed by
 * {@link Annotations}, for the annotation XML paths, and for
 * {@link XListFormatter}.
 */
public class TestAnnotations extends TestFmwkPlus {
    /** Enables the listing in {@link #TestList()} and the "Missing" report in {@link #TestUniqueness()}. */
    private static final boolean SHOW_LIST = false;
    /** Not referenced anywhere in this file. */
    private static final boolean SHOW_ENGLISH = false;

    /** XPath prefix shared by the annotation path checks. */
    private static final String ANNOTATIONS_PREFIX = "//ldml/annotations/";

    public static void main(String[] args) {
        new TestAnnotations().run(args);
    }

    /**
     * Kind of check applied to a row of {@link #TestBasic()}. The constant
     * names are looked up with {@code valueOf} from the test table, so they
     * must match the strings used there.
     */
    enum Containment {
        contains, empty, not_contains
    }

    /**
     * Checks keyword containment for a few English annotations.
     *
     * <p>Each row is: locale, UnicodeSet pattern, {@link Containment} name,
     * followed by zero or more keywords.
     */
    public void TestBasic() {
        String[][] tests = {
            { "en", "[\u2650]", "contains", "sagitarius", "zodiac" },
            // NOTE(review): UnicodeSet patterns presumably ignore unquoted white
            // space, in which case this set is empty and the row checks nothing
            // -- confirm.
            { "en", "[\u0020]", "empty" },
            { "en", "[\u2651]", "not_contains", "foobar" },
        };
        for (String[] test : tests) {
            UnicodeMap<Annotations> data = Annotations.getData(test[0]);
            UnicodeSet us = new UnicodeSet(test[1]);
            Containment contains = Containment.valueOf(test[2]);
            Set<String> annotations = new LinkedHashSet<>();
            for (int i = 3; i < test.length; ++i) {
                annotations.add(test[i]);
            }
            for (String s : us) {
                // Guard the lookup: a code point without an entry would
                // otherwise fail with a NullPointerException instead of being
                // treated as "no keywords".
                Annotations annotation = data.get(s);
                Set<String> set = annotation == null ? null : annotation.getKeywords();
                if (set == null) {
                    set = Collections.emptySet();
                }
                switch (contains) {
                case contains:
                    // NOTE(review): this only reports a failure when NONE of
                    // the expected keywords is present; a partially missing
                    // list passes silently. Left unchanged because tightening
                    // it changes which data passes -- confirm the intent.
                    if (Collections.disjoint(set, annotations)) {
                        Set<String> missing = new LinkedHashSet<>(annotations);
                        missing.removeAll(set);
                        assertEquals("Missing items", Collections.emptySet(), missing);
                    }
                    break;
                case not_contains:
                    if (!Collections.disjoint(set, annotations)) {
                        Set<String> extra = new LinkedHashSet<>(annotations);
                        extra.retainAll(set);
                        assertEquals("Extra items", Collections.emptySet(), extra);
                    }
                    break;
                case empty:
                    assertEquals("mismatch", Collections.emptySet(), set);
                    break;
                }
            }
        }
    }

    /**
     * Logs every entry range of every available locale's annotation data.
     * Does nothing unless {@link #SHOW_LIST} is set and the test is run
     * verbosely.
     */
    public void TestList() {
        if (!SHOW_LIST) {
            return;
        }
        if (isVerbose()) {
            for (String locale : Annotations.getAvailable()) {
                for (EntryRange<Annotations> s : Annotations.getData(locale).entryRanges()) {
                    logln(s.toString());
                }
            }
        }
    }

    /**
     * Compares the English short name and keyword set of each listed emoji
     * with the expected values. On any mismatch, prints a candidate
     * replacement table built from the current data.
     *
     * <p>NOTE(review): most rows have an empty string in the emoji column in
     * this copy of the file; presumably the emoji characters were lost when
     * the file was copied -- confirm against the repository before relying
     * on these rows.
     */
    public void TestNames() {
        AnnotationSet eng = Annotations.getDataSet("en");
        // Columns: emoji, expected short name, expected keywords.
        // The expected value for keywords can use , as well as |.
        String[][] tests = {
            { "", "man: light skin tone", "adult | man | light skin tone" },
            { "", "man: blond hair", "blond | blond-haired man | man | man: blond hair" },
            { "", "man: light skin tone, blond hair", "blond, blond-haired man, man, man: blond hair, light skin tone, blond hair" },
            { "", "man: red hair", "adult | man | red hair" },
            { "", "man: light skin tone, red hair", "adult | man | light skin tone| red hair" },
            { "", "flag: European Union", "flag" },
            { "#", "keycap: #", "keycap" },
            { "9", "keycap: 9", "keycap" },
            { "", "kiss", "couple | kiss" },
            { "", "kiss: woman, woman", "couple | kiss | woman" },
            { "", "couple with heart", "couple | couple with heart | love" },
            { "", "couple with heart: woman, woman", "couple | couple with heart | love | woman" },
            { "", "family", "family" },
            { "", "family: woman, woman, girl", "family | woman | girl" },
            { "", "boy: light skin tone", "boy | young | light skin tone" },
            { "", "woman: dark skin tone", "adult | woman | dark skin tone" },
            { "", "man judge", "justice | man | man judge | scales" },
            { "", "man judge: dark skin tone", "justice | man | man judge | scales | dark skin tone" },
            { "", "woman judge", "judge | scales | woman" },
            { "", "woman judge: medium-light skin tone", "judge | scales | woman | medium-light skin tone" },
            { "", "police officer", "cop | officer | police" },
            { "", "police officer: dark skin tone", "cop | officer | police | dark skin tone" },
            { "", "man police officer", "cop | man | officer | police" },
            { "", "man police officer: medium-light skin tone", "cop | man | officer | police | medium-light skin tone" },
            { "", "woman police officer", "cop | officer | police | woman" },
            { "", "woman police officer: dark skin tone", "cop | officer | police | woman | dark skin tone" },
            { "", "person biking", "bicycle | biking | cyclist | person biking" },
            { "", "person biking: dark skin tone", "bicycle | biking | cyclist | person biking | dark skin tone" },
            { "", "man biking", "bicycle | biking | cyclist | man" },
            { "", "man biking: dark skin tone", "bicycle | biking | cyclist | man | dark skin tone" },
            { "", "woman biking", "bicycle | biking | cyclist | woman" },
            { "", "woman biking: dark skin tone", "bicycle | biking | cyclist | woman | dark skin tone" },
        };

        // Splits on either separator and trims each piece.
        Splitter BAR = Splitter.on(CharMatcher.anyOf("|,")).trimResults();
        boolean ok = true;
        for (String[] test : tests) {
            String emoji = test[0];
            String expectedName = test[1];
            Set<String> expectedKeywords = new HashSet<>(BAR.splitToList(test[2]));
            final String shortName = eng.getShortName(emoji);
            final Set<String> keywords = eng.getKeywords(emoji);
            ok &= assertEquals("short name for " + emoji, expectedName, shortName);
            ok &= assertEquals("keywords for " + emoji, expectedKeywords, keywords);
        }
        if (!ok) {
            // Emit rows in the same shape as the table above so they can be
            // pasted back after manual review.
            System.out.println("Possible replacement, but check");
            for (String[] test : tests) {
                String emoji = test[0];
                final String shortName = eng.getShortName(emoji);
                final Set<String> keywords = eng.getKeywords(emoji);
                System.out.println("{\"" + emoji
                    + "\",\"" + shortName
                    + "\",\"" + CollectionUtilities.join(keywords, " | ")
                    + "\"},");
            }
        }
    }

    /**
     * Reports an error for every short name that is shared by more than one
     * RGI emoji within a locale.
     *
     * <p>When verbose, also prints the collisions together with the English
     * short names. When {@link #SHOW_LIST} is set, prints the emoji that
     * English has but the locale lacks, as spreadsheet rows.
     *
     * <p>NOTE(review): an earlier comment here said to comment this test out
     * because a console check now covers it, but the test is still active --
     * confirm whether it should remain enabled.
     */
    public void TestUniqueness() {
        // Previously guarded by:
        //   logKnownIssue("cldrbug:10104", "Disable until the uniqueness problems are fixed")
        Set<String> locales = new TreeSet<>();

        locales.add("en");
        locales.addAll(Annotations.getAvailable());
        locales.remove("root");
        // Previously restricted when getInclusion() < 6:
        //   locales.retainAll(CLDRConfig.getInstance().getStandardCodes().getLocaleCoverageLocales(Organization.cldr));
        // Previously excluded: sr_Latn

        // Key is locale + TAB + name; values are the colliding emoji.
        Multimap<String, String> localeToNameToEmoji = TreeMultimap.create();
        // Reused per locale: short name -> emoji carrying that name.
        Multimap<String, String> nameToEmoji = TreeMultimap.create();
        UnicodeMap<Annotations> english = Annotations.getData("en");
        AnnotationSet englishSet = Annotations.getDataSet("en");
        UnicodeSet englishKeys = getCurrent(english.keySet());
        Map<String, UnicodeSet> localeToMissing = new TreeMap<>();

        for (String locale : locales) {
            logln("uniqueness: " + locale);
            AnnotationSet data = Annotations.getDataSet(locale);
            nameToEmoji.clear();
            localeToMissing.put(locale, new UnicodeSet(englishKeys).removeAll(data.keySet()).freeze());
            for (String emoji : Emoji.getAllRgi()) {
                String name = data.getShortName(emoji);
                if (name != null) {
                    nameToEmoji.put(name, emoji);
                }
            }
            for (Entry<String, Collection<String>> entry : nameToEmoji.asMap().entrySet()) {
                String name = entry.getKey();
                Collection<String> emojis = entry.getValue();
                if (emojis.size() > 1) {
                    errln("Duplicate name in " + locale + ": " + name + " for "
                        + CollectionUtilities.join(emojis, " & "));
                    localeToNameToEmoji.putAll(locale + "\t" + name, emojis);
                }
            }
        }
        if (isVerbose() && !localeToNameToEmoji.isEmpty()) {
            System.out.println("\nCollisions");
            for (Entry<String, String> entry : localeToNameToEmoji.entries()) {
                String localeAndName = entry.getKey();
                String emoji = entry.getValue();
                System.out.println(localeAndName
                    + "\t" + englishSet.getShortName(emoji)
                    + "\t" + emoji);
            }
        }
        if (SHOW_LIST && !localeToMissing.isEmpty()) {
            System.out.println("\nMissing");
            // Row counter used in the generated formula; the first row is 2.
            int count = 2;
            for (Entry<String, UnicodeSet> entry : localeToMissing.entrySet()) {
                String locale = entry.getKey();
                for (String emoji : entry.getValue()) {
                    // Formula has the shape =GOOGLETRANSLATE(C2,"en",A2)
                    System.out.println(locale
                        + "\t" + emoji
                        + "\t" + englishSet.getShortName(emoji)
                        + "\t" + "=GOOGLETRANSLATE(C" + count + ",\"en\",A" + count + ")");
                    ++count;
                }
            }
        }
    }

    /**
     * Filters a set down to the strings accepted by
     * {@code [:age=9.0:].containsAll(s)}.
     *
     * @param keySet the strings to filter
     * @return a frozen set holding the accepted strings
     */
    private UnicodeSet getCurrent(UnicodeSet keySet) {
        UnicodeSet currentAge = new UnicodeSet("[:age=9.0:]");
        UnicodeSet result = new UnicodeSet();
        for (String s : keySet) {
            if (currentAge.containsAll(s)) {
                result.add(s);
            }
        }
        return result.freeze();
    }

    /**
     * Checks that the tts annotation paths found in en.xml and root.xml are
     * exactly the paths returned by {@link Emoji#getNamePaths()}.
     */
    public void testAnnotationPaths() {
        // NOTE(review): the string literal below is empty in this copy of the
        // file; presumably it held an emoji -- confirm against the repository.
        assertTrue("", Emoji.getNonConstructed().contains(""));
        Factory factoryAnnotations = SimpleFactory.make(CLDRPaths.ANNOTATIONS_DIRECTORY, ".*");
        // Matches paths such as //ldml/annotations/annotation[@cp=""][@type="tts"]
        // Compiled once instead of once per locale.
        Pattern ttsPath = Pattern.compile("//ldml/annotations/annotation.*tts.*");
        Set<String> annotationPathsExpected = Emoji.getNamePaths();
        for (String locale : Arrays.asList("en", "root")) {
            CLDRFile annotations = factoryAnnotations.make(locale, false);
            Set<String> annotationPaths = annotations.getPaths("//ldml/anno",
                ttsPath.matcher(""), new TreeSet<>());
            checkAMinusBIsC(locale + ".xml - Emoji.getNamePaths", annotationPaths, annotationPathsExpected, Collections.<String> emptySet());
            checkAMinusBIsC("Emoji.getNamePaths - " + locale + ".xml", annotationPathsExpected, annotationPaths, Collections.<String> emptySet());
        }
    }

    /**
     * Checks that en has every annotation path that root has, that no locale
     * has an annotation path that root lacks, and that no path contains
     * U+FE0F. Offending paths are also printed as add/delete instructions.
     */
    public void testSuperfluousAnnotationPaths() {
        Factory factoryAnnotations = SimpleFactory.make(CLDRPaths.ANNOTATIONS_DIRECTORY, ".*");
        ImmutableSet<String> rootPaths = loadAnnotationPaths(factoryAnnotations, "root");
        ImmutableSet<String> englishPaths = loadAnnotationPaths(factoryAnnotations, "en");

        Set<String> missingFromEnglish = setDifference(rootPaths, englishPaths);
        assertTrue("en contains root", missingFromEnglish.isEmpty());
        for (String path : missingFromEnglish) {
            System.out.println("locale=en; action=add; path=" + path + "; value=XXX");
        }

        Set<String> allSuperfluous = new TreeSet<>();
        for (String locale : factoryAnnotations.getAvailable()) {
            ImmutableSet<String> currentPaths = loadAnnotationPaths(factoryAnnotations, locale);
            Set<String> superfluous = setDifference(currentPaths, rootPaths);
            assertTrue("root contains " + locale, superfluous.isEmpty());
            allSuperfluous.addAll(superfluous);
            for (String s : currentPaths) {
                if (s.contains("\uFE0F")) {
                    errln("Contains FE0F: " + s);
                    break; // only the first offending path per locale is reported
                }
            }
        }
        // get items to fix
        for (String path : allSuperfluous) {
            System.out.println("locale=/.*/; action=delete; path=" + path);
        }
    }

    /**
     * Collects the paths under {@code //ldml/annotations/} of one locale's
     * unresolved annotation file.
     *
     * @param factory factory used to make the file
     * @param locale  locale ID to load
     * @return the paths as an immutable sorted set
     */
    private ImmutableSet<String> loadAnnotationPaths(Factory factory, String locale) {
        return ImmutableSortedSet.copyOf(factory.make(locale, false).iterator(ANNOTATIONS_PREFIX));
    }

    /**
     * Returns the elements of {@code a} that are not in {@code b}, as a new
     * mutable set in the iteration order of {@code a}.
     */
    private Set<String> setDifference(ImmutableSet<String> a, ImmutableSet<String> b) {
        Set<String> superfluous = new LinkedHashSet<>(a);
        superfluous.removeAll(b);
        return superfluous;
    }

    /**
     * Asserts that {@code a} minus {@code b} equals {@code c}.
     *
     * @param title message passed to the assertion
     */
    private void checkAMinusBIsC(String title, Set<String> a, Set<String> b, Set<String> c) {
        Set<String> aMb = new TreeSet<>(a);
        aMb.removeAll(b);
        assertEquals(title, c, aMb);
    }

    /**
     * Formats the code points of a source string with {@link XListFormatter}
     * for several list types and compares the result with the expected text.
     *
     * <p>Each row is: locale, {@link ListTypeLength}, source, expected.
     */
    public void testListFormatter() {
        Object[][] tests = {
            { "en", ListTypeLength.NORMAL, "ABC", "A, B, and C" },
            { "en", ListTypeLength.AND_SHORT, "ABC", "A, B, and C" },
            { "en", ListTypeLength.OR_WIDE, "ABC", "A, B, or C" }
        };
        Factory factory = CLDRConfig.getInstance().getCldrFactory();
        for (Object[] test : tests) {
            CLDRFile cldrFile = factory.make((String) (test[0]), true);
            ListTypeLength listTypeLength = (ListTypeLength) (test[1]);
            String source = (String) test[2];
            String expected = (String) test[3];
            XListFormatter xlistFormatter = new XListFormatter(cldrFile, listTypeLength);
            String actual = xlistFormatter.formatCodePoints(source);
            assertEquals(test[0] + ", " + listTypeLength + ", " + source, expected, actual);
        }
    }
}