Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.io.File;
      4 import java.io.IOException;
      5 import java.util.ArrayList;
      6 import java.util.Arrays;
      7 import java.util.Collection;
      8 import java.util.Collections;
      9 import java.util.LinkedHashSet;
     10 import java.util.Locale;
     11 import java.util.Map;
     12 import java.util.Set;
     13 import java.util.TreeSet;
     14 import java.util.concurrent.ConcurrentHashMap;
     15 import java.util.regex.Pattern;
     16 
     17 import org.unicode.cldr.test.EmojiSubdivisionNames;
     18 import org.unicode.cldr.tool.ChartAnnotations;
     19 import org.unicode.cldr.util.XMLFileReader.SimpleHandler;
     20 
     21 import com.google.common.base.Objects;
     22 import com.google.common.base.Splitter;
     23 import com.google.common.collect.ImmutableSet;
     24 import com.google.common.collect.ImmutableSet.Builder;
     25 import com.ibm.icu.dev.util.CollectionUtilities;
     26 import com.ibm.icu.dev.util.UnicodeMap;
     27 import com.ibm.icu.impl.Utility;
     28 import com.ibm.icu.lang.CharSequences;
     29 import com.ibm.icu.text.SimpleFormatter;
     30 import com.ibm.icu.text.Transform;
     31 import com.ibm.icu.text.UTF16;
     32 import com.ibm.icu.text.UnicodeSet;
     33 import com.ibm.icu.text.UnicodeSet.SpanCondition;
     34 import com.ibm.icu.text.UnicodeSetSpanner;
     35 import com.ibm.icu.util.ICUUncheckedIOException;
     36 
     37 public class Annotations {
     38     private static final boolean DEBUG = false;
     39 
     40     public static final String BAD_MARKER = "";
     41     public static final String MISSING_MARKER = "";
     42     public static final String ENGLISH_MARKER = "";
     43     public static final String EQUIVALENT = "";
     44 
     45     public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings();
     46     static final Splitter dotSplitter = Splitter.on(".").trimResults();
     47 
     48     static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>();
     49     static final Set<String> LOCALES;
     50     static final String DIR;
     51     private static final AnnotationSet ENGLISH_DATA;
     52 
     53     private final Set<String> annotations;
     54     private final String tts;
     55 
     56     static {
     57         File directory = new File(CLDRPaths.COMMON_DIRECTORY, "annotations");
     58         try {
     59             DIR = directory.getCanonicalPath();
     60         } catch (IOException e) {
     61             throw new ICUUncheckedIOException(e);
     62         }
     63         if (DEBUG) {
     64             System.out.println(DIR);
     65         }
     66         Builder<String> temp = ImmutableSet.builder();
     67         for (File file : directory.listFiles()) {
     68             if (DEBUG) {
     69                 try {
     70                     System.out.println(file.getCanonicalPath());
     71                 } catch (IOException e) {
     72                 }
     73             }
     74             String name = file.toString();
     75             String shortName = file.getName();
     76             if (!shortName.endsWith(".xml") || // skip non-XML
     77                 shortName.startsWith("#") || // skip other junk files
     78                 shortName.startsWith(".")
     79 //                || shortName.contains("001") // skip world english for now
     80                 ) continue; // skip dot files (backups, etc)
     81             temp.add(dotSplitter.split(shortName).iterator().next());
     82         }
     83         LOCALES = temp.build();
     84         ENGLISH_DATA = getDataSet("en");
     85     }
     86 
     87     static class MyHandler extends SimpleHandler {
     88         private final String locale;
     89         private final UnicodeMap<Annotations> localeData = new UnicodeMap<>();
     90         private final AnnotationSet parentData;
     91         private final Map<String, AnnotationSet> dirCache;
     92 
     93         public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) {
     94             this.locale = locale;
     95             this.parentData = parentData;
     96             this.dirCache = dirCache;
     97         }
     98 
     99         public AnnotationSet cleanup() {
    100             // add parent data (may be overridden)
    101             UnicodeMap<Annotations> templocaleData = null;
    102             if (parentData != null) {
    103                 templocaleData = new UnicodeMap<>();
    104                 UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet());
    105                 for (String key : keys) {
    106                     Annotations parentValue = parentData.baseData.get(key);
    107                     Annotations myValue = localeData.get(key);
    108                     if (parentValue == null) {
    109                         templocaleData.put(key, myValue);
    110                     } else if (myValue == null) {
    111                         templocaleData.put(key, parentValue);
    112                     } else { // need to combine
    113                         String tts = myValue.tts == null
    114                             ? parentValue.tts : myValue.tts;
    115                         Set<String> annotations = myValue.annotations == null || myValue.annotations.isEmpty()
    116                             ? parentValue.annotations : myValue.annotations;
    117                         templocaleData.put(key, new Annotations(annotations, tts));
    118                     }
    119                 }
    120             }
    121 
    122             final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData);
    123             dirCache.put(locale, result);
    124             return result;
    125         }
    126 
    127         @Override
    128         public void handlePathValue(String path, String value) {
    129             XPathParts parts = XPathParts.getFrozenInstance(path);
    130             String lastElement = parts.getElement(-1);
    131             if (!lastElement.equals("annotation")) {
    132                 if (!"identity".equals(parts.getElement(1))) {
    133                     throw new IllegalArgumentException("Unexpected path");
    134                 }
    135                 return;
    136             }
    137             String usString = parts.getAttributeValue(-1, "cp");
    138             UnicodeSet us1 = usString.startsWith("[") && usString.endsWith("]") ? new UnicodeSet(usString) : new UnicodeSet().add(usString);
    139             UnicodeSet us = new UnicodeSet();
    140             for (String s : us1) {
    141                 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""));
    142             }
    143             String tts = parts.getAttributeValue(-1, "tts");
    144             String type = parts.getAttributeValue(-1, "type");
    145             String alt = parts.getAttributeValue(-1, "alt");
    146 
    147             if (alt != null) {
    148                 // do nothing for now
    149             } else if ("tts".equals(type)) {
    150                 addItems(localeData, us, Collections.<String> emptySet(), value);
    151             } else {
    152                 Set<String> attributes = new TreeSet<>(splitter.splitToList(value));
    153                 addItems(localeData, us, attributes, tts);
    154             }
    155         }
    156 
    157         private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) {
    158             for (String entry : us) {
    159                 addItems(unicodeMap, entry, attributes, tts);
    160             }
    161         }
    162 
    163         private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) {
    164             Annotations annotations = unicodeMap.get(entry);
    165             if (annotations == null) {
    166                 unicodeMap.put(entry, new Annotations(attributes, tts));
    167             } else {
    168                 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item
    169             }
    170         }
    171     }
    172 
    /**
     * Creates an annotation entry.
     *
     * @param attributes keywords; copied into an immutable set, or replaced by an empty set when null
     * @param tts2 the short name (tts), stored as given; may be null
     */
    public Annotations(Set<String> attributes, String tts2) {
        if (attributes == null) {
            this.annotations = Collections.<String> emptySet();
        } else {
            this.annotations = ImmutableSet.copyOf(attributes);
        }
        this.tts = tts2;
    }
    177 
    178     public Annotations add(Set<String> attributes, String tts2) {
    179         return new Annotations(getKeywords() == null ? attributes : attributes == null ? getKeywords() : union(attributes, getKeywords()),
    180             getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup());
    181     }
    182 
    183     private String throwDup() {
    184         throw new IllegalArgumentException("Duplicate tts");
    185     }
    186 
    187     private Set<String> union(Set<String> a, Set<String> b) {
    188         TreeSet<String> result = new TreeSet<>(a);
    189         result.addAll(b);
    190         return result;
    191     }
    192 
    193     public static Set<String> getAvailable() {
    194         return LOCALES;
    195     }
    196 
    197     public static Set<String> getAvailableLocales() {
    198         return LOCALES;
    199     }
    200 
    201     public static final class AnnotationSet {
    202 
    203         private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    204 
    205         static final Factory factory = CONFIG.getCldrFactory();
    206         static final CLDRFile ENGLISH = CONFIG.getEnglish();
    207         static final CLDRFile ENGLISH_ANNOTATIONS = null;
    208         static final Map<String,String> englishSubdivisionIdToName = EmojiSubdivisionNames.getSubdivisionIdToName("en");
    209         //CLDRConfig.getInstance().getAnnotationsFactory().make("en", false);
    210 
    211         private final String locale;
    212         private final UnicodeMap<Annotations> baseData;
    213         private final UnicodeMap<Annotations> unresolvedData;
    214         private final CLDRFile cldrFile;
    215         private final Map<String, String> subdivisionIdToName;
    216         private final SimpleFormatter initialPattern;
    217         private final Pattern initialRegexPattern;
    218         private final XListFormatter listPattern;
    219         private final Set<String> flagLabelSet;
    220         private final Set<String> keycapLabelSet;
    221         private final String keycapLabel;
    222         private final String flagLabel;
    223 //        private final String maleLabel;
    224 //        private final String femaleLabel;
    225         private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>();
    226 
    227         static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed
    228 
    229         private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) {
    230             this.locale = locale;
    231             unresolvedData = source.freeze();
    232             this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze();
    233             cldrFile = factory.make(locale, true);
    234             subdivisionIdToName = EmojiSubdivisionNames.getSubdivisionIdToName(locale);
    235             listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST);
    236             final String initialPatternString = getStringValue("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]");
    237             initialPattern = SimpleFormatter.compile(initialPatternString);
    238             final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E")
    239                 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern
    240             initialRegexPattern = Pattern.compile(regexPattern);
    241             flagLabelSet = getLabelSet("flag");
    242             flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next();
    243             keycapLabelSet = getLabelSet("keycap");
    244             keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next();
    245 //            maleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]");
    246 //            femaleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]");
    247         }
    248 
    249         /**
    250          * @deprecated Use {@link #getLabelSet(String)} instead
    251          */
    252         private Set<String> getLabelSet() {
    253             return getLabelSet("flag");
    254         }
    255 
    256         private Set<String> getLabelSet(String typeAttributeValue) {
    257             String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]");
    258             return label == null ? Collections.<String> emptySet() : Collections.singleton(label);
    259         }
    260 
    261         private String getStringValue(String xpath) {
    262             return getStringValue(xpath, cldrFile, ENGLISH);
    263         }
    264 
    265         private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) {
    266             String result = cldrFile2.getStringValue(xpath);
    267             if (result == null) {
    268                 return ENGLISH_MARKER + english.getStringValue(xpath);
    269             }
    270             String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null);
    271             if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) || sourceLocale.equals(XMLSource.ROOT_ID)) {
    272                 return MISSING_MARKER + result;
    273             }
    274             return result;
    275         }
    276 
    277         public String getShortName(String code) {
    278             return getShortName(code, null);
    279         }
    280 
    281         public String getShortName(String code, Transform<String, String> otherSource) {
    282             if (code.equals("")) {
    283                 int debug = 0;
    284             }
    285 
    286             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
    287             Annotations stock = baseData.get(code);
    288             if (stock != null && stock.tts != null) {
    289                 return stock.tts;
    290             }
    291             stock = localeCache.get(code);
    292             if (stock != null) {
    293                 return stock.tts;
    294             }
    295             stock = synthesize(code, otherSource);
    296             if (stock != null) {
    297                 localeCache.put(code, stock);
    298                 return stock.tts;
    299             }
    300             return null;
    301         }
    302 
    303         public Set<String> getKeywords(String code) {
    304             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
    305             Annotations stock = baseData.get(code);
    306             if (stock != null && stock.annotations != null) {
    307                 return stock.annotations;
    308             }
    309             stock = localeCache.get(code);
    310             if (stock != null) {
    311                 return stock.annotations;
    312             }
    313             stock = synthesize(code, null);
    314             if (stock != null) {
    315                 localeCache.put(code, stock);
    316                 return stock.annotations;
    317             }
    318             return Collections.<String> emptySet();
    319         }
    320 
    321         /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed!
    322          */
    323         public UnicodeSet keySet() {
    324             return baseData.keySet();
    325         }
    326 
    327         private Annotations synthesize(String code, Transform<String, String> otherSource) {
    328             if (code.equals("")) {
    329                 int debug = 0;
    330             }
    331             String shortName = null;
    332             int len = code.codePointCount(0, code.length());
    333             boolean isKeycap10 = code.equals("");
    334             if (len == 1 && !isKeycap10) {
    335                 String tempName = null;
    336                 if (locale.equals("en")) {
    337                     if (otherSource != null) {
    338                         tempName = otherSource.transform(code);
    339                     }
    340                     if (tempName == null) {
    341                         return null;
    342                     }
    343                     return new Annotations(Collections.<String> emptySet(), tempName);
    344                 } else { // fall back to English if possible, but mark it.
    345                     tempName = getDataSet("en").getShortName(code);
    346                     if (tempName == null) {
    347                         return null;
    348                     }
    349                     return new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + tempName);
    350                 }
    351             } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) {
    352                 String countryCode = EmojiConstants.getFlagCode(code);
    353                 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode);
    354                 String regionName = getStringValue(path);
    355                 if (regionName == null) {
    356                     regionName = ENGLISH_MARKER + ENGLISH.getStringValue(path);
    357                 }
    358                 String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName);
    359                 return new Annotations(flagLabelSet, flagName);
    360             } else if (code.startsWith(EmojiConstants.BLACK_FLAG)
    361                 && code.endsWith(EmojiConstants.TAG_TERM)) {
    362                 String subdivisionCode = EmojiConstants.getTagSpec(code);
    363                 String subdivisionName = subdivisionIdToName.get(subdivisionCode);
    364                 if (subdivisionName == null) {
    365                     subdivisionName = englishSubdivisionIdToName.get(subdivisionCode);
    366                     if (subdivisionName != null) {
    367                         subdivisionName = ENGLISH_MARKER + subdivisionCode;
    368                     } else {
    369                         subdivisionName = MISSING_MARKER + subdivisionCode;
    370                     }
    371                 }
    372                 String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName);
    373                 return new Annotations(flagLabelSet, flagName);
    374             } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) {
    375                 final String rem = code.equals("") ? "10" : UTF16.valueOf(code.charAt(0));
    376                 shortName = initialPattern.format(keycapLabel, rem);
    377                 return new Annotations(keycapLabelSet, shortName);
    378             }
    379             UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET;
    380             String rem = "";
    381             SimpleFormatter startPattern = initialPattern;
    382             if (EmojiConstants.COMPONENTS.containsSome(code)) {
    383                 synchronized (uss) {
    384                     rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED);
    385                     code = uss.deleteFrom(code, SpanCondition.CONTAINED);
    386                 }
    387             }
    388             if (code.contains(EmojiConstants.JOINER_STRING)) {
    389 //                if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){
    390 //                    if (matchesInitialPattern(code)) { // "","police officer: man, medium-light skin tone"
    391 //                        rem = EmojiConstants.MAN + rem;
    392 //                        code = code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length());
    393 //                    } // otherwise "","man biking: dark skin tone"
    394 //                } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){
    395 //                    if (matchesInitialPattern(code)) { //
    396 //                        rem = EmojiConstants.WOMAN + rem;
    397 //                        code = code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length());
    398 //                    }
    399 //                } else
    400                 if (code.contains(EmojiConstants.KISS)) {
    401                     rem = code + rem;
    402                     code = "";
    403                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
    404                 } else if (code.contains(EmojiConstants.HEART)) {
    405                     rem = code + rem;
    406                     code = "";
    407                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
    408                 } else if (code.contains(EmojiConstants.HANDSHAKE)) {
    409                     code = code.startsWith(EmojiConstants.MAN) ? ""
    410                         : code.endsWith(EmojiConstants.MAN) ? ""
    411                             : "";
    412                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
    413                 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) {
    414                     rem = code + rem;
    415                     code = "";
    416                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
    417 //                } else {
    418 //                    startPattern = listPattern;
    419                 }
    420                 // left over is "","judge: man, dark skin tone"
    421             }
    422             return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource);
    423         }
    424 
    425         private boolean matchesInitialPattern(String code) {
    426             Annotations baseAnnotation = baseData.get(code);
    427             String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName();
    428             return baseName != null && initialRegexPattern.matcher(baseName).matches();
    429         }
    430 
    431         private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern,
    432             Transform<String, String> otherSource) {
    433             String shortName = null;
    434             Set<String> annotations = new LinkedHashSet<>();
    435             boolean needMarker = true;
    436 
    437             if (base != null) {
    438                 needMarker = false;
    439                 Annotations stock = baseData.get(base);
    440                 if (stock != null) {
    441                     shortName = stock.getShortName();
    442                     annotations.addAll(stock.getKeywords());
    443                 } else if (otherSource != null) {
    444                     shortName = otherSource.transform(base);
    445                     if (shortName == null) {
    446                         return null;
    447                     }
    448                 } else {
    449                     return null;
    450                 }
    451             }
    452 
    453             boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0));
    454             Collection<String> arguments = new ArrayList<>();
    455             int lastSkin = -1;
    456 
    457             for (int mod : CharSequences.codePoints(rem)) {
    458                 if (ignore.contains(mod)) {
    459                     continue;
    460                 }
    461                 if (EmojiConstants.MODIFIERS.contains(mod)) {
    462                     if (lastSkin == mod) {
    463                         continue;
    464                     }
    465                     lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families
    466                 }
    467                 Annotations stock = baseData.get(mod);
    468                 String modName = null;
    469                 if (stock != null) {
    470                     modName = stock.getShortName();
    471                 } else if (otherSource != null) {
    472                     modName = otherSource.transform(base);
    473                 }
    474                 if (modName == null) {
    475                     needMarker = true;
    476                     if (ENGLISH_DATA != null) {
    477                         Annotations engName = ENGLISH_DATA.baseData.get(mod);
    478                         if (engName != null) {
    479                             modName = engName.getShortName();
    480                         }
    481                     }
    482                     if (modName == null) {
    483                         modName = Utility.hex(mod); // ultimate fallback
    484                     }
    485                 }
    486                 if (hackBlond && shortName != null) {
    487                     // HACK: make the blond names look like the other hair names
    488                     // Split the short name into pieces, if possible, and insert the modName first
    489                     String sep = initialPattern.format("", "");
    490                     int splitPoint = shortName.indexOf(sep);
    491                     if (splitPoint >= 0) {
    492                         String modName0 = shortName.substring(splitPoint+sep.length());
    493                         shortName = shortName.substring(0, splitPoint);
    494                         if (modName != null) {
    495                             arguments.add(modName);
    496                             annotations.add(modName);
    497                         }
    498                         modName = modName0;
    499                     }
    500                     hackBlond = false;
    501                 }
    502 
    503                 if (modName != null) {
    504                     arguments.add(modName);
    505                     annotations.add(modName);
    506                 }
    507             }
    508             if (!arguments.isEmpty()) {
    509                 shortName = pattern.format(shortName, listPattern.format(arguments));
    510             }
    511             Annotations result = new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName);
    512             return result;
    513         }
    514 
    515         /**
    516          * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead
    517          */
    518         public String toString(String code, boolean html) {
    519             return toString(code, html, null);
    520         }
    521 
    522         public String toString(String code, boolean html, AnnotationSet parentAnnotations) {
    523             if (locale.equals("be") && code.equals("")) {
    524                 int debug = 0;
    525             }
    526             String shortName = getShortName(code);
    527             if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) {
    528                 return MISSING_MARKER;
    529             }
    530 
    531             String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code);
    532             if (shortName != null && Objects.equal(shortName, parentShortName)) {
    533                 shortName = EQUIVALENT;
    534             }
    535 
    536             Set<String> keywords = getKeywordsMinus(code);
    537             Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code);
    538             if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) {
    539                 keywords = Collections.singleton(EQUIVALENT);
    540             }
    541 
    542             String result = CollectionUtilities.join(keywords, " |\u00a0");
    543             if (shortName != null) {
    544                 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*");
    545                 if (result.isEmpty()) {
    546                     result = ttsString;
    547                 } else {
    548                     result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
    549                 }
    550             }
    551             return result;
    552         }
    553 
    554         public UnicodeMap<Annotations> getExplicitValues() {
    555             return baseData;
    556         }
    557 
    558         public UnicodeMap<Annotations> getUnresolvedExplicitValues() {
    559             return unresolvedData;
    560         }
    561 
    562         public Set<String> getKeywordsMinus(String code) {
    563             String shortName = getShortName(code);
    564             Set<String> keywords = getKeywords(code);
    565             if (shortName != null && keywords.contains(shortName)) {
    566                 keywords = new LinkedHashSet<String>(keywords);
    567                 keywords.remove(shortName);
    568             }
    569             return keywords;
    570         }
    571     }
    572 
    573     public static AnnotationSet getDataSet(String locale) {
    574         return getDataSet(DIR, locale);
    575     }
    576 
    577     public static AnnotationSet getDataSet(String dir, String locale) {
    578         if (dir == null) {
    579             dir = DIR;
    580         }
    581         Map<String, AnnotationSet> dirCache = cache.get(dir);
    582         if (dirCache == null) {
    583             cache.put(dir, dirCache = new ConcurrentHashMap<>());
    584         }
    585         AnnotationSet result = dirCache.get(locale);
    586         if (result != null) {
    587             return result;
    588         }
    589         if (!LOCALES.contains(locale)) {
    590             return null;
    591         }
    592         String parentString = LocaleIDParser.getSimpleParent(locale);
    593         AnnotationSet parentData = null;
    594         if (parentString != null && !parentString.equals("root")) {
    595             parentData = getDataSet(dir, parentString);
    596         }
    597         MyHandler myHandler = new MyHandler(dirCache, locale, parentData);
    598         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
    599         xfr.read(dir + "/" + locale + ".xml", -1, true);
    600         return myHandler.cleanup();
    601     }
    602 
    603     public static UnicodeMap<Annotations> getData(String locale) {
    604         return getData(DIR, locale);
    605     }
    606 
    607     public static UnicodeMap<Annotations> getData(String dir, String locale) {
    608         AnnotationSet result = getDataSet(dir, locale);
    609         return result == null ? null : result.baseData;
    610     }
    611 
    612     @Override
    613     public String toString() {
    614         return toString(false);
    615     }
    616 
    617     public String toString(boolean html) {
    618         Set<String> annotations2 = getKeywords();
    619         if (getShortName() != null && annotations2.contains(getShortName())) {
    620             annotations2 = new LinkedHashSet<String>(getKeywords());
    621             annotations2.remove(getShortName());
    622         }
    623         String result = CollectionUtilities.join(annotations2, " |\u00a0");
    624         if (getShortName() != null) {
    625             String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*");
    626             if (result.isEmpty()) {
    627                 result = ttsString;
    628             } else {
    629                 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
    630             }
    631         }
    632         return result;
    633     }
    634 
    635     /**
    636      * @return the annotations
    637      */
    638     public Set<String> getKeywords() {
    639         return annotations;
    640     }
    641 
    642     /**
    643      * @return the tts
    644      */
    645     public String getShortName() {
    646         return tts;
    647     }
    648 
    649     public static void main(String[] args) {
    650         if (true) {
    651             writeList();
    652         } else {
    653             writeEnglish();
    654         }
    655     }
    656 
    657     private static void writeList() {
    658         AnnotationSet eng = Annotations.getDataSet("en");
    659         Annotations an = eng.baseData.get("");
    660         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
    661         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
    662         map.keySet().addAllTo(keys);
    663 //        keys.add("");
    664         for (String key : keys) {
    665             System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT)
    666                 + "\t" + key
    667                 + "\t" + map.get(key).getShortName()
    668                 + "\t" + CollectionUtilities.join(map.get(key).getKeywords(), " | "));
    669         }
    670         for (String s : Arrays.asList(
    671             "", "",
    672             "", "",
    673             "", "",
    674             "", "",
    675             "", "", "", "",
    676             "", "", "", "", "",
    677             "", "", "", "", "", "")) {
    678             final String shortName = eng.getShortName(s);
    679             final Set<String> keywords = eng.getKeywords(s);
    680             System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + CollectionUtilities.join(keywords, "|") + "\"},");
    681         }
    682     }
    683 
    684     private static void writeEnglish() {
    685         AnnotationSet eng = Annotations.getDataSet("en");
    686         System.out.println(Annotations.getAvailable());
    687         AnnotationSet eng100 = Annotations.getDataSet("en_001");
    688         UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues();
    689         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
    690         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
    691         map.keySet().addAllTo(keys);
    692         for (String key : keys) {
    693             Annotations value = map.get(key);
    694             Annotations value100 = map100.get(key);
    695             Set<String> keywords100 = (value100 == null ? null : value100.getKeywords());
    696             System.out.println(key + "\tname\t"
    697                 + "\t" + value.getShortName()
    698                 + "\t" + (value100 == null ? "" : value100.getShortName())
    699                 + "\t" + CollectionUtilities.join(value.getKeywords(), " | ")
    700                 + "\t" + (keywords100 == null ? "" : CollectionUtilities.join(keywords100, " | ")));
    701         }
    702     }
    703 }
    704