Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.util.HashMap;
      4 import java.util.Iterator;
      5 import java.util.Map;
      6 import java.util.Set;
      7 
      8 import org.unicode.cldr.draft.FileUtilities;
      9 
     10 import com.google.common.base.Splitter;
     11 import com.google.common.collect.ImmutableSet;
     12 import com.ibm.icu.dev.util.UnicodeMap;
     13 import com.ibm.icu.impl.Utility;
     14 import com.ibm.icu.lang.CharSequences;
     15 import com.ibm.icu.text.UnicodeSet;
     16 
     17 public class Emoji {
     18     public static final String EMOJI_VARIANT = "\uFE0F";
     19     public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
     20     public static final String ZWJ = "\u200D";
     21     public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
     22     public static final UnicodeSet MODIFIERS = new UnicodeSet("[-]").freeze();
     23     public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
     24     public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D -  ]").freeze();
     25     public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
     26     public static final UnicodeSet SPECIALS = new UnicodeSet("[{}{}{}]").freeze();
     27     public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[ ]").freeze();
     28     public static final UnicodeSet OBJECT = new UnicodeSet("[                ]").freeze();
     29 
     30     static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
     31     static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
     32     static final Map<String, Integer> minorToOrder = new HashMap<>();
     33     static final UnicodeSet nonConstructed = new UnicodeSet();
     34     static final UnicodeSet allRgi = new UnicodeSet();
     35     static final UnicodeSet allRgiNoES = new UnicodeSet();
     36 
     37     static {
     38         /*
     39             # group: Smileys & People
     40             # subgroup: face-positive
     41             1F600 ; fully-qualified     #  grinning face
     42          */
     43         Splitter semi = Splitter.on(';').trimResults();
     44         String majorCategory = null;
     45         String minorCategory = null;
     46         for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
     47             if (line.startsWith("#")) {
     48                 line = line.substring(1).trim();
     49                 if (line.startsWith("group:")) {
     50                     majorCategory = line.substring("group:".length()).trim();
     51                 } else if (line.startsWith("subgroup:")) {
     52                     minorCategory = line.substring("subgroup:".length()).trim();
     53                     if (!minorToOrder.containsKey(minorCategory)) {
     54                         minorToOrder.put(minorCategory, minorToOrder.size());
     55                     }
     56                 }
     57                 continue;
     58             }
     59             line = line.trim();
     60             if (line.isEmpty()) {
     61                 continue;
     62             }
     63             Iterator<String> it = semi.split(line).iterator();
     64             String emojiHex = it.next();
     65             String original = Utility.fromHex(emojiHex, 4, " ");
     66             String type = it.next();
     67             if (type.startsWith("fully-qualified")) {
     68                 allRgi.add(original);
     69                 allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, ""));
     70             }
     71             emojiToMajorCategory.put(original, majorCategory);
     72             emojiToMinorCategory.put(original, minorCategory);
     73 
     74             // add all the non-constructed values to a set for annotations
     75 
     76             String minimal = original.replace(EMOJI_VARIANT, "");
     77             boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;
     78 
     79             // skip constructed values
     80             if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
     81                 || REGIONAL_INDICATORS.containsSome(minimal)
     82                 || TAGS.containsSome(minimal)
     83                 || !singleton && MODIFIERS.containsSome(minimal)
     84                 || !singleton && FAMILY.containsAll(minimal)) {
     85                 // do nothing
     86             } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
     87                 if (SPECIALS.contains(minimal)
     88                     || GENDER.containsSome(minimal)
     89                     || MAN_WOMAN.contains(minimal.codePointAt(0)) && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
     90                     nonConstructed.add(minimal);
     91                 }
     92             } else if (!minimal.contains("")) {
     93                 nonConstructed.add(minimal);
     94             }
     95 
     96         }
     97         emojiToMajorCategory.freeze();
     98         emojiToMinorCategory.freeze();
     99         nonConstructed.add(MODIFIERS); // needed for names
    100         nonConstructed.freeze();
    101         allRgi.freeze();
    102         allRgiNoES.freeze();
    103     }
    104 
    105     public static UnicodeSet getAllRgi() {
    106         return allRgi;
    107     }
    108 
    109     public static UnicodeSet getAllRgiNoES() {
    110         return allRgiNoES;
    111     }
    112 
    113     public static String getMinorCategory(String emoji) {
    114         String minorCat = emojiToMinorCategory.get(emoji);
    115         if (minorCat == null) {
    116             throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
    117                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
    118         }
    119         return minorCat;
    120     }
    121 
    122     public static int getMinorToOrder(String minor) {
    123         Integer result = minorToOrder.get(minor);
    124         return result == null ? Integer.MAX_VALUE : result;
    125     }
    126 
    127     public static String getMajorCategory(String emoji) {
    128         String majorCat = emojiToMajorCategory.get(emoji);
    129         if (majorCat == null) {
    130             throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
    131                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
    132         }
    133         return majorCat;
    134     }
    135 
    136     public static Set<String> getMajorCategories() {
    137         return emojiToMajorCategory.values();
    138     }
    139 
    140     public static Set<String> getMinorCategories() {
    141         return emojiToMinorCategory.values();
    142     }
    143 
    144     public static UnicodeSet getNonConstructed() {
    145         return nonConstructed;
    146     }
    147 
    148     private static Set<String> NAME_PATHS = null;
    149     private static Set<String> KEYWORD_PATHS = null;
    150     public static final String TYPE_TTS = "[@type=\"tts\"]";
    151 
    152     public static synchronized Set<String> getNamePaths() {
    153         return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS));
    154     }
    155 
    156     public static synchronized Set<String> getKeywordPaths() {
    157         return KEYWORD_PATHS != null ? KEYWORD_PATHS : (KEYWORD_PATHS = buildPaths(""));
    158     }
    159 
    160     private static ImmutableSet<String> buildPaths(String suffix) {
    161         ImmutableSet.Builder<String> builder = ImmutableSet.builder();
    162         for (String s : Emoji.getNonConstructed()) {
    163             String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix;
    164             builder.add(base);
    165         }
    166         return builder.build();
    167     }
    168 }
    169