Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.util.HashMap;
      4 import java.util.HashSet;
      5 import java.util.Iterator;
      6 import java.util.LinkedHashSet;
      7 import java.util.Map;
      8 import java.util.Map.Entry;
      9 import java.util.Set;
     10 import java.util.TreeMap;
     11 import java.util.TreeSet;
     12 import java.util.regex.Matcher;
     13 
     14 import org.unicode.cldr.util.CLDRConfig;
     15 import org.unicode.cldr.util.CLDRFile;
     16 import org.unicode.cldr.util.Factory;
     17 import org.unicode.cldr.util.Pair;
     18 import org.unicode.cldr.util.PatternCache;
     19 import org.unicode.cldr.util.SupplementalDataInfo;
     20 import org.unicode.cldr.util.Timer;
     21 import org.unicode.cldr.util.XPathParts;
     22 
     23 import com.google.common.base.Splitter;
     24 import com.ibm.icu.text.UnicodeSet;
     25 
     26 public class ListUnits {
     27     private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze();
     28     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
     29     private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo();
     30     private static final Task TASK = Task.listSimpleUnits;
     31 
     32     private enum Task {
     33         listUnits, listSimpleUnits, showDecimals, getDigits,
     34     }
     35 
     36     enum Type {
     37         root, en, other;
     38         static Type fromString(String type) {
     39             return type.equals("en") ? en : type.equals("root") ? root : other;
     40         }
     41     }
     42 
     43     public static void main(String[] args) {
     44         Factory cldrFactory = CONFIG.getCldrFactory();
     45         Set<String> defaultContent = SUPP.getDefaultContentLocales();
     46         Set<String> seen = new HashSet<>();
     47 
     48         LinkedHashSet<String> items = new LinkedHashSet<>();
     49         items.add("root");
     50         items.add("en");
     51         items.addAll(cldrFactory.getAvailableLanguages());
     52         Map<String, Data> rootMap = new HashMap<>();
     53         Map<String, Data> enMap = new HashMap<>();
     54 
     55         Timer timer = new Timer();
     56         int count = 0;
     57         XPathParts parts = new XPathParts();
     58         Splitter SEMI = Splitter.on(";").trimResults();
     59         Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher("");
     60 
     61         for (String locale : items) {
     62             Type type = Type.fromString(locale);
     63             if (type == Type.root || type == Type.en || defaultContent.contains(locale)) {
     64                 continue;
     65             }
     66             CLDRFile cldrFile = cldrFactory.make(locale, true);
     67 //            DecimalFormat format = new DecimalFormat(currencyPattern);
     68 //            String prefix = format.getPositivePrefix();
     69 //            String suffix = format.getPositiveSuffix();
     70 
     71 //            ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile);
     72 //            DecimalFormat format = builder.getCurrencyFormat("XXX");
     73 //            String prefix = format.getPositivePrefix().replace("XXX", "\u00a4");
     74 //            String suffix = format.getPositiveSuffix().replace("XXX", "\u00a4");
     75             switch (TASK) {
     76             case showDecimals: {
     77                 String compactPathPrefix = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]";
     78                 String currencyPattern = cldrFile
     79                     .getStringValue(
     80                         "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]");
     81                 String firstPart = SEMI.split(currencyPattern).iterator().next();
     82                 if (!currencyMatcher.reset(firstPart).matches()) {
     83                     throw new IllegalArgumentException("bad matcher");
     84                 }
     85                 String prefix = currencyMatcher.group(1);
     86                 String suffix = currencyMatcher.group(2);
     87                 System.out.println("\n#" + locale + "\t" + prefix + "\t" + suffix + "\t" + currencyPattern + "");
     88                 TreeMap<String, String> data = new TreeMap<>();
     89                 for (String path : cldrFile.fullIterable()) {
     90 //                    if (s.contains("decimalFormats")) {
     91 //                        System.out.println(s);
     92 //                    }
     93                     if (path.startsWith(compactPathPrefix)) {
     94                         String value = cldrFile.getStringValue(path);
     95                         String mod = path.replace("decimal", "currency") + "[@draft=\"provisional\"]";
     96                         //                        // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
     97                         data.put(mod, "locale=" + locale
     98                             + " ; action=add"
     99                             + " ; new_value=" + prefix + value + suffix
    100                             + " ; new_path=" + mod);
    101                     }
    102                 }
    103                 for (Entry<String, String> line : data.entrySet()) {
    104                     System.out.println(line.getValue());
    105                 }
    106                 data.clear();
    107                 break;
    108             }
    109             case listUnits:
    110             case listSimpleUnits: {
    111                 Set<String> units = getUnits(cldrFile, TASK, type == Type.root ? rootMap : type == Type.en ? enMap : null);
    112                 if (type == Type.en) {
    113                     TreeSet<String> missing = new TreeSet<>(seen);
    114                     missing.removeAll(units);
    115                     for (String unit : missing) {
    116                         // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
    117                         Data data = rootMap.get(unit);
    118                         if (data != null) {
    119                             System.out.println(data);
    120                         }
    121                     }
    122                 }
    123                 Splitter HYPHEN = Splitter.on('-');
    124                 String oldBase = "";
    125                 for (String unit : units) {
    126                     if (!seen.contains(unit)) {
    127                         switch (TASK) {
    128                         case listSimpleUnits:
    129                             String base = HYPHEN.split(unit).iterator().next();
    130                             if (!base.equals(oldBase)) {
    131                                 oldBase = base;
    132                                 System.out.println();
    133                             } else {
    134                                 System.out.print(' ');
    135                             }
    136                             System.out.print(unit);
    137                             break;
    138                         case listUnits:
    139                             System.out.println("\t" + unit.replace("/", "\t")
    140                                 .replaceFirst("-", "\t") + "\t" + locale);
    141                             break;
    142                         }
    143                         seen.add(unit);
    144                     }
    145                 }
    146                 break;
    147             }
    148             case getDigits: {
    149                 getDigits(cldrFile);
    150                 break;
    151             }
    152             }
    153         }
    154         System.out.println();
    155         System.out.println("#Done: " + count + ", " + timer);
    156     }
    157 
    158     static void getDigits(CLDRFile cldrFile) {
    159         System.out.println(cldrFile.getLocaleID());
    160         String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
    161         Set<String> seen = new HashSet<>();
    162         seen.add(numberSystem);
    163         Pair<UnicodeSet, UnicodeSet> main = getCharacters(cldrFile, numberSystem);
    164         System.out.println("\tdefault: " + numberSystem + ", " + main.getFirst().toPattern(false) + ", " + main.getSecond().toPattern(false));
    165         for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems"); it.hasNext();) {
    166             String path = it.next();
    167             String otherNumberingSystem = cldrFile.getWinningValue(path);
    168             if (seen.contains(otherNumberingSystem)) {
    169                 continue;
    170             }
    171             seen.add(otherNumberingSystem);
    172             main = getCharacters(cldrFile, otherNumberingSystem);
    173             System.out.println("\tother: " + otherNumberingSystem
    174                 + ", " + main.getFirst().toPattern(false) + "\t" + main.getSecond().toPattern(false));
    175         }
    176     }
    177 
    178     private static Pair<UnicodeSet, UnicodeSet> getCharacters(CLDRFile cldrFileToCheck, String numberSystem) {
    179         String digitString = SUPP.getDigits(numberSystem);
    180         UnicodeSet digits = digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString);
    181 
    182         UnicodeSet punctuation = new UnicodeSet();
    183         Set<String> errors = new LinkedHashSet<>();
    184         add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors);
    185         //add(cldrFileToCheck, "exponential", numberSystem, punctuation, errors);
    186         add(cldrFileToCheck, "group", numberSystem, punctuation, errors);
    187         //add(cldrFileToCheck, "infinity", numberSystem, punctuation, errors);
    188         add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors);
    189         //add(cldrFileToCheck, "nan", numberSystem, punctuation, errors);
    190         add(cldrFileToCheck, "list", numberSystem, punctuation, errors);
    191         add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors);
    192         add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors);
    193         add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors);
    194         // symbols.setZeroDigit(getSymbolString(cldrFileToCheck, "nativeZeroDigit", numberSystem));
    195         if (!errors.isEmpty() && digitString != null) {
    196             System.out.println("Missing: " + numberSystem + "\t" + errors);
    197         }
    198         punctuation.removeAll(BIDI_CONTROL);
    199         return Pair.of(digits, punctuation);
    200     }
    201 
    202     private static void add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors) {
    203         final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem);
    204         if (result == null) {
    205             errors.add(subtype);
    206         } else {
    207             punctuation.addAll(result);
    208         }
    209     }
    210 
    211     private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) {
    212         return cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key);
    213     }
    214 
    215     static final class Data {
    216         public Data(String path2, String stringValue) {
    217             path = path2;
    218             value = stringValue;
    219         }
    220 
    221         final String path;
    222         final String value;
    223 
    224         public String toString() {
    225             return "locale=en"
    226                 + " ; action=add"
    227                 + " ; new_path=" + path
    228                 + " ; new_value=" + value;
    229         }
    230     }
    231 
    232     private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) {
    233         Set<String> seen = new TreeSet<String>();
    234         for (String path : cldrFile) {
    235             if (!path.contains("/unit")) {
    236                 continue;
    237             }
    238             XPathParts parts = XPathParts.getFrozenInstance(path);
    239             String unit = parts.findAttributeValue("unit", "type");
    240             if (unit == null) {
    241                 continue;
    242             }
    243             String key = unit;
    244             if (task == Task.listUnits) {
    245                 String length = parts.findAttributeValue("unitLength", "type");
    246                 String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : "";
    247                 key = unit + "/" + length + "/" + per;
    248             }
    249             seen.add(key);
    250             if (extra != null && !path.endsWith("/alias")) {
    251                 extra.put(key, new Data(path, cldrFile.getStringValue(path)));
    252             }
    253         }
    254         return seen;
    255     }
    256 }
    257