Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.util.ArrayList;
      4 import java.util.Collection;
      5 import java.util.Comparator;
      6 import java.util.EnumSet;
      7 import java.util.LinkedHashSet;
      8 import java.util.List;
      9 import java.util.Map;
     10 import java.util.Map.Entry;
     11 import java.util.Set;
     12 import java.util.TreeMap;
     13 import java.util.TreeSet;
     14 
     15 import org.unicode.cldr.util.CLDRConfig;
     16 import org.unicode.cldr.util.CLDRFile;
     17 import org.unicode.cldr.util.Factory;
     18 import org.unicode.cldr.util.ICUServiceBuilder;
     19 import org.unicode.cldr.util.LanguageTagParser;
     20 import org.unicode.cldr.util.Level;
     21 import org.unicode.cldr.util.Organization;
     22 import org.unicode.cldr.util.PluralRanges;
     23 import org.unicode.cldr.util.StandardCodes;
     24 import org.unicode.cldr.util.SupplementalDataInfo;
     25 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
     26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
     27 
     28 import com.ibm.icu.dev.util.CollectionUtilities;
     29 import com.ibm.icu.impl.Relation;
     30 import com.ibm.icu.text.DecimalFormat;
     31 import com.ibm.icu.text.MessageFormat;
     32 import com.ibm.icu.text.PluralRules;
     33 import com.ibm.icu.text.PluralRules.FixedDecimal;
     34 import com.ibm.icu.util.Output;
     35 import com.ibm.icu.util.ULocale;
     36 
     37 public class GeneratePluralRanges {
     38     public GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo) {
     39         SUPPLEMENTAL = supplementalDataInfo;
     40         prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
     41     }
     42 
     43     private static final boolean MINIMAL = true;
     44 
     45     public static void main(String[] args) {
     46         CLDRConfig testInfo = ToolConfig.getToolInstance();
     47         GeneratePluralRanges me = new GeneratePluralRanges(testInfo.getSupplementalDataInfo());
     48         me.reformatPluralRanges();
     49         //me.generateSamples(testInfo.getEnglish(), testInfo.getCldrFactory());
     50     }
     51 
     52     private void generateSamples(CLDRFile english, Factory factory) {
     53         //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns();
     54         // add all the items with plural ranges
     55         Set<String> sorted = new TreeSet<String>(SUPPLEMENTAL.getPluralRangesLocales());
     56         // add the core locales
     57 //        sorted.addAll(StandardCodes.make().getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN)));
     58         sorted.addAll(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)));
     59         // add any variant plural forms
     60         LanguageTagParser ltp = new LanguageTagParser();
     61         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
     62             if (locale.contains("_")) {
     63                 if (sorted.contains(ltp.set(locale).getLanguage())) {
     64                     sorted.add(locale);
     65                 }
     66             }
     67         }
     68         //sorted.add("fil");
     69         System.out.println("Co.\tLocale Name\tStart\tEnd\tResult\tStart Sample\tEnd Sample\tStart Example\tEnd Example\tCombined Example");
     70         for (String locale : sorted) {
     71             PluralInfo pluralInfo3 = SUPPLEMENTAL.getPlurals(locale);
     72             if (locale.contains("_")) {
     73                 PluralInfo pluralInfo2 = SUPPLEMENTAL.getPlurals(ltp.set(locale).getLanguage());
     74                 if (pluralInfo2.equals(pluralInfo3)) {
     75                     continue;
     76                 }
     77             }
     78 
     79             Set<Count> counts3 = pluralInfo3.getCounts();
     80             if (counts3.size() == 1) {
     81                 continue; // skip japanese, etc.
     82             }
     83 
     84             List<RangeSample> list = getRangeInfo(factory.make(locale, true));
     85             if (list == null) {
     86                 System.out.println("Failure with " + locale);
     87                 continue;
     88             }
     89             for (RangeSample rangeSample : list) {
     90                 System.out.println(locale + "\t" + english.getName(locale)
     91                     + "\t" + rangeSample.start
     92                     + "\t" + rangeSample.end
     93                     + "\t" + (rangeSample.result == null ? "missing" : rangeSample.result)
     94                     + "\t" + rangeSample.min
     95                     + "\t" + rangeSample.max
     96                     + "\t" + rangeSample.startExample
     97                     + "\t" + rangeSample.endExample
     98                     + "\t" + rangeSample.resultExample);
     99             }
    100         }
    101     }
    102 
    103     public List<RangeSample> getRangeInfo(CLDRFile cldrFile) {
    104         String locale = cldrFile.getLocaleID();
    105         if (locale.equals("iw")) {
    106             locale = "he";
    107         }
    108         //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns();
    109         List<RangeSample> list = new ArrayList<RangeSample>();
    110         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
    111         Set<Count> counts = pluralInfo.getCounts();
    112         PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
    113         if (pluralRanges == null && locale.contains("_")) {
    114             String locale2 = new ULocale(locale).getLanguage();
    115             pluralRanges = SUPPLEMENTAL.getPluralRanges(locale2);
    116         }
    117         if (pluralRanges == null) {
    118             return null;
    119         }
    120         ULocale ulocale = new ULocale(locale);
    121         PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(ulocale.toString()); // CldrUtility.get(samples, ulocale);
    122 //        if (samplePatterns == null && locale.contains("_")) {
    123 //            ulocale = new ULocale(ulocale.getLanguage());
    124 //            samplePatterns = CldrUtility.get(samples, ulocale);
    125 //            if (samplePatterns == null) {
    126 //                return null;
    127 //            }
    128 //        }
    129 
    130         Output<FixedDecimal> maxSample = new Output<FixedDecimal>();
    131         Output<FixedDecimal> minSample = new Output<FixedDecimal>();
    132 
    133         ICUServiceBuilder icusb = new ICUServiceBuilder();
    134         icusb.setCldrFile(cldrFile);
    135         DecimalFormat nf = icusb.getNumberFormat(1);
    136         //String decimal = cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal");
    137         String defaultNumberingSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
    138         String range = cldrFile.getWinningValue("//ldml/numbers/miscPatterns[@numberSystem=\""
    139             + defaultNumberingSystem
    140             + "\"]/pattern[@type=\"range\"]");
    141 
    142         //            if (decimal == null) {
    143         //                throw new IllegalArgumentException();
    144         //            }
    145         for (Count s : counts) {
    146             for (Count e : counts) {
    147                 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) {
    148                     continue;
    149                 }
    150                 Count r = pluralRanges.getExplicit(s, e);
    151                 String minFormatted = format(nf, minSample.value);
    152                 String maxFormatted = format(nf, maxSample.value);
    153                 String rangeFormatted = MessageFormat.format(range, minFormatted, maxFormatted);
    154 
    155                 list.add(new RangeSample(
    156                     s, e, r,
    157                     minSample.value,
    158                     maxSample.value,
    159                     getExample(locale, samplePatterns, s, minFormatted), getExample(locale, samplePatterns, e, maxFormatted),
    160                     getExample(locale, samplePatterns, r, rangeFormatted)));
    161             }
    162         }
    163         return list;
    164     }
    165 
    166     public static class RangeSample {
    167         // Category Examples    Minimal Pairs   Rules
    168         public RangeSample(Count start, Count end, Count result,
    169             FixedDecimal min, FixedDecimal max,
    170             String startExample, String endExample, String resultExample) {
    171             this.start = start;
    172             this.end = end;
    173             this.result = result;
    174             this.min = min;
    175             this.max = max;
    176             this.startExample = startExample;
    177             this.endExample = endExample;
    178             this.resultExample = resultExample;
    179         }
    180 
    181         final Count start;
    182         final Count end;
    183         final Count result;
    184         final FixedDecimal min;
    185         final FixedDecimal max;
    186         final String startExample;
    187         final String endExample;
    188         final String resultExample;
    189     }
    190 
    191     public static String format(DecimalFormat nf, FixedDecimal minSample) {
    192         nf.setMinimumFractionDigits(minSample.getVisibleDecimalDigitCount());
    193         nf.setMaximumFractionDigits(minSample.getVisibleDecimalDigitCount());
    194         return nf.format(minSample);
    195     }
    196 
    197     //    private String format(String decimal, Output<FixedDecimal> minSample) {
    198     //        return minSample.toString().replace(".", decimal);
    199     //    }
    200 
    201     public static String getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString) {
    202         if (r == null) {
    203             return "missing";
    204         }
    205         String samplePattern;
    206         try {
    207             samplePattern = samplePatterns.get(PluralRules.PluralType.CARDINAL, r); // CldrUtility.get(samplePatterns.keywordToPattern, r);
    208         } catch (Exception e) {
    209             throw new IllegalArgumentException("Locale: " + locale + "; Count: " + r, e);
    210         }
    211         return samplePattern
    212             .replace('\u00A0', '\u0020')
    213             .replace("{0}", numString);
    214     }
    215 
    216     private final SupplementalDataInfo SUPPLEMENTAL;
    217     private final PluralRulesFactory prf;
    218 
    219     public static final Comparator<Set<String>> STRING_SET_COMPARATOR = new SetComparator<String, Set<String>>();
    220     public static final Comparator<Set<Count>> COUNT_SET_COMPARATOR = new SetComparator<Count, Set<Count>>();
    221 
    222     static final class SetComparator<T extends Comparable<T>, U extends Set<T>> implements Comparator<U> {
    223         public int compare(U o1, U o2) {
    224             return CollectionUtilities.compare((Collection<T>) o1, (Collection<T>) o2);
    225         }
    226     };
    227 
    228     public void reformatPluralRanges() {
    229         Map<Set<Count>, Relation<Set<String>, String>> seen = new TreeMap<Set<Count>, Relation<Set<String>, String>>(COUNT_SET_COMPARATOR);
    230 
    231         for (String locale : SUPPLEMENTAL.getPluralRangesLocales()) {
    232 
    233             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
    234             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
    235             Set<Count> counts = pluralInfo.getCounts();
    236 
    237             Set<String> s;
    238             if (false) {
    239                 System.out.println("Minimized, but not ready for prime-time");
    240                 s = minimize(pluralRanges, pluralInfo);
    241             } else {
    242                 s = reformat(pluralRanges, counts);
    243             }
    244             Relation<Set<String>, String> item = seen.get(counts);
    245             if (item == null) {
    246                 seen.put(counts,
    247                     item = Relation.of(new TreeMap<Set<String>, Set<String>>(STRING_SET_COMPARATOR), TreeSet.class));
    248             }
    249             item.put(s, locale);
    250         }
    251 
    252         for (Entry<Set<Count>, Relation<Set<String>, String>> entry0 : seen.entrySet()) {
    253             System.out.println("\n<!-- " + CollectionUtilities.join(entry0.getKey(), ", ") + " -->");
    254             for (Entry<Set<String>, Set<String>> entry : entry0.getValue().keyValuesSet()) {
    255                 System.out.println("\t\t<pluralRanges locales=\"" + CollectionUtilities.join(entry.getValue(), " ") + "\">");
    256                 for (String line : entry.getKey()) {
    257                     System.out.println("\t\t\t" + line);
    258                 }
    259                 System.out.println("\t\t</pluralRanges>");
    260             }
    261         }
    262     }
    263 
    264     enum RangeStrategy {
    265         other, end, start, mixed
    266     }
    267 
    268     public Set<String> reformat(PluralRanges pluralRanges, Set<Count> counts) {
    269         Set<String> s;
    270         s = new LinkedHashSet<String>();
    271         // first determine the general principle
    272 
    273         //        EnumSet<RangeStrategy> strategy = EnumSet.allOf(RangeStrategy.class);
    274         //        Count firstResult = null;
    275         //        for (Count start : counts) {
    276         //            for (Count end : counts) {
    277         //                Count result = pluralRanges.getExplicit(start, end);
    278         //                if (result == null) {
    279         //                    continue;
    280         //                } else if (firstResult == null) {
    281         //                    firstResult = result;
    282         //                }
    283         //                if (result != start) {
    284         //                    strategy.remove(RangeStrategy.start);
    285         //                }
    286         //                if (result != end) {
    287         //                    strategy.remove(RangeStrategy.end);
    288         //                }
    289         //                if (result != Count.other) {
    290         //                    strategy.remove(RangeStrategy.other);
    291         //                }
    292         //           }
    293         //        }
    294         //        s.add("<!-- Range Principle: " + strategy.iterator().next() + " -->");
    295         for (Count start : counts) {
    296             for (Count end : counts) {
    297                 Count result = pluralRanges.getExplicit(start, end);
    298                 if (result == null) {
    299                     continue;
    300                 }
    301                 String line = PluralRanges.showRange(start, end, result);
    302                 s.add(line);
    303             }
    304         }
    305         return s;
    306     }
    307 
    308     Set<String> minimize(PluralRanges pluralRanges, PluralInfo pluralInfo) {
    309         Set<String> result = new LinkedHashSet<String>();
    310         // make it easier to manage
    311         PluralRanges.Matrix matrix = new PluralRanges.Matrix();
    312         Output<FixedDecimal> maxSample = new Output<FixedDecimal>();
    313         Output<FixedDecimal> minSample = new Output<FixedDecimal>();
    314         for (Count s : Count.VALUES) {
    315             for (Count e : Count.VALUES) {
    316                 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) {
    317                     continue;
    318                 }
    319                 Count r = pluralRanges.getExplicit(s, e);
    320                 matrix.set(s, e, r);
    321             }
    322         }
    323         // if everything is 'other', we are done
    324         //        if (allOther == true) {
    325         //            return result;
    326         //        }
    327         EnumSet<Count> endDone = EnumSet.noneOf(Count.class);
    328         EnumSet<Count> startDone = EnumSet.noneOf(Count.class);
    329         if (MINIMAL) {
    330             for (Count end : pluralInfo.getCounts()) {
    331                 Count r = matrix.endSame(end);
    332                 if (r != null
    333                 //&& r != Count.other
    334                 ) {
    335                     result.add("<pluralRange" +
    336                         "              \t\tend=\"" + end
    337                         + "\"\tresult=\"" + r + "\"/>");
    338                     endDone.add(end);
    339                 }
    340             }
    341             Output<Boolean> emit = new Output<Boolean>();
    342             for (Count start : pluralInfo.getCounts()) {
    343                 Count r = matrix.startSame(start, endDone, emit);
    344                 if (r != null
    345                 // && r != Count.other
    346                 ) {
    347                     if (emit.value) {
    348                         result.add("<pluralRange" +
    349                             "\tstart=\"" + start
    350                             + "\"          \t\tresult=\"" + r + "\"/>");
    351                     }
    352                     startDone.add(start);
    353                 }
    354             }
    355         }
    356         //Set<String> skip = new LinkedHashSet<String>();
    357         for (Count end : pluralInfo.getCounts()) {
    358             if (endDone.contains(end)) {
    359                 continue;
    360             }
    361             for (Count start : pluralInfo.getCounts()) {
    362                 if (startDone.contains(start)) {
    363                     continue;
    364                 }
    365                 Count r = matrix.get(start, end);
    366                 if (r != null
    367                 //&& !(MINIMAL && r == Count.other)
    368                 ) {
    369                     result.add(PluralRanges.showRange(start, end, r));
    370                 } else {
    371                     result.add("<!-- <pluralRange" +
    372                         "\tstart=\"" + start
    373                         + "\" \tend=\"" + end
    374                         + "\" \tresult=\"" + r + "\"/> -->");
    375 
    376                 }
    377 
    378             }
    379         }
    380         return result;
    381     }
    382 
    383 }
    384