Home | History | Annotate | Download | only in text
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2013-2015, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.text;
     10 
     11 import java.util.ArrayList;
     12 import java.util.Collection;
     13 import java.util.Collections;
     14 import java.util.HashMap;
     15 import java.util.HashSet;
     16 import java.util.LinkedHashSet;
     17 import java.util.List;
     18 import java.util.Map;
     19 import java.util.Map.Entry;
     20 import java.util.Set;
     21 import java.util.TreeSet;
     22 
     23 import com.ibm.icu.text.PluralRules.FixedDecimal;
     24 import com.ibm.icu.text.PluralRules.KeywordStatus;
     25 import com.ibm.icu.util.Output;
     26 
     27 /**
     28  * @author markdavis
     29  * Refactor samples as first step to moving into CLDR
     30  *
     31  * @internal
     32  * @deprecated This API is ICU internal only.
     33  */
     34 @Deprecated
     35 public class PluralSamples {
     36 
     37     private PluralRules pluralRules;
     38     private final Map<String, List<Double>> _keySamplesMap;
     39 
     40     /**
     41      * @internal
     42      * @deprecated This API is ICU internal only.
     43      */
     44     @Deprecated
     45     public final Map<String, Boolean> _keyLimitedMap;
     46     private final Map<String, Set<FixedDecimal>> _keyFractionSamplesMap;
     47     private final Set<FixedDecimal> _fractionSamples;
     48 
     49     /**
     50      * @internal
     51      * @deprecated This API is ICU internal only.
     52      */
     53     @Deprecated
     54     public PluralSamples(PluralRules pluralRules) {
     55         this.pluralRules = pluralRules;
     56         Set<String> keywords = pluralRules.getKeywords();
     57         // ensure both _keySamplesMap and _keyLimitedMap are initialized.
     58         // If this were allowed to vary on a per-call basis, we'd have to recheck and
     59         // possibly rebuild the samples cache.  Doesn't seem worth it.
     60         // This 'max samples' value only applies to keywords that are unlimited, for
     61         // other keywords all the matching values are returned.  This might be a lot.
     62         final int MAX_SAMPLES = 3;
     63 
     64         Map<String, Boolean> temp = new HashMap<String, Boolean>();
     65         for (String k : keywords) {
     66             temp.put(k, pluralRules.isLimited(k));
     67         }
     68         _keyLimitedMap = temp;
     69 
     70         Map<String, List<Double>> sampleMap = new HashMap<String, List<Double>>();
     71         int keywordsRemaining = keywords.size();
     72 
     73         int limit = 128; // Math.max(5, getRepeatLimit() * MAX_SAMPLES) * 2;
     74 
     75         for (int i = 0; keywordsRemaining > 0 && i < limit; ++i) {
     76             keywordsRemaining = addSimpleSamples(pluralRules, MAX_SAMPLES, sampleMap, keywordsRemaining, i / 2.0);
     77         }
     78         // Hack for Celtic
     79         keywordsRemaining = addSimpleSamples(pluralRules, MAX_SAMPLES, sampleMap, keywordsRemaining, 1000000);
     80 
     81 
     82         // collect explicit samples
     83         Map<String, Set<FixedDecimal>> sampleFractionMap = new HashMap<String, Set<FixedDecimal>>();
     84         Set<FixedDecimal> mentioned = new TreeSet<FixedDecimal>();
     85         // make sure that there is at least one 'other' value
     86         Map<String, Set<FixedDecimal>> foundKeywords = new HashMap<String, Set<FixedDecimal>>();
     87         for (FixedDecimal s : mentioned) {
     88             String keyword = pluralRules.select(s);
     89             addRelation(foundKeywords, keyword, s);
     90         }
     91         main:
     92             if (foundKeywords.size() != keywords.size()) {
     93                 for (int i = 1; i < 1000; ++i) {
     94                     boolean done = addIfNotPresent(i, mentioned, foundKeywords);
     95                     if (done) break main;
     96                 }
     97                 // if we are not done, try tenths
     98                 for (int i = 10; i < 1000; ++i) {
     99                     boolean done = addIfNotPresent(i/10d, mentioned, foundKeywords);
    100                     if (done) break main;
    101                 }
    102                 System.out.println("Failed to find sample for each keyword: " + foundKeywords + "\n\t" + pluralRules + "\n\t" + mentioned);
    103             }
    104         mentioned.add(new FixedDecimal(0)); // always there
    105         mentioned.add(new FixedDecimal(1)); // always there
    106         mentioned.add(new FixedDecimal(2)); // always there
    107         mentioned.add(new FixedDecimal(0.1,1)); // always there
    108         mentioned.add(new FixedDecimal(1.99,2)); // always there
    109         mentioned.addAll(fractions(mentioned));
    110         for (FixedDecimal s : mentioned) {
    111             String keyword = pluralRules.select(s);
    112             Set<FixedDecimal> list = sampleFractionMap.get(keyword);
    113             if (list == null) {
    114                 list = new LinkedHashSet<FixedDecimal>(); // will be sorted because the iteration is
    115                 sampleFractionMap.put(keyword, list);
    116             }
    117             list.add(s);
    118         }
    119 
    120         if (keywordsRemaining > 0) {
    121             for (String k : keywords) {
    122                 if (!sampleMap.containsKey(k)) {
    123                     sampleMap.put(k, Collections.<Double>emptyList());
    124                 }
    125                 if (!sampleFractionMap.containsKey(k)) {
    126                     sampleFractionMap.put(k, Collections.<FixedDecimal>emptySet());
    127                 }
    128             }
    129         }
    130 
    131         // Make lists immutable so we can return them directly
    132         for (Entry<String, List<Double>> entry : sampleMap.entrySet()) {
    133             sampleMap.put(entry.getKey(), Collections.unmodifiableList(entry.getValue()));
    134         }
    135         for (Entry<String, Set<FixedDecimal>> entry : sampleFractionMap.entrySet()) {
    136             sampleFractionMap.put(entry.getKey(), Collections.unmodifiableSet(entry.getValue()));
    137         }
    138         _keySamplesMap = sampleMap;
    139         _keyFractionSamplesMap = sampleFractionMap;
    140         _fractionSamples = Collections.unmodifiableSet(mentioned);
    141     }
    142 
    143     private int addSimpleSamples(PluralRules pluralRules, final int MAX_SAMPLES, Map<String, List<Double>> sampleMap,
    144             int keywordsRemaining, double val) {
    145         String keyword = pluralRules.select(val);
    146         boolean keyIsLimited = _keyLimitedMap.get(keyword);
    147 
    148         List<Double> list = sampleMap.get(keyword);
    149         if (list == null) {
    150             list = new ArrayList<Double>(MAX_SAMPLES);
    151             sampleMap.put(keyword, list);
    152         } else if (!keyIsLimited && list.size() == MAX_SAMPLES) {
    153             return keywordsRemaining;
    154         }
    155         list.add(Double.valueOf(val));
    156 
    157         if (!keyIsLimited && list.size() == MAX_SAMPLES) {
    158             --keywordsRemaining;
    159         }
    160         return keywordsRemaining;
    161     }
    162 
    163     private void addRelation(Map<String, Set<FixedDecimal>> foundKeywords, String keyword, FixedDecimal s) {
    164         Set<FixedDecimal> set = foundKeywords.get(keyword);
    165         if (set == null) {
    166             foundKeywords.put(keyword, set = new HashSet<FixedDecimal>());
    167         }
    168         set.add(s);
    169     }
    170 
    171     private boolean addIfNotPresent(double d, Set<FixedDecimal> mentioned, Map<String, Set<FixedDecimal>> foundKeywords) {
    172         FixedDecimal numberInfo = new FixedDecimal(d);
    173         String keyword = pluralRules.select(numberInfo);
    174         if (!foundKeywords.containsKey(keyword) || keyword.equals("other")) {
    175             addRelation(foundKeywords, keyword, numberInfo);
    176             mentioned.add(numberInfo);
    177             if (keyword.equals("other")) {
    178                 if (foundKeywords.get("other").size() > 1) {
    179                     return true;
    180                 }
    181             }
    182         }
    183         return false;
    184     }
    185 
    186     private static final int[] TENS = {1, 10, 100, 1000, 10000, 100000, 1000000};
    187 
    188     private static final int LIMIT_FRACTION_SAMPLES = 3;
    189 
    190 
    191     private Set<FixedDecimal> fractions(Set<FixedDecimal> original) {
    192         Set<FixedDecimal> toAddTo = new HashSet<FixedDecimal>();
    193 
    194         Set<Integer> result = new HashSet<Integer>();
    195         for (FixedDecimal base1 : original) {
    196             result.add((int)base1.integerValue);
    197         }
    198         List<Integer> ints = new ArrayList<Integer>(result);
    199         Set<String> keywords = new HashSet<String>();
    200 
    201         for (int j = 0; j < ints.size(); ++j) {
    202             Integer base = ints.get(j);
    203             String keyword = pluralRules.select(base);
    204             if (keywords.contains(keyword)) {
    205                 continue;
    206             }
    207             keywords.add(keyword);
    208             toAddTo.add(new FixedDecimal(base,1)); // add .0
    209             toAddTo.add(new FixedDecimal(base,2)); // add .00
    210             Integer fract = getDifferentCategory(ints, keyword);
    211             if (fract >= TENS[LIMIT_FRACTION_SAMPLES-1]) { // make sure that we always get the value
    212                 toAddTo.add(new FixedDecimal(base + "." + fract));
    213             } else {
    214                 for (int visibleFractions = 1; visibleFractions < LIMIT_FRACTION_SAMPLES; ++visibleFractions) {
    215                     for (int i = 1; i <= visibleFractions; ++i) {
    216                         // with visible fractions = 3, and fract = 1, then we should get x.10, 0.01
    217                         // with visible fractions = 3, and fract = 15, then we should get x.15, x.15
    218                         if (fract >= TENS[i]) {
    219                             continue;
    220                         }
    221                         toAddTo.add(new FixedDecimal(base + fract/(double)TENS[i], visibleFractions));
    222                     }
    223                 }
    224             }
    225         }
    226         return toAddTo;
    227     }
    228 
    229     private Integer getDifferentCategory(List<Integer> ints, String keyword) {
    230         for (int i = ints.size() - 1; i >= 0; --i) {
    231             Integer other = ints.get(i);
    232             String keywordOther = pluralRules.select(other);
    233             if (!keywordOther.equals(keyword)) {
    234                 return other;
    235             }
    236         }
    237         return 37;
    238     }
    239 
    240     /**
    241      * @internal
    242      * @deprecated This API is ICU internal only.
    243      */
    244     @Deprecated
    245     public KeywordStatus getStatus(String keyword, int offset, Set<Double> explicits, Output<Double> uniqueValue) {
    246         if (uniqueValue != null) {
    247             uniqueValue.value = null;
    248         }
    249 
    250         if (!pluralRules.getKeywords().contains(keyword)) {
    251             return KeywordStatus.INVALID;
    252         }
    253         Collection<Double> values = pluralRules.getAllKeywordValues(keyword);
    254         if (values == null) {
    255             return KeywordStatus.UNBOUNDED;
    256         }
    257         int originalSize = values.size();
    258 
    259         if (explicits == null) {
    260             explicits = Collections.emptySet();
    261         }
    262 
    263         // Quick check on whether there are multiple elements
    264 
    265         if (originalSize > explicits.size()) {
    266             if (originalSize == 1) {
    267                 if (uniqueValue != null) {
    268                     uniqueValue.value = values.iterator().next();
    269                 }
    270                 return KeywordStatus.UNIQUE;
    271             }
    272             return KeywordStatus.BOUNDED;
    273         }
    274 
    275         // Compute if the quick test is insufficient.
    276 
    277         HashSet<Double> subtractedSet = new HashSet<Double>(values);
    278         for (Double explicit : explicits) {
    279             subtractedSet.remove(explicit - offset);
    280         }
    281         if (subtractedSet.size() == 0) {
    282             return KeywordStatus.SUPPRESSED;
    283         }
    284 
    285         if (uniqueValue != null && subtractedSet.size() == 1) {
    286             uniqueValue.value = subtractedSet.iterator().next();
    287         }
    288 
    289         return originalSize == 1 ? KeywordStatus.UNIQUE : KeywordStatus.BOUNDED;
    290     }
    291 
    292     Map<String, List<Double>> getKeySamplesMap() {
    293         return _keySamplesMap;
    294     }
    295 
    296     Map<String, Set<FixedDecimal>> getKeyFractionSamplesMap() {
    297         return _keyFractionSamplesMap;
    298     }
    299 
    300     Set<FixedDecimal> getFractionSamples() {
    301         return _fractionSamples;
    302     }
    303 
    304     /**
    305      * Returns all the values that trigger this keyword, or null if the number of such
    306      * values is unlimited.
    307      *
    308      * @param keyword the keyword
    309      * @return the values that trigger this keyword, or null.  The returned collection
    310      * is immutable. It will be empty if the keyword is not defined.
    311      * @stable ICU 4.8
    312      */
    313 
    314     Collection<Double> getAllKeywordValues(String keyword) {
    315         // HACK for now
    316         if (!pluralRules.getKeywords().contains(keyword)) {
    317             return Collections.<Double>emptyList();
    318         }
    319         Collection<Double> result = getKeySamplesMap().get(keyword);
    320 
    321         // We depend on MAX_SAMPLES here.  It's possible for a conjunction
    322         // of unlimited rules that 'looks' unlimited to return a limited
    323         // number of values.  There's no bounds to this limited number, in
    324         // general, because you can construct arbitrarily complex rules.  Since
    325         // we always generate 3 samples if a rule is really unlimited, that's
    326         // where we put the cutoff.
    327         if (result.size() > 2 && !_keyLimitedMap.get(keyword)) {
    328             return null;
    329         }
    330         return result;
    331     }
    332 }
    333