Home | History | Annotate | Download | only in locale
      1 //  2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 package com.ibm.icu.impl.locale;
      4 
      5 import java.util.Arrays;
      6 import java.util.Collection;
      7 import java.util.LinkedHashSet;
      8 import java.util.Map;
      9 import java.util.Map.Entry;
     10 import java.util.Set;
     11 
     12 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
     13 import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
     14 import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
     15 import com.ibm.icu.impl.locale.XCldrStub.Multimap;
     16 import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
     17 import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
     18 import com.ibm.icu.util.LocalePriorityList;
     19 import com.ibm.icu.util.Output;
     20 import com.ibm.icu.util.ULocale;
     21 
     22 /**
     23  * Immutable class that picks best match between user's desired locales and application's supported locales.
     24  * @author markdavis
     25  */
     26 public class XLocaleMatcher {
    // LSR used for the "und" locale; the extract* helpers and getBestMatch substitute it
    // instead of calling LSR.fromMaximalized on "und".
    private static final LSR UND = new LSR("und","","");
    // Locale compared against to detect the "und" special case.
    private static final ULocale UND_LOCALE = new ULocale("und");

    // normally the default values, but can be set via constructor

    // Source of the raw distances between desired and supported LSRs.
    private final XLocaleDistance localeDistance;
    // A best distance greater than or equal to this value makes getBestMatch fall back to defaultLanguage.
    private final int thresholdDistance;
    // Penalty added to the distance for each further entry in a multi-locale desired list.
    private final int demotionPerAdditionalDesiredLocale;
    // Passed through to XLocaleDistance.distanceRaw; stays null if the builder never set it.
    private final DistanceOption distanceOption;

    // built based on application's supported languages in constructor

    // Supported locales grouped under their LSR key.
    private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
    // All supported locales exactly as given; used for the exact-match quick check.
    private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
    // Returned when nothing matches below the threshold; null if unset and there are no supported locales.
    private final ULocale defaultLanguage;
     42 
     43 
     44     public static class Builder {
     45         private Set<ULocale> supportedLanguagesList;
     46         private int thresholdDistance = -1;
     47         private int demotionPerAdditionalDesiredLocale = -1;;
     48         private ULocale defaultLanguage;
     49         private XLocaleDistance localeDistance;
     50         private DistanceOption distanceOption;
     51         /**
     52          * @param languagePriorityList the languagePriorityList to set
     53          * @return this Builder object
     54          */
     55         public Builder setSupportedLocales(String languagePriorityList) {
     56             this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
     57             return this;
     58         }
     59         public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
     60             this.supportedLanguagesList = asSet(languagePriorityList);
     61             return this;
     62         }
     63         public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
     64             this.supportedLanguagesList = languagePriorityList;
     65             return this;
     66         }
     67 
     68         /**
     69          * @param thresholdDistance the thresholdDistance to set, with -1 = default
     70          * @return this Builder object
     71          */
     72         public Builder setThresholdDistance(int thresholdDistance) {
     73             this.thresholdDistance = thresholdDistance;
     74             return this;
     75         }
     76         /**
     77          * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
     78          * @return this Builder object
     79          */
     80         public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
     81             this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
     82             return this;
     83         }
     84 
     85         /**
     86          * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
     87          * @return this Builder object
     88          */
     89         public Builder setLocaleDistance(XLocaleDistance localeDistance) {
     90             this.localeDistance = localeDistance;
     91             return this;
     92         }
     93 
     94         /**
     95          * Set the default language, with null = default = first supported language
     96          * @param defaultLanguage the default language
     97          * @return this Builder object
     98          */
     99         public Builder setDefaultLanguage(ULocale defaultLanguage) {
    100             this.defaultLanguage = defaultLanguage;
    101             return this;
    102         }
    103 
    104         /**
    105          * If true, then the language differences are smaller than than script differences.
    106          * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
    107          * @param distanceOption the distance option
    108          * @return this Builder object
    109          */
    110         public Builder setDistanceOption(DistanceOption distanceOption) {
    111             this.distanceOption = distanceOption;
    112             return this;
    113         }
    114 
    115         public XLocaleMatcher build() {
    116             return new XLocaleMatcher(this);
    117         }
    118     }
    119 
    /**
     * Returns a builder used in chaining parameters for building a Locale Matcher.
     * @return a new Builder object with no settings applied
     */
    public static Builder builder() {
        return new Builder();
    }
    127 
    /**
     * Convenience constructor: builds a matcher with default settings.
     * @param supportedLocales the supported locales as a string, handed to Builder.setSupportedLocales(String)
     */
    public XLocaleMatcher(String supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }
    /**
     * Convenience constructor: builds a matcher with default settings.
     * @param supportedLocales the supported locales as a priority list
     */
    public XLocaleMatcher(LocalePriorityList supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }
    /**
     * Convenience constructor: builds a matcher with default settings.
     * @param supportedLocales the supported locales; iterated in the set's own order
     */
    public XLocaleMatcher(Set<ULocale> supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }
    140 
    /**
     * Creates a locale matcher from a builder, filling in defaults for anything left unset:
     * the default XLocaleDistance, its default script distance as the threshold, its default
     * region distance plus one as the per-locale demotion, and the first supported locale as
     * the default language (null when there are no supported locales).
     * @param builder the builder holding the requested settings
     */
    private XLocaleMatcher(Builder builder) {
        if (builder.localeDistance != null) {
            localeDistance = builder.localeDistance;
        } else {
            localeDistance = XLocaleDistance.getDefault();
        }

        if (builder.thresholdDistance >= 0) {
            thresholdDistance = builder.thresholdDistance;
        } else {
            thresholdDistance = localeDistance.getDefaultScriptDistance();
        }

        // The supported-locale tables depend on localeDistance, so they are built after it.
        final Set<LSR> paradigmLsrs = extractLsrSet(localeDistance.getParadigms());
        final Multimap<LSR, ULocale> supportedByLsr = extractLsrMap(builder.supportedLanguagesList, paradigmLsrs);
        supportedLanguages = supportedByLsr.asMap();
        exactSupportedLocales = ImmutableSet.copyOf(supportedByLsr.values());

        if (builder.defaultLanguage != null) {
            defaultLanguage = builder.defaultLanguage;
        } else if (supportedLanguages.isEmpty()) {
            defaultLanguage = null;
        } else {
            // first locale of the first LSR group
            defaultLanguage = supportedLanguages.entrySet().iterator().next().getValue().iterator().next();
        }

        if (builder.demotionPerAdditionalDesiredLocale >= 0) {
            demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale;
        } else {
            demotionPerAdditionalDesiredLocale = localeDistance.getDefaultRegionDistance() + 1;
        }

        distanceOption = builder.distanceOption;
    }
    166 
    167     // Result is not immutable!
    168     private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
    169         Set<LSR> result = new LinkedHashSet<LSR>();
    170         for (ULocale item : languagePriorityList) {
    171             final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
    172             result.add(max);
    173         }
    174         return result;
    175     }
    176 
    177     private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
    178         Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
    179         for (ULocale item : languagePriorityList) {
    180             final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
    181             builder.put(max, item);
    182         }
    183         if (builder.size() > 1 && priorities != null) {
    184             // for the supported list, we put any priorities before all others, except for the first.
    185             Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
    186 
    187             // copy the long way so the priorities are in the same order as in the original
    188             boolean first = true;
    189             for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
    190                 final LSR key = entry.getKey();
    191                 if (first || priorities.contains(key)) {
    192                     builder2.putAll(key, entry.getValue());
    193                     first = false;
    194                 }
    195             }
    196             // now copy the rest
    197             builder2.putAll(builder);
    198             if (!builder2.equals(builder)) {
    199                 throw new IllegalArgumentException();
    200             }
    201             builder = builder2;
    202         }
    203         return ImmutableMultimap.copyOf(builder);
    204     }
    205 
    206 
    /**
     * Convenience method: best match for a single desired locale, without reporting
     * which desired locale matched.
     * @param ulocale the desired locale
     * @return the best supported match, or the default language if nothing is close enough
     */
    public ULocale getBestMatch(ULocale ulocale) {
        return getBestMatch(ulocale, null);
    }
    211     /** Convenience method */
    212     public ULocale getBestMatch(String languageList) {
    213         return getBestMatch(LocalePriorityList.add(languageList).build(), null);
    214     }
    215     /** Convenience method */
    216     public ULocale getBestMatch(ULocale... locales) {
    217         return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
    218     }
    /**
     * Convenience method: matches the desired locales without reporting which one matched.
     * @param desiredLanguages the desired locales, iterated in the set's own order
     * @return the best supported match, or the default language if nothing is close enough
     */
    public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
        return getBestMatch(desiredLanguages, null);
    }
    /**
     * Convenience method: matches the priority list without reporting which desired locale matched.
     * @param desiredLanguages the desired locales as a priority list
     * @return the best supported match, or the default language if nothing is close enough
     */
    public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
        return getBestMatch(desiredLanguages, null);
    }
    /**
     * Convenience method: copies the priority list into an ordered set and matches that.
     * @param desiredLanguages the desired locales as a priority list
     * @param outputBestDesired if non-null, receives the desired locale that matched best
     * @return the best supported match, or the default language if nothing is close enough
     */
    public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
        return getBestMatch(asSet(desiredLanguages), outputBestDesired);
    }
    231 
    232     // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
    233     private static Set<ULocale> asSet(LocalePriorityList languageList) {
    234         Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
    235         for (ULocale locale : languageList) {
    236             temp.add(locale);
    237         };
    238         return temp;
    239     }
    240 
    /**
     * Get the best match between the desired languages and supported languages.
     * <p>A set with exactly one element is handed to the single-locale overload. Otherwise the
     * desired locales are grouped by LSR and visited in turn; each further entry is penalized by
     * demotionPerAdditionalDesiredLocale. An exact or same-LSR supported locale is returned
     * immediately as long as the accumulated penalty is still below the best distance so far.
     * An empty set yields the default language.
     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
     * @param outputBestDesired The one of the desired languages that matched best.
     * Its value is set to null if the best match was not below the threshold distance. May itself be null.
     * @return the best match, or the default language (possibly null) if no distance is below the threshold.
     */
    public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
        // fast path for singleton
        if (desiredLanguages.size() == 1) {
            return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
        }
        // TODO produce optimized version for single desired ULocale
        // null priorities: the desired locales are grouped by LSR but not reordered
        Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
        int bestDistance = Integer.MAX_VALUE;
        ULocale bestDesiredLocale = null;
        Collection<ULocale> bestSupportedLocales = null;
        // penalty accumulated for desired locales that come later in the list
        int delta = 0;
        mainLoop:
            for (final Entry<LSR, ULocale> desiredLsrAndLocale : desiredLSRs.entries()) {
                // quick check for exact match
                ULocale desiredLocale = desiredLsrAndLocale.getValue();
                LSR desiredLSR = desiredLsrAndLocale.getKey();
                // the shortcuts only apply while this entry could still beat the best so far
                if (delta < bestDistance) {
                    if (exactSupportedLocales.contains(desiredLocale)) {
                        if (outputBestDesired != null) {
                            outputBestDesired.value = desiredLocale;
                        }
                        return desiredLocale;
                    }
                    // quick check for maximized locale
                    // NOTE(review): the single-locale overload only does this lookup when
                    // distanceOption == DistanceOption.NORMAL; here it is unconditional. Confirm intent.
                    Collection<ULocale> found = supportedLanguages.get(desiredLSR);
                    if (found != null) {
                        // if we find one in the set, return first (lowest). We already know the exact one isn't there.
                        if (outputBestDesired != null) {
                            outputBestDesired.value = desiredLocale;
                        }
                        return found.iterator().next();
                    }
                }
                for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
                    int distance = delta + localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
                        thresholdDistance, distanceOption);
                    if (distance < bestDistance) {
                        bestDistance = distance;
                        bestDesiredLocale = desiredLocale;
                        bestSupportedLocales = supportedLsrAndLocale.getValue();
                        // nothing can be better than 0, so stop searching entirely
                        if (distance == 0) {
                            break mainLoop;
                        }
                    }
                }
                delta += demotionPerAdditionalDesiredLocale;
            }
        // no candidate below the threshold (also the case when the loop never ran)
        if (bestDistance >= thresholdDistance) {
            if (outputBestDesired != null) {
                outputBestDesired.value = null;
            }
            return defaultLanguage;
        }
        if (outputBestDesired != null) {
            outputBestDesired.value = bestDesiredLocale;
        }
        // pick exact match if there is one
        if (bestSupportedLocales.contains(bestDesiredLocale)) {
            return bestDesiredLocale;
        }
        // otherwise return the first supported locale of the best group as-is;
        // variants and extensions of the desired locale are not merged in here (see combine())
        return bestSupportedLocales.iterator().next();
    }
    311 
    312     /**
    313      * Get the best match between the desired languages and supported languages
    314      * @param desiredLocale the supplied user's language.
    315      * @param outputBestDesired The one of the desired languages that matched best.
    316      * Set to null if the best match was not below the threshold distance.
    317      * @return the best match.
    318      */
    319     public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
    320         int bestDistance = Integer.MAX_VALUE;
    321         ULocale bestDesiredLocale = null;
    322         Collection<ULocale> bestSupportedLocales = null;
    323 
    324         // quick check for exact match, with hack for und
    325         final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
    326 
    327         if (exactSupportedLocales.contains(desiredLocale)) {
    328             if (outputBestDesired != null) {
    329                 outputBestDesired.value = desiredLocale;
    330             }
    331             return desiredLocale;
    332         }
    333         // quick check for maximized locale
    334         if (distanceOption == DistanceOption.NORMAL) {
    335             Collection<ULocale> found = supportedLanguages.get(desiredLSR);
    336             if (found != null) {
    337                 // if we find one in the set, return first (lowest). We already know the exact one isn't there.
    338                 if (outputBestDesired != null) {
    339                     outputBestDesired.value = desiredLocale;
    340                 }
    341                 return found.iterator().next();
    342             }
    343         }
    344         for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
    345             int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
    346                 thresholdDistance, distanceOption);
    347             if (distance < bestDistance) {
    348                 bestDistance = distance;
    349                 bestDesiredLocale = desiredLocale;
    350                 bestSupportedLocales = supportedLsrAndLocale.getValue();
    351                 if (distance == 0) {
    352                     break;
    353                 }
    354             }
    355         }
    356         if (bestDistance >= thresholdDistance) {
    357             if (outputBestDesired != null) {
    358                 outputBestDesired.value = null;
    359             }
    360             return defaultLanguage;
    361         }
    362         if (outputBestDesired != null) {
    363             outputBestDesired.value = bestDesiredLocale;
    364         }
    365         // pick exact match if there is one
    366         if (bestSupportedLocales.contains(bestDesiredLocale)) {
    367             return bestDesiredLocale;
    368         }
    369         // otherwise return first supported, combining variants and extensions from bestDesired
    370         return bestSupportedLocales.iterator().next();
    371     }
    372 
    373     /** Combine features of the desired locale into those of the supported, and return result. */
    374     public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
    375         // for examples of extensions, variants, see
    376         //  http://unicode.org/repos/cldr/tags/latest/common/bcp47/
    377         //  http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
    378 
    379         if (!bestSupported.equals(bestDesired) && bestDesired != null) {
    380             // add region, variants, extensions
    381             ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
    382 
    383             // copy the region from the desired, if there is one
    384             String region = bestDesired.getCountry();
    385             if (!region.isEmpty()) {
    386                 b.setRegion(region);
    387             }
    388 
    389             // copy the variants from desired, if there is one
    390             // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "-fonupa" => "sco-fonupa" (nuking ulster)
    391             String variants = bestDesired.getVariant();
    392             if (!variants.isEmpty()) {
    393                 b.setVariant(variants);
    394             }
    395 
    396             // copy the extensions from desired, if there are any
    397             // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "-u-nu-native" => "th-u-nu-native" (nuking calendar)
    398             for (char extensionKey : bestDesired.getExtensionKeys()) {
    399                 b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
    400             }
    401             bestSupported = b.build();
    402         }
    403         return bestSupported;
    404     }
    405 
    406     /** Returns the distance between the two languages. The values are not necessarily symmetric.
    407      * @param desired A locale desired by the user
    408      * @param supported A locale supported by a program.
    409      * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
    410      * A language is first maximized with add likely subtags, then compared.
    411      */
    412     public int distance(ULocale desired, ULocale supported) {
    413         return localeDistance.distanceRaw(
    414             LSR.fromMaximalized(desired),
    415             LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
    416     }
    417 
    418     /** Convenience method */
    419     public int distance(String desiredLanguage, String supportedLanguage) {
    420         return localeDistance.distanceRaw(
    421             LSR.fromMaximalized(new ULocale(desiredLanguage)),
    422             LSR.fromMaximalized(new ULocale(supportedLanguage)),
    423             thresholdDistance, distanceOption);
    424     }
    425 
    /** Returns the string form of the set of supported locales, exactly as they were given. */
    @Override
    public String toString() {
        return exactSupportedLocales.toString();
    }
    430 
    431     /** Return the inverse of the distance: that is, 1-distance(desired, supported) */
    432     public double match(ULocale desired, ULocale supported) {
    433         return (100-distance(desired, supported))/100.0;
    434     }
    435 
    /**
     * Returns a fraction between 0 and 1, where 1 means that the languages are a
     * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
     * <br>Note that
     * the precise values may change over time; no code should be made dependent
     * on the values remaining constant.
     * <br>This form simply delegates to the 2-parameter form; the two maximized locales are ignored.
     * @param desired Desired locale
     * @param desiredMax Maximized locale (using likely subtags); ignored
     * @param supported Supported locale
     * @param supportedMax Maximized locale (using likely subtags); ignored
     * @return value between 0 and 1, inclusive.
     * @deprecated Use the form with 2 parameters instead.
     */
    @Deprecated
    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
        return match(desired, supported);
    }
    453 
    /**
     * Canonicalize a locale (language). Note that for now, it is canonicalizing
     * according to CLDR conventions (he vs iw, etc), since that is what is needed
     * for likelySubtags.
     * <p><b>Not implemented:</b> this method currently ignores its argument and always
     * returns null; callers must not rely on the result.
     * @param ulocale language/locale code
     * @return intended to be a ULocale with remapped subtags; currently always null.
     * @stable ICU 4.4
     */
    public ULocale canonicalize(ULocale ulocale) {
        // TODO implement; until then every call returns null
        return null;
    }
    466 
    /**
     * @return the thresholdDistance. In getBestMatch, a best distance at or above this value
     * is treated as a match failure and the default language is returned.
     */
    public int getThresholdDistance() {
        return thresholdDistance;
    }
    473 }
    474