Home | History | Annotate | Download | only in util
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  ****************************************************************************************
      5  * Copyright (C) 2009-2016, Google, Inc.; International Business Machines Corporation
      6  * and others. All Rights Reserved.
      7  ****************************************************************************************
      8  */
      9 package com.ibm.icu.util;
     10 
     11 import java.util.HashMap;
     12 import java.util.HashSet;
     13 import java.util.Iterator;
     14 import java.util.LinkedHashMap;
     15 import java.util.LinkedHashSet;
     16 import java.util.Map;
     17 import java.util.Map.Entry;
     18 import java.util.Set;
     19 import java.util.regex.Matcher;
     20 import java.util.regex.Pattern;
     21 
     22 import com.ibm.icu.impl.ICUData;
     23 import com.ibm.icu.impl.ICUResourceBundle;
     24 import com.ibm.icu.impl.Relation;
     25 import com.ibm.icu.impl.Row;
     26 import com.ibm.icu.impl.Row.R3;
     27 import com.ibm.icu.impl.Utility;
     28 import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
     29 import com.ibm.icu.impl.locale.XLocaleMatcher;
     30 import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
     31 
     32 /**
     33  * Provides a way to match the languages (locales) supported by a product to the
     34  * languages (locales) acceptable to a user, and get the best match. For
     35  * example:
     36  *
     37  * <pre>
     38  * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
     39  *
     40  * // afterwards:
     41  * matcher.getBestMatch("en-US").toLanguageTag() =&gt; "en"
     42  * </pre>
     43  *
     44  * It takes into account when languages are close to one another, such as fil
     45  * and tl, and when language regional variants are close, like en-GB and en-AU.
     46  * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
     47  * file.
     48  * <p>All classes implementing this interface should be immutable. Often a
     49  * product will just need one static instance, built with the languages
     50  * that it supports. However, it may want multiple instances with different
     51  * default languages based on additional information, such as the domain.
     52  *
     53  * @author markdavis (at) google.com
     54  * @stable ICU 4.4
     55  */
     56 public class LocaleMatcher {
     57 
     58     /**
     59      * @internal
     60      * @deprecated This API is ICU internal only.
     61      */
     62     @Deprecated
     63     public static final boolean DEBUG = false;
     64 
     65     private static final ULocale UNKNOWN_LOCALE = new ULocale("und");
     66 
     67     /**
     68      * Threshold for falling back to the default (first) language. May make this
     69      * a parameter in the future.
     70      */
     71     private static final double DEFAULT_THRESHOLD = 0.5;
     72 
     73     /**
     74      * The default language, in case the threshold is not met.
     75      */
     76     private final ULocale defaultLanguage;
     77 
     78     /**
     * Minimum match weight; when the best weight falls below it, the default language is returned.
     80      */
     81     private final double threshold;
     82 
    /**
     * Create a new language matcher. The highest-weighted language is the
     * default. That means that if no other language matches more closely than a given
     * threshold, that default language is chosen. Typically the default is English,
     * but it could be different based on additional information, such as the domain
     * of the page.
     *
     * @param languagePriorityList weighted list
     * @stable ICU 4.4
     */
    public LocaleMatcher(LocalePriorityList languagePriorityList) {
        // Delegate using the shared default matcher data.
        this(languagePriorityList, defaultWritten);
    }
     96 
    /**
     * Create a new language matcher from a String form. The highest-weighted
     * language is the default.
     *
     * @param languagePriorityListString String form of LanguagePriorityList
     * @stable ICU 4.4
     */
    public LocaleMatcher(String languagePriorityListString) {
        // Parse the string into a LocalePriorityList, then delegate.
        this(LocalePriorityList.add(languagePriorityListString).build());
    }
    107 
    /**
     * Internal testing function; may expose API later.
     * Uses {@code DEFAULT_THRESHOLD} as the fallback threshold.
     * @param languagePriorityList LocalePriorityList to match
     * @param matcherData Internal matching data
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
        this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
    }
    119 
    120     /**
    121      * Internal testing function; may expose API later.
    122      * @param languagePriorityList LocalePriorityList to match
    123      * @param matcherData Internal matching data
    124      * @internal
    125      * @deprecated This API is ICU internal only.
    126      */
    127     @Deprecated
    128     public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
    129         this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
    130         this.languagePriorityList = languagePriorityList;
    131         for (final ULocale language : languagePriorityList) {
    132             add(language, languagePriorityList.getWeight(language));
    133         }
    134         processMapping();
    135         Iterator<ULocale> it = languagePriorityList.iterator();
    136         defaultLanguage = it.hasNext() ? it.next() : null;
    137         this.threshold = threshold;
    138     }
    139 
    140 
    141     /**
    142      * Returns a fraction between 0 and 1, where 1 means that the languages are a
    143      * perfect match, and 0 means that they are completely different. Note that
    144      * the precise values may change over time; no code should be made dependent
    145      * on the values remaining constant.
    146      * @param desired Desired locale
    147      * @param desiredMax Maximized locale (using likely subtags)
    148      * @param supported Supported locale
    149      * @param supportedMax Maximized locale (using likely subtags)
    150      * @return value between 0 and 1, inclusive.
    151      * @stable ICU 4.4
    152      */
    153     public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
    154         return matcherData.match(desired, desiredMax, supported, supportedMax);
    155     }
    156 
    157 
    158     /**
    159      * Canonicalize a locale (language). Note that for now, it is canonicalizing
    160      * according to CLDR conventions (he vs iw, etc), since that is what is needed
    161      * for likelySubtags.
    162      * @param ulocale language/locale code
    163      * @return ULocale with remapped subtags.
    164      * @stable ICU 4.4
    165      */
    166     public ULocale canonicalize(ULocale ulocale) {
    167         // TODO Get the data from CLDR, use Java conventions.
    168         String lang = ulocale.getLanguage();
    169         String lang2 = canonicalMap.get(lang);
    170         String script = ulocale.getScript();
    171         String script2 = canonicalMap.get(script);
    172         String region = ulocale.getCountry();
    173         String region2 = canonicalMap.get(region);
    174         if (lang2 != null || script2 != null || region2 != null) {
    175             return new ULocale(
    176                 lang2 == null ? lang : lang2,
    177                     script2 == null ? script : script2,
    178                         region2 == null ? region : region2
    179                 );
    180         }
    181         return ulocale;
    182     }
    183 
    184     /**
    185      * Get the best match for a LanguagePriorityList
    186      *
    187      * @param languageList list to match
    188      * @return best matching language code
    189      * @stable ICU 4.4
    190      */
    191     public ULocale getBestMatch(LocalePriorityList languageList) {
    192         double bestWeight = 0;
    193         ULocale bestTableMatch = null;
    194         double penalty = 0;
    195         OutputDouble matchWeight = new OutputDouble();
    196         for (final ULocale language : languageList) {
    197             final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
    198             final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
    199             if (weight > bestWeight) {
    200                 bestWeight = weight;
    201                 bestTableMatch = matchLocale;
    202             }
    203             penalty += 0.07000001;
    204         }
    205         if (bestWeight < threshold) {
    206             bestTableMatch = defaultLanguage;
    207         }
    208         return bestTableMatch;
    209     }
    210 
    211     /**
    212      * Convenience method: Get the best match for a LanguagePriorityList
    213      *
    214      * @param languageList String form of language priority list
    215      * @return best matching language code
    216      * @stable ICU 4.4
    217      */
    218     public ULocale getBestMatch(String languageList) {
    219         return getBestMatch(LocalePriorityList.add(languageList).build());
    220     }
    221 
    222     /**
    223      * Get the best match for an individual language code.
    224      *
    225      * @param ulocale locale/language code to match
    226      * @return best matching language code
    227      * @stable ICU 4.4
    228      */
    229     public ULocale getBestMatch(ULocale ulocale) {
    230         return getBestMatchInternal(ulocale, null);
    231     }
    232 
    233     /**
    234      * @internal
    235      * @deprecated This API is ICU internal only.
    236      */
    237     @Deprecated
    238     public ULocale getBestMatch(ULocale... ulocales) {
    239         return getBestMatch(LocalePriorityList.add(ulocales).build());
    240     }
    241 
    242     /**
    243      * {@inheritDoc}
    244      * @stable ICU 4.4
    245      */
    246     @Override
    247     public String toString() {
    248         return "{" + defaultLanguage + ", "
    249             + localeToMaxLocaleAndWeight + "}";
    250     }
    251     // ================= Privates =====================
    252 
    253     /**
    254      * Get the best match for an individual language code.
    255      *
    256      * @param languageCode
    257      * @return best matching language code and weight (as per
    258      *         {@link #match(ULocale, ULocale)})
    259      */
    260     private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
    261         languageCode = canonicalize(languageCode);
    262         final ULocale maximized = addLikelySubtags(languageCode);
    263         if (DEBUG) {
    264             System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
    265         }
    266         double bestWeight = 0;
    267         ULocale bestTableMatch = null;
    268         String baseLanguage = maximized.getLanguage();
    269         Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
    270         if (searchTable != null) { // we preprocessed the table so as to filter by lanugage
    271             if (DEBUG) System.out.println("\tSearching: " + searchTable);
    272             for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
    273                 ULocale tableKey = tableKeyValue.get0();
    274                 ULocale maxLocale = tableKeyValue.get1();
    275                 Double matchedWeight = tableKeyValue.get2();
    276                 final double match = match(languageCode, maximized, tableKey, maxLocale);
    277                 if (DEBUG) {
    278                     System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
    279                 }
    280                 final double weight = match * matchedWeight;
    281                 if (weight > bestWeight) {
    282                     bestWeight = weight;
    283                     bestTableMatch = tableKey;
    284                     if (weight > 0.999d) { // bail on good enough match.
    285                         break;
    286                     }
    287                 }
    288             }
    289         }
    290         if (bestWeight < threshold) {
    291             bestTableMatch = defaultLanguage;
    292         }
    293         if (outputWeight != null) {
    294             outputWeight.value = bestWeight; // only return the weight when needed
    295         }
    296         return bestTableMatch;
    297     }
    298 
    /**
     * Mutable holder that lets a method hand back a {@code double} through a parameter.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    private static class OutputDouble { // TODO, move to where OutputInt is
        // Written by getBestMatchInternal; read by getBestMatch(LocalePriorityList).
        double value;
    }
    307 
    308     private void add(ULocale language, Double weight) {
    309         language = canonicalize(language);
    310         R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
    311         row.freeze();
    312         localeToMaxLocaleAndWeight.add(row);
    313     }
    314 
    315     /**
    316      * We preprocess the data to get just the possible matches for each desired base language.
    317      */
    318     private void processMapping() {
    319         for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
    320             String desired = desiredToMatchingLanguages.getKey();
    321             Set<String> supported = desiredToMatchingLanguages.getValue();
    322             for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
    323                 final ULocale key = localeToMaxAndWeight.get0();
    324                 String lang = key.getLanguage();
    325                 if (supported.contains(lang)) {
    326                     addFiltered(desired, localeToMaxAndWeight);
    327                 }
    328             }
    329         }
    330         // now put in the values directly, since languages always map to themselves
    331         for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
    332             final ULocale key = localeToMaxAndWeight.get0();
    333             String lang = key.getLanguage();
    334             addFiltered(lang, localeToMaxAndWeight);
    335         }
    336     }
    337 
    338     private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
    339         Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
    340         if (map == null) {
    341             desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>());
    342         }
    343         map.add(localeToMaxAndWeight);
    344         if (DEBUG) {
    345             System.out.println(desired + ", " + localeToMaxAndWeight);
    346         }
    347     }
    348 
    // Supported locales in insertion order, each as a row of
    // (canonicalized locale, maximized locale, weight); filled by add().
    Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
    // Desired base language -> rows to search for that language;
    // filled by processMapping() through addFiltered().
    Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
    = new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>();
    352 
    353     // =============== Special Mapping Information ==============
    354 
    355     /**
    356      * We need to add another method to addLikelySubtags that doesn't return
    357      * null, but instead substitutes Zzzz and ZZ if unknown. There are also
    358      * a few cases where addLikelySubtags needs to have expanded data, to handle
    359      * all deprecated codes.
    360      * @param languageCode
    361      * @return "fixed" addLikelySubtags
    362      */
    363     private ULocale addLikelySubtags(ULocale languageCode) {
    364         // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
    365         // language would normally match English.  But that would produce the counterintuitive results
    366         // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
    367         // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
    368         //
    369         // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
    370         // so that max("und")="und". That produces the following, more desirable results:
    371         if (languageCode.equals(UNKNOWN_LOCALE)) {
    372             return UNKNOWN_LOCALE;
    373         }
    374         final ULocale result = ULocale.addLikelySubtags(languageCode);
    375         // should have method on getLikelySubtags for this
    376         if (result == null || result.equals(languageCode)) {
    377             final String language = languageCode.getLanguage();
    378             final String script = languageCode.getScript();
    379             final String region = languageCode.getCountry();
    380             return new ULocale((language.length()==0 ? "und"
    381                 : language)
    382                 + "_"
    383                 + (script.length()==0 ? "Zzzz" : script)
    384                 + "_"
    385                 + (region.length()==0 ? "ZZ" : region));
    386         }
    387         return result;
    388     }
    389 
    390     private static class LocalePatternMatcher {
    391         // a value of null means a wildcard; matches any.
    392         private String lang;
    393         private String script;
    394         private String region;
    395         private Level level;
    396         static Pattern pattern = Pattern.compile(
    397             "([a-z]{1,8}|\\*)"
    398                 + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
    399                 + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
    400 
    401         public LocalePatternMatcher(String toMatch) {
    402             Matcher matcher = pattern.matcher(toMatch);
    403             if (!matcher.matches()) {
    404                 throw new IllegalArgumentException("Bad pattern: " + toMatch);
    405             }
    406             lang = matcher.group(1);
    407             script = matcher.group(2);
    408             region = matcher.group(3);
    409             level = region != null ? Level.region : script != null ? Level.script : Level.language;
    410 
    411             if (lang.equals("*")) {
    412                 lang = null;
    413             }
    414             if (script != null && script.equals("*")) {
    415                 script = null;
    416             }
    417             if (region != null && region.equals("*")) {
    418                 region = null;
    419             }
    420         }
    421 
    422         boolean matches(ULocale ulocale) {
    423             if (lang != null && !lang.equals(ulocale.getLanguage())) {
    424                 return false;
    425             }
    426             if (script != null && !script.equals(ulocale.getScript())) {
    427                 return false;
    428             }
    429             if (region != null && !region.equals(ulocale.getCountry())) {
    430                 return false;
    431             }
    432             return true;
    433         }
    434 
    435         public Level getLevel() {
    436             return level;
    437         }
    438 
    439         public String getLanguage() {
    440             return (lang == null ? "*" : lang);
    441         }
    442 
    443         public String getScript() {
    444             return (script == null ? "*" : script);
    445         }
    446 
    447         public String getRegion() {
    448             return (region == null ? "*" : region);
    449         }
    450 
    451         @Override
    452         public String toString() {
    453             String result = getLanguage();
    454             if (level != Level.language) {
    455                 result += "-" + getScript();
    456                 if (level != Level.script) {
    457                     result += "-" + getRegion();
    458                 }
    459             }
    460             return result;
    461         }
    462 
    463         /* (non-Javadoc)
    464          * @see java.lang.Object#equals(java.lang.Object)
    465          */
    466         @Override
    467         public boolean equals(Object obj) {
    468             if (obj == this) {
    469                 return true;
    470             }
    471             if (obj == null || !(obj instanceof LocalePatternMatcher)) {
    472                 return false;
    473             }
    474             LocalePatternMatcher other = (LocalePatternMatcher) obj;
    475             return Utility.objectEquals(level, other.level)
    476                 && Utility.objectEquals(lang, other.lang)
    477                 && Utility.objectEquals(script, other.script)
    478                 && Utility.objectEquals(region, other.region);
    479         }
    480 
    481         /* (non-Javadoc)
    482          * @see java.lang.Object#hashCode()
    483          */
    484         @Override
    485         public int hashCode() {
    486             return level.ordinal()
    487                 ^ (lang == null ? 0 : lang.hashCode())
    488                 ^ (script == null ? 0 : script.hashCode())
    489                 ^ (region == null ? 0 : region.hashCode());
    490         }
    491     }
    492 
    /**
     * The locale fields that are scored, each carrying the distance that is
     * used when no explicit rule matches at that level.
     */
    enum Level {
        language(0.99),
        script(0.2),
        region(0.04);

        /** Fallback distance for this level. */
        final double worst;

        Level(double worstDistance) {
            this.worst = worstDistance;
        }
    }
    504 
    505     private static class ScoreData implements Freezable<ScoreData> {
    506         @SuppressWarnings("unused")
    507         private static final double maxUnequal_changeD_sameS = 0.5;
    508 
    509         @SuppressWarnings("unused")
    510         private static final double maxUnequal_changeEqual = 0.75;
    511 
    512         LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>();
    513         final Level level;
    514 
    515         public ScoreData(Level level) {
    516             this.level = level;
    517         }
    518 
    519         void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) {
    520             //            Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired);
    521             //            if (lang_result == null) {
    522             //                scores.put(desired, lang_result = new HashMap());
    523             //            }
    524             //            Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported);
    525             //            if (result == null) {
    526             //                lang_result.put(supported, result = new LinkedHashSet());
    527             //            }
    528             //            result.add(data);
    529             boolean added = scores.add(data);
    530             if (!added) {
    531                 throw new ICUException("trying to add duplicate data: " +  data);
    532             }
    533         }
    534 
    535         double getScore(ULocale dMax, String desiredRaw, String desiredMax,
    536             ULocale sMax, String supportedRaw, String supportedMax) {
    537             double distance = 0;
    538             if (!desiredMax.equals(supportedMax)) {
    539                 distance = getRawScore(dMax, sMax);
    540             } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
    541                 distance += 0.001;
    542             }
    543             return distance;
    544         }
    545 
    546         private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
    547             if (DEBUG) {
    548                 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
    549             }
    550             for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
    551                 if (datum.get0().matches(desiredLocale)
    552                     && datum.get1().matches(supportedLocale)) {
    553                     if (DEBUG) {
    554                         System.out.println("\t\t\t\tFOUND\t" + datum);
    555                     }
    556                     return datum.get2();
    557                 }
    558             }
    559             if (DEBUG) {
    560                 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
    561             }
    562             return level.worst;
    563         }
    564 
    565         @Override
    566         public String toString() {
    567             StringBuilder result = new StringBuilder().append(level);
    568             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
    569                 result.append("\n\t\t").append(score);
    570             }
    571             return result.toString();
    572         }
    573 
    574 
    575         @Override
    576         @SuppressWarnings("unchecked")
    577         public ScoreData cloneAsThawed() {
    578             try {
    579                 ScoreData result = (ScoreData) clone();
    580                 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
    581                 result.frozen = false;
    582                 return result;
    583             } catch (CloneNotSupportedException e) {
    584                 throw new ICUCloneNotSupportedException(e); // will never happen
    585             }
    586 
    587         }
    588 
    589         private volatile boolean frozen = false;
    590 
    591         @Override
    592         public ScoreData freeze() {
    593             return this;
    594         }
    595 
    596         @Override
    597         public boolean isFrozen() {
    598             return frozen;
    599         }
    600 
    601         public Relation<String,String> getMatchingLanguages() {
    602             Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class);
    603             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
    604                 LocalePatternMatcher desired = item.get0();
    605                 LocalePatternMatcher supported = item.get1();
    606                 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
    607                     desiredToSupported.put(desired.lang, supported.lang);
    608                 }
    609             }
    610             desiredToSupported.freeze();
    611             return desiredToSupported;
    612         }
    613     }
    614 
    615     /**
    616      * Only for testing and use by tools. Interface may change!!
    617      * @internal
    618      * @deprecated This API is ICU internal only.
    619      */
    620     @Deprecated
    621     public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
    622         private ScoreData languageScores = new ScoreData(Level.language);
    623         private ScoreData scriptScores = new ScoreData(Level.script);
    624         private ScoreData regionScores = new ScoreData(Level.region);
    625         private Relation<String, String> matchingLanguages;
    626         private volatile boolean frozen = false;
    627 
    628 
        /**
         * Creates matcher data with no distance rules; the object starts out unfrozen.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData() {
        }
    636 
        /**
         * Returns the relation from desired language to supported languages.
         * NOTE(review): the field has no initializer in the visible code, so this
         * presumably returns null until it is assigned elsewhere; confirm.
         * @return the stored relation
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public Relation<String, String> matchingLanguages() {
            return matchingLanguages;
        }
    645 
    646         /**
    647          * @internal
    648          * @deprecated This API is ICU internal only.
    649          */
    650         @Override
    651         @Deprecated
    652         public String toString() {
    653             return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
    654         }
    655 
    656         /**
    657          * @internal
    658          * @deprecated This API is ICU internal only.
    659          */
    660         @Deprecated
    661         public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
    662             double diff = 0;
    663             diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
    664             if (diff > 0.999d) { // with no language match, we bail
    665                 return 0.0d;
    666             }
    667             diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
    668             diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
    669 
    670             if (!a.getVariant().equals(b.getVariant())) {
    671                 diff += 0.01;
    672             }
    673             if (diff < 0.0d) {
    674                 diff = 0.0d;
    675             } else if (diff > 1.0d) {
    676                 diff = 1.0d;
    677             }
    678             if (DEBUG) {
    679                 System.out.println("\t\t\tTotal Distance\t" + diff);
    680             }
    681             return 1.0 - diff;
    682         }
    683 
    684         /**
    685          * @internal
    686          * @deprecated This API is ICU internal only.
    687          */
    688         @Deprecated
    689         public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
    690             return addDistance(desired, supported, percent, false, comment);
    691         }
    692         /**
    693          * @internal
    694          * @deprecated This API is ICU internal only.
    695          */
    696         @Deprecated
    697         public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
    698             return addDistance(desired, supported, percent, oneway, null);
    699         }
    700 
    701         private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
    702             if (DEBUG) {
    703                 System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
    704                     " supported=\"" + supported + "\"" +
    705                     " percent=\"" + percent + "\""
    706                     + (oneway ? " oneway=\"true\"" : "")
    707                     + "/>"
    708                     + (comment == null ? "" : "\t<!-- " + comment + " -->"));
    709                 //                    //     .addDistance("nn", "nb", 4, true)
    710                 //                        System.out.println(".addDistance(\"" + desired + "\"" +
    711                 //                                ", \"" + supported + "\"" +
    712                 //                                ", " + percent + ""
    713                 //                                + (oneway ? "" : ", true")
    714                 //                                + (comment == null ? "" : ", \"" + comment + "\"")
    715                 //                                + ")"
    716                 //                        );
    717 
    718             }
    719             double score = 1-percent/100.0; // convert from percentage
    720             LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
    721             Level desiredLen = desiredMatcher.getLevel();
    722             LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
    723             Level supportedLen = supportedMatcher.getLevel();
    724             if (desiredLen != supportedLen) {
    725                 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
    726             }
    727             R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
    728             R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
    729             boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
    730             switch (desiredLen) {
    731             case language:
    732                 String dlanguage = desiredMatcher.getLanguage();
    733                 String slanguage = supportedMatcher.getLanguage();
    734                 languageScores.addDataToScores(dlanguage, slanguage, data);
    735                 if (!oneway && !desiredEqualsSupported) {
    736                     languageScores.addDataToScores(slanguage, dlanguage, data2);
    737                 }
    738                 break;
    739             case script:
    740                 String dscript = desiredMatcher.getScript();
    741                 String sscript = supportedMatcher.getScript();
    742                 scriptScores.addDataToScores(dscript, sscript, data);
    743                 if (!oneway && !desiredEqualsSupported) {
    744                     scriptScores.addDataToScores(sscript, dscript, data2);
    745                 }
    746                 break;
    747             case region:
    748                 String dregion = desiredMatcher.getRegion();
    749                 String sregion = supportedMatcher.getRegion();
    750                 regionScores.addDataToScores(dregion, sregion, data);
    751                 if (!oneway && !desiredEqualsSupported) {
    752                     regionScores.addDataToScores(sregion, dregion, data2);
    753                 }
    754                 break;
    755             }
    756             return this;
    757         }
    758 
    759         /**
    760          * {@inheritDoc}
    761          * @internal
    762          * @deprecated This API is ICU internal only.
    763          */
    764         @Override
    765         @Deprecated
    766         public LanguageMatcherData cloneAsThawed() {
    767             LanguageMatcherData result;
    768             try {
    769                 result = (LanguageMatcherData) clone();
    770                 result.languageScores = languageScores.cloneAsThawed();
    771                 result.scriptScores = scriptScores.cloneAsThawed();
    772                 result.regionScores = regionScores.cloneAsThawed();
    773                 result.frozen = false;
    774                 return result;
    775             } catch (CloneNotSupportedException e) {
    776                 throw new ICUCloneNotSupportedException(e); // will never happen
    777             }
    778         }
    779 
    780         /**
    781          * {@inheritDoc}
    782          * @internal
    783          * @deprecated This API is ICU internal only.
    784          */
    785         @Override
    786         @Deprecated
    787         public LanguageMatcherData freeze() {
    788             languageScores.freeze();
    789             regionScores.freeze();
    790             scriptScores.freeze();
    791             matchingLanguages = languageScores.getMatchingLanguages();
    792             frozen = true;
    793             return this;
    794         }
    795 
    796         /**
    797          * {@inheritDoc}
    798          * @internal
    799          * @deprecated This API is ICU internal only.
    800          */
    801         @Override
    802         @Deprecated
    803         public boolean isFrozen() {
    804             return frozen;
    805         }
    806     }
    807 
    // Distance data held by this matcher instance.
    // NOTE(review): it is assigned outside this section; presumably in the constructors - confirm.
    LanguageMatcherData matcherData;
    // Handed to the XLocaleMatcher builder as the supported locales (see getLocaleMatcher()).
    LocalePriorityList languagePriorityList;

    // Populated and frozen by the static initializer from the "written" table of the
    // "languageMatching" supplemental data.
    private static final LanguageMatcherData defaultWritten;

    // Seeded by the static initializer with iw -> he, mo -> ro and tl -> fil.
    private static HashMap<String,String> canonicalMap = new HashMap<String, String>();
    814 
    815 
    816     static {
    817         canonicalMap.put("iw", "he");
    818         canonicalMap.put("mo", "ro");
    819         canonicalMap.put("tl", "fil");
    820 
    821         ICUResourceBundle suppData = getICUSupplementalData();
    822         ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
    823         ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
    824         defaultWritten = new LanguageMatcherData();
    825 
    826         for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
    827             ICUResourceBundle item = (ICUResourceBundle) iter.next();
    828             /*
    829             "*_*_*",
    830             "*_*_*",
    831             "96",
    832              */
    833             // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
    834             boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
    835             defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
    836         }
    837         defaultWritten.freeze();
    838     }
    839 
    840     /**
    841      * @internal
    842      * @deprecated This API is ICU internal only.
    843      */
    844     @Deprecated
    845     public static ICUResourceBundle getICUSupplementalData() {
    846         ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
    847             ICUData.ICU_BASE_NAME,
    848             "supplementalData",
    849             ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    850         return suppData;
    851     }
    852 
    853     /**
    854      * @internal
    855      * @deprecated This API is ICU internal only.
    856      */
    857     @Deprecated
    858     public static double match(ULocale a, ULocale b) {
    859         final LocaleMatcher matcher = new LocaleMatcher("");
    860         return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
    861     }
    862 
    // Built on first use by getLocaleMatcher(); setDefaultLanguage() and setFavorScript()
    // reset it to null so that it is rebuilt with the new settings.
    transient XLocaleMatcher xLocaleMatcher = null;
    // When non-null, passed to the builder as the default language.
    transient ULocale xDefaultLanguage = null;
    // When true, the builder is configured with DistanceOption.SCRIPT_FIRST.
    transient boolean xFavorScript = false;
    866 
    867     /**
    868      * Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch).
    869      * The values are not necessarily symmetric.
    870      * @param desired A locale desired by the user
    871      * @param supported A locale supported by a program.
    872      * @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance).
    873      * A language is first maximized with add likely subtags, then compared.
    874      * @internal
    875      * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
    876      */
    877     @Deprecated
    878     public int distance(ULocale desired, ULocale supported) {
    879         return getLocaleMatcher().distance(desired, supported);
    880     }
    881 
    882     private synchronized XLocaleMatcher getLocaleMatcher() {
    883         if (xLocaleMatcher == null) {
    884             Builder builder = XLocaleMatcher.builder();
    885             builder.setSupportedLocales(languagePriorityList);
    886             if (xDefaultLanguage != null) {
    887                 builder.setDefaultLanguage(xDefaultLanguage);
    888             }
    889             if (xFavorScript) {
    890                 builder.setDistanceOption(DistanceOption.SCRIPT_FIRST);
    891             }
    892             xLocaleMatcher = builder.build();
    893         }
    894         return xLocaleMatcher;
    895     }
    896 
    897     /**
    898      * Get the best match between the desired languages and supported languages
    899      * This supports the new CLDR syntax to provide for better matches within
    900      * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US)
    901      * and also matching between regions and macroregions, such as comparing es-419 to es-AR).
    902      * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
    903      * @param outputBestDesired The one of the desired languages that matched best.
    904      * Set to null if the best match was not below the threshold distance.
    905      * @return best-match supported language
    906      * @internal
    907      * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
    908      */
    909     @Deprecated
    910     public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
    911         return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired);
    912     }
    913 
    914     /**
    915      * Set the default language, with null = default = first supported language
    916      * @param defaultLanguage Language to use in case the threshold for distance is exceeded.
    917      * @return this, for chaining
    918      * @internal
    919      * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
    920      */
    921     @Deprecated
    922     public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) {
    923         this.xDefaultLanguage = defaultLanguage;
    924         xLocaleMatcher = null;
    925         return this;
    926     }
    927 
    928     /**
    929      * If true, then the language differences are smaller than than script differences.
    930      * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
    931      * @param favorScript Set to true to treat script as most important.
    932      * @return this, for chaining.
    933      * @internal
    934      * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
    935      */
    936     @Deprecated
    937     public synchronized LocaleMatcher setFavorScript(boolean favorScript) {
    938         this.xFavorScript = favorScript;
    939         xLocaleMatcher = null;
    940         return this;
    941     }
    942 }
    943