1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 **************************************************************************************** 5 * Copyright (C) 2009-2016, Google, Inc.; International Business Machines Corporation 6 * and others. All Rights Reserved. 7 **************************************************************************************** 8 */ 9 package com.ibm.icu.util; 10 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Iterator; 14 import java.util.LinkedHashMap; 15 import java.util.LinkedHashSet; 16 import java.util.Map; 17 import java.util.Map.Entry; 18 import java.util.Set; 19 import java.util.regex.Matcher; 20 import java.util.regex.Pattern; 21 22 import com.ibm.icu.impl.ICUData; 23 import com.ibm.icu.impl.ICUResourceBundle; 24 import com.ibm.icu.impl.Relation; 25 import com.ibm.icu.impl.Row; 26 import com.ibm.icu.impl.Row.R3; 27 import com.ibm.icu.impl.Utility; 28 import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption; 29 import com.ibm.icu.impl.locale.XLocaleMatcher; 30 import com.ibm.icu.impl.locale.XLocaleMatcher.Builder; 31 32 /** 33 * Provides a way to match the languages (locales) supported by a product to the 34 * languages (locales) acceptable to a user, and get the best match. For 35 * example: 36 * 37 * <pre> 38 * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en"); 39 * 40 * // afterwards: 41 * matcher.getBestMatch("en-US").toLanguageTag() => "en" 42 * </pre> 43 * 44 * It takes into account when languages are close to one another, such as fil 45 * and tl, and when language regional variants are close, like en-GB and en-AU. 46 * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test 47 * file. 48 * <p>All classes implementing this interface should be immutable. Often a 49 * product will just need one static instance, built with the languages 50 * that it supports. However, it may want multiple instances with different 51 * default languages based on additional information, such as the domain. 52 * 53 * @author markdavis (at) google.com 54 * @stable ICU 4.4 55 */ 56 public class LocaleMatcher { 57 58 /** 59 * @internal 60 * @deprecated This API is ICU internal only. 61 */ 62 @Deprecated 63 public static final boolean DEBUG = false; 64 65 private static final ULocale UNKNOWN_LOCALE = new ULocale("und"); 66 67 /** 68 * Threshold for falling back to the default (first) language. May make this 69 * a parameter in the future. 70 */ 71 private static final double DEFAULT_THRESHOLD = 0.5; 72 73 /** 74 * The default language, in case the threshold is not met. 75 */ 76 private final ULocale defaultLanguage; 77 78 /** 79 * The default language, in case the threshold is not met. 80 */ 81 private final double threshold; 82 83 /** 84 * Create a new language matcher. The highest-weighted language is the 85 * default. That means that if no other language is matches closer than a given 86 * threshold, that default language is chosen. Typically the default is English, 87 * but it could be different based on additional information, such as the domain 88 * of the page. 89 * 90 * @param languagePriorityList weighted list 91 * @stable ICU 4.4 92 */ 93 public LocaleMatcher(LocalePriorityList languagePriorityList) { 94 this(languagePriorityList, defaultWritten); 95 } 96 97 /** 98 * Create a new language matcher from a String form. The highest-weighted 99 * language is the default. 100 * 101 * @param languagePriorityListString String form of LanguagePriorityList 102 * @stable ICU 4.4 103 */ 104 public LocaleMatcher(String languagePriorityListString) { 105 this(LocalePriorityList.add(languagePriorityListString).build()); 106 } 107 108 /** 109 * Internal testing function; may expose API later. 110 * @param languagePriorityList LocalePriorityList to match 111 * @param matcherData Internal matching data 112 * @internal 113 * @deprecated This API is ICU internal only. 114 */ 115 @Deprecated 116 public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) { 117 this(languagePriorityList, matcherData, DEFAULT_THRESHOLD); 118 } 119 120 /** 121 * Internal testing function; may expose API later. 122 * @param languagePriorityList LocalePriorityList to match 123 * @param matcherData Internal matching data 124 * @internal 125 * @deprecated This API is ICU internal only. 126 */ 127 @Deprecated 128 public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) { 129 this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze(); 130 this.languagePriorityList = languagePriorityList; 131 for (final ULocale language : languagePriorityList) { 132 add(language, languagePriorityList.getWeight(language)); 133 } 134 processMapping(); 135 Iterator<ULocale> it = languagePriorityList.iterator(); 136 defaultLanguage = it.hasNext() ? it.next() : null; 137 this.threshold = threshold; 138 } 139 140 141 /** 142 * Returns a fraction between 0 and 1, where 1 means that the languages are a 143 * perfect match, and 0 means that they are completely different. Note that 144 * the precise values may change over time; no code should be made dependent 145 * on the values remaining constant. 146 * @param desired Desired locale 147 * @param desiredMax Maximized locale (using likely subtags) 148 * @param supported Supported locale 149 * @param supportedMax Maximized locale (using likely subtags) 150 * @return value between 0 and 1, inclusive. 151 * @stable ICU 4.4 152 */ 153 public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { 154 return matcherData.match(desired, desiredMax, supported, supportedMax); 155 } 156 157 158 /** 159 * Canonicalize a locale (language). Note that for now, it is canonicalizing 160 * according to CLDR conventions (he vs iw, etc), since that is what is needed 161 * for likelySubtags. 162 * @param ulocale language/locale code 163 * @return ULocale with remapped subtags. 164 * @stable ICU 4.4 165 */ 166 public ULocale canonicalize(ULocale ulocale) { 167 // TODO Get the data from CLDR, use Java conventions. 168 String lang = ulocale.getLanguage(); 169 String lang2 = canonicalMap.get(lang); 170 String script = ulocale.getScript(); 171 String script2 = canonicalMap.get(script); 172 String region = ulocale.getCountry(); 173 String region2 = canonicalMap.get(region); 174 if (lang2 != null || script2 != null || region2 != null) { 175 return new ULocale( 176 lang2 == null ? lang : lang2, 177 script2 == null ? script : script2, 178 region2 == null ? region : region2 179 ); 180 } 181 return ulocale; 182 } 183 184 /** 185 * Get the best match for a LanguagePriorityList 186 * 187 * @param languageList list to match 188 * @return best matching language code 189 * @stable ICU 4.4 190 */ 191 public ULocale getBestMatch(LocalePriorityList languageList) { 192 double bestWeight = 0; 193 ULocale bestTableMatch = null; 194 double penalty = 0; 195 OutputDouble matchWeight = new OutputDouble(); 196 for (final ULocale language : languageList) { 197 final ULocale matchLocale = getBestMatchInternal(language, matchWeight); 198 final double weight = matchWeight.value * languageList.getWeight(language) - penalty; 199 if (weight > bestWeight) { 200 bestWeight = weight; 201 bestTableMatch = matchLocale; 202 } 203 penalty += 0.07000001; 204 } 205 if (bestWeight < threshold) { 206 bestTableMatch = defaultLanguage; 207 } 208 return bestTableMatch; 209 } 210 211 /** 212 * Convenience method: Get the best match for a LanguagePriorityList 213 * 214 * @param languageList String form of language priority list 215 * @return best matching language code 216 * @stable ICU 4.4 217 */ 218 public ULocale getBestMatch(String languageList) { 219 return getBestMatch(LocalePriorityList.add(languageList).build()); 220 } 221 222 /** 223 * Get the best match for an individual language code. 224 * 225 * @param ulocale locale/language code to match 226 * @return best matching language code 227 * @stable ICU 4.4 228 */ 229 public ULocale getBestMatch(ULocale ulocale) { 230 return getBestMatchInternal(ulocale, null); 231 } 232 233 /** 234 * @internal 235 * @deprecated This API is ICU internal only. 236 */ 237 @Deprecated 238 public ULocale getBestMatch(ULocale... ulocales) { 239 return getBestMatch(LocalePriorityList.add(ulocales).build()); 240 } 241 242 /** 243 * {@inheritDoc} 244 * @stable ICU 4.4 245 */ 246 @Override 247 public String toString() { 248 return "{" + defaultLanguage + ", " 249 + localeToMaxLocaleAndWeight + "}"; 250 } 251 // ================= Privates ===================== 252 253 /** 254 * Get the best match for an individual language code. 255 * 256 * @param languageCode 257 * @return best matching language code and weight (as per 258 * {@link #match(ULocale, ULocale)}) 259 */ 260 private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) { 261 languageCode = canonicalize(languageCode); 262 final ULocale maximized = addLikelySubtags(languageCode); 263 if (DEBUG) { 264 System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized); 265 } 266 double bestWeight = 0; 267 ULocale bestTableMatch = null; 268 String baseLanguage = maximized.getLanguage(); 269 Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage); 270 if (searchTable != null) { // we preprocessed the table so as to filter by lanugage 271 if (DEBUG) System.out.println("\tSearching: " + searchTable); 272 for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) { 273 ULocale tableKey = tableKeyValue.get0(); 274 ULocale maxLocale = tableKeyValue.get1(); 275 Double matchedWeight = tableKeyValue.get2(); 276 final double match = match(languageCode, maximized, tableKey, maxLocale); 277 if (DEBUG) { 278 System.out.println("\t" + tableKeyValue + ";\t" + match + "\n"); 279 } 280 final double weight = match * matchedWeight; 281 if (weight > bestWeight) { 282 bestWeight = weight; 283 bestTableMatch = tableKey; 284 if (weight > 0.999d) { // bail on good enough match. 285 break; 286 } 287 } 288 } 289 } 290 if (bestWeight < threshold) { 291 bestTableMatch = defaultLanguage; 292 } 293 if (outputWeight != null) { 294 outputWeight.value = bestWeight; // only return the weight when needed 295 } 296 return bestTableMatch; 297 } 298 299 /** 300 * @internal 301 * @deprecated This API is ICU internal only. 302 */ 303 @Deprecated 304 private static class OutputDouble { // TODO, move to where OutputInt is 305 double value; 306 } 307 308 private void add(ULocale language, Double weight) { 309 language = canonicalize(language); 310 R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight); 311 row.freeze(); 312 localeToMaxLocaleAndWeight.add(row); 313 } 314 315 /** 316 * We preprocess the data to get just the possible matches for each desired base language. 317 */ 318 private void processMapping() { 319 for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) { 320 String desired = desiredToMatchingLanguages.getKey(); 321 Set<String> supported = desiredToMatchingLanguages.getValue(); 322 for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) { 323 final ULocale key = localeToMaxAndWeight.get0(); 324 String lang = key.getLanguage(); 325 if (supported.contains(lang)) { 326 addFiltered(desired, localeToMaxAndWeight); 327 } 328 } 329 } 330 // now put in the values directly, since languages always map to themselves 331 for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) { 332 final ULocale key = localeToMaxAndWeight.get0(); 333 String lang = key.getLanguage(); 334 addFiltered(lang, localeToMaxAndWeight); 335 } 336 } 337 338 private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) { 339 Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired); 340 if (map == null) { 341 desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>()); 342 } 343 map.add(localeToMaxAndWeight); 344 if (DEBUG) { 345 System.out.println(desired + ", " + localeToMaxAndWeight); 346 } 347 } 348 349 Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>(); 350 Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData 351 = new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>(); 352 353 // =============== Special Mapping Information ============== 354 355 /** 356 * We need to add another method to addLikelySubtags that doesn't return 357 * null, but instead substitutes Zzzz and ZZ if unknown. There are also 358 * a few cases where addLikelySubtags needs to have expanded data, to handle 359 * all deprecated codes. 360 * @param languageCode 361 * @return "fixed" addLikelySubtags 362 */ 363 private ULocale addLikelySubtags(ULocale languageCode) { 364 // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined 365 // language would normally match English. But that would produce the counterintuitive results 366 // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and 367 // getBestMatch("en", LocaleMatcher("it,und")) would be "und". 368 // 369 // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults) 370 // so that max("und")="und". That produces the following, more desirable results: 371 if (languageCode.equals(UNKNOWN_LOCALE)) { 372 return UNKNOWN_LOCALE; 373 } 374 final ULocale result = ULocale.addLikelySubtags(languageCode); 375 // should have method on getLikelySubtags for this 376 if (result == null || result.equals(languageCode)) { 377 final String language = languageCode.getLanguage(); 378 final String script = languageCode.getScript(); 379 final String region = languageCode.getCountry(); 380 return new ULocale((language.length()==0 ? "und" 381 : language) 382 + "_" 383 + (script.length()==0 ? "Zzzz" : script) 384 + "_" 385 + (region.length()==0 ? "ZZ" : region)); 386 } 387 return result; 388 } 389 390 private static class LocalePatternMatcher { 391 // a value of null means a wildcard; matches any. 392 private String lang; 393 private String script; 394 private String region; 395 private Level level; 396 static Pattern pattern = Pattern.compile( 397 "([a-z]{1,8}|\\*)" 398 + "(?:[_-]([A-Z][a-z]{3}|\\*))?" 399 + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?"); 400 401 public LocalePatternMatcher(String toMatch) { 402 Matcher matcher = pattern.matcher(toMatch); 403 if (!matcher.matches()) { 404 throw new IllegalArgumentException("Bad pattern: " + toMatch); 405 } 406 lang = matcher.group(1); 407 script = matcher.group(2); 408 region = matcher.group(3); 409 level = region != null ? Level.region : script != null ? Level.script : Level.language; 410 411 if (lang.equals("*")) { 412 lang = null; 413 } 414 if (script != null && script.equals("*")) { 415 script = null; 416 } 417 if (region != null && region.equals("*")) { 418 region = null; 419 } 420 } 421 422 boolean matches(ULocale ulocale) { 423 if (lang != null && !lang.equals(ulocale.getLanguage())) { 424 return false; 425 } 426 if (script != null && !script.equals(ulocale.getScript())) { 427 return false; 428 } 429 if (region != null && !region.equals(ulocale.getCountry())) { 430 return false; 431 } 432 return true; 433 } 434 435 public Level getLevel() { 436 return level; 437 } 438 439 public String getLanguage() { 440 return (lang == null ? "*" : lang); 441 } 442 443 public String getScript() { 444 return (script == null ? "*" : script); 445 } 446 447 public String getRegion() { 448 return (region == null ? "*" : region); 449 } 450 451 @Override 452 public String toString() { 453 String result = getLanguage(); 454 if (level != Level.language) { 455 result += "-" + getScript(); 456 if (level != Level.script) { 457 result += "-" + getRegion(); 458 } 459 } 460 return result; 461 } 462 463 /* (non-Javadoc) 464 * @see java.lang.Object#equals(java.lang.Object) 465 */ 466 @Override 467 public boolean equals(Object obj) { 468 if (obj == this) { 469 return true; 470 } 471 if (obj == null || !(obj instanceof LocalePatternMatcher)) { 472 return false; 473 } 474 LocalePatternMatcher other = (LocalePatternMatcher) obj; 475 return Utility.objectEquals(level, other.level) 476 && Utility.objectEquals(lang, other.lang) 477 && Utility.objectEquals(script, other.script) 478 && Utility.objectEquals(region, other.region); 479 } 480 481 /* (non-Javadoc) 482 * @see java.lang.Object#hashCode() 483 */ 484 @Override 485 public int hashCode() { 486 return level.ordinal() 487 ^ (lang == null ? 0 : lang.hashCode()) 488 ^ (script == null ? 0 : script.hashCode()) 489 ^ (region == null ? 0 : region.hashCode()); 490 } 491 } 492 493 enum Level { 494 language(0.99), 495 script(0.2), 496 region(0.04); 497 498 final double worst; 499 500 Level(double d) { 501 worst = d; 502 } 503 } 504 505 private static class ScoreData implements Freezable<ScoreData> { 506 @SuppressWarnings("unused") 507 private static final double maxUnequal_changeD_sameS = 0.5; 508 509 @SuppressWarnings("unused") 510 private static final double maxUnequal_changeEqual = 0.75; 511 512 LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>(); 513 final Level level; 514 515 public ScoreData(Level level) { 516 this.level = level; 517 } 518 519 void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) { 520 // Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired); 521 // if (lang_result == null) { 522 // scores.put(desired, lang_result = new HashMap()); 523 // } 524 // Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported); 525 // if (result == null) { 526 // lang_result.put(supported, result = new LinkedHashSet()); 527 // } 528 // result.add(data); 529 boolean added = scores.add(data); 530 if (!added) { 531 throw new ICUException("trying to add duplicate data: " + data); 532 } 533 } 534 535 double getScore(ULocale dMax, String desiredRaw, String desiredMax, 536 ULocale sMax, String supportedRaw, String supportedMax) { 537 double distance = 0; 538 if (!desiredMax.equals(supportedMax)) { 539 distance = getRawScore(dMax, sMax); 540 } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal 541 distance += 0.001; 542 } 543 return distance; 544 } 545 546 private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) { 547 if (DEBUG) { 548 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale); 549 } 550 for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result 551 if (datum.get0().matches(desiredLocale) 552 && datum.get1().matches(supportedLocale)) { 553 if (DEBUG) { 554 System.out.println("\t\t\t\tFOUND\t" + datum); 555 } 556 return datum.get2(); 557 } 558 } 559 if (DEBUG) { 560 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst); 561 } 562 return level.worst; 563 } 564 565 @Override 566 public String toString() { 567 StringBuilder result = new StringBuilder().append(level); 568 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) { 569 result.append("\n\t\t").append(score); 570 } 571 return result.toString(); 572 } 573 574 575 @Override 576 @SuppressWarnings("unchecked") 577 public ScoreData cloneAsThawed() { 578 try { 579 ScoreData result = (ScoreData) clone(); 580 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone(); 581 result.frozen = false; 582 return result; 583 } catch (CloneNotSupportedException e) { 584 throw new ICUCloneNotSupportedException(e); // will never happen 585 } 586 587 } 588 589 private volatile boolean frozen = false; 590 591 @Override 592 public ScoreData freeze() { 593 return this; 594 } 595 596 @Override 597 public boolean isFrozen() { 598 return frozen; 599 } 600 601 public Relation<String,String> getMatchingLanguages() { 602 Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class); 603 for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) { 604 LocalePatternMatcher desired = item.get0(); 605 LocalePatternMatcher supported = item.get1(); 606 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance 607 desiredToSupported.put(desired.lang, supported.lang); 608 } 609 } 610 desiredToSupported.freeze(); 611 return desiredToSupported; 612 } 613 } 614 615 /** 616 * Only for testing and use by tools. Interface may change!! 617 * @internal 618 * @deprecated This API is ICU internal only. 619 */ 620 @Deprecated 621 public static class LanguageMatcherData implements Freezable<LanguageMatcherData> { 622 private ScoreData languageScores = new ScoreData(Level.language); 623 private ScoreData scriptScores = new ScoreData(Level.script); 624 private ScoreData regionScores = new ScoreData(Level.region); 625 private Relation<String, String> matchingLanguages; 626 private volatile boolean frozen = false; 627 628 629 /** 630 * @internal 631 * @deprecated This API is ICU internal only. 632 */ 633 @Deprecated 634 public LanguageMatcherData() { 635 } 636 637 /** 638 * @internal 639 * @deprecated This API is ICU internal only. 640 */ 641 @Deprecated 642 public Relation<String, String> matchingLanguages() { 643 return matchingLanguages; 644 } 645 646 /** 647 * @internal 648 * @deprecated This API is ICU internal only. 649 */ 650 @Override 651 @Deprecated 652 public String toString() { 653 return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores; 654 } 655 656 /** 657 * @internal 658 * @deprecated This API is ICU internal only. 659 */ 660 @Deprecated 661 public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) { 662 double diff = 0; 663 diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage()); 664 if (diff > 0.999d) { // with no language match, we bail 665 return 0.0d; 666 } 667 diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript()); 668 diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry()); 669 670 if (!a.getVariant().equals(b.getVariant())) { 671 diff += 0.01; 672 } 673 if (diff < 0.0d) { 674 diff = 0.0d; 675 } else if (diff > 1.0d) { 676 diff = 1.0d; 677 } 678 if (DEBUG) { 679 System.out.println("\t\t\tTotal Distance\t" + diff); 680 } 681 return 1.0 - diff; 682 } 683 684 /** 685 * @internal 686 * @deprecated This API is ICU internal only. 687 */ 688 @Deprecated 689 public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) { 690 return addDistance(desired, supported, percent, false, comment); 691 } 692 /** 693 * @internal 694 * @deprecated This API is ICU internal only. 695 */ 696 @Deprecated 697 public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) { 698 return addDistance(desired, supported, percent, oneway, null); 699 } 700 701 private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) { 702 if (DEBUG) { 703 System.out.println("\t<languageMatch desired=\"" + desired + "\"" + 704 " supported=\"" + supported + "\"" + 705 " percent=\"" + percent + "\"" 706 + (oneway ? " oneway=\"true\"" : "") 707 + "/>" 708 + (comment == null ? "" : "\t<!-- " + comment + " -->")); 709 // // .addDistance("nn", "nb", 4, true) 710 // System.out.println(".addDistance(\"" + desired + "\"" + 711 // ", \"" + supported + "\"" + 712 // ", " + percent + "" 713 // + (oneway ? "" : ", true") 714 // + (comment == null ? "" : ", \"" + comment + "\"") 715 // + ")" 716 // ); 717 718 } 719 double score = 1-percent/100.0; // convert from percentage 720 LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired); 721 Level desiredLen = desiredMatcher.getLevel(); 722 LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported); 723 Level supportedLen = supportedMatcher.getLevel(); 724 if (desiredLen != supportedLen) { 725 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported); 726 } 727 R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score); 728 R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score); 729 boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher); 730 switch (desiredLen) { 731 case language: 732 String dlanguage = desiredMatcher.getLanguage(); 733 String slanguage = supportedMatcher.getLanguage(); 734 languageScores.addDataToScores(dlanguage, slanguage, data); 735 if (!oneway && !desiredEqualsSupported) { 736 languageScores.addDataToScores(slanguage, dlanguage, data2); 737 } 738 break; 739 case script: 740 String dscript = desiredMatcher.getScript(); 741 String sscript = supportedMatcher.getScript(); 742 scriptScores.addDataToScores(dscript, sscript, data); 743 if (!oneway && !desiredEqualsSupported) { 744 scriptScores.addDataToScores(sscript, dscript, data2); 745 } 746 break; 747 case region: 748 String dregion = desiredMatcher.getRegion(); 749 String sregion = supportedMatcher.getRegion(); 750 regionScores.addDataToScores(dregion, sregion, data); 751 if (!oneway && !desiredEqualsSupported) { 752 regionScores.addDataToScores(sregion, dregion, data2); 753 } 754 break; 755 } 756 return this; 757 } 758 759 /** 760 * {@inheritDoc} 761 * @internal 762 * @deprecated This API is ICU internal only. 763 */ 764 @Override 765 @Deprecated 766 public LanguageMatcherData cloneAsThawed() { 767 LanguageMatcherData result; 768 try { 769 result = (LanguageMatcherData) clone(); 770 result.languageScores = languageScores.cloneAsThawed(); 771 result.scriptScores = scriptScores.cloneAsThawed(); 772 result.regionScores = regionScores.cloneAsThawed(); 773 result.frozen = false; 774 return result; 775 } catch (CloneNotSupportedException e) { 776 throw new ICUCloneNotSupportedException(e); // will never happen 777 } 778 } 779 780 /** 781 * {@inheritDoc} 782 * @internal 783 * @deprecated This API is ICU internal only. 784 */ 785 @Override 786 @Deprecated 787 public LanguageMatcherData freeze() { 788 languageScores.freeze(); 789 regionScores.freeze(); 790 scriptScores.freeze(); 791 matchingLanguages = languageScores.getMatchingLanguages(); 792 frozen = true; 793 return this; 794 } 795 796 /** 797 * {@inheritDoc} 798 * @internal 799 * @deprecated This API is ICU internal only. 800 */ 801 @Override 802 @Deprecated 803 public boolean isFrozen() { 804 return frozen; 805 } 806 } 807 808 LanguageMatcherData matcherData; 809 LocalePriorityList languagePriorityList; 810 811 private static final LanguageMatcherData defaultWritten; 812 813 private static HashMap<String,String> canonicalMap = new HashMap<String, String>(); 814 815 816 static { 817 canonicalMap.put("iw", "he"); 818 canonicalMap.put("mo", "ro"); 819 canonicalMap.put("tl", "fil"); 820 821 ICUResourceBundle suppData = getICUSupplementalData(); 822 ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching"); 823 ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written"); 824 defaultWritten = new LanguageMatcherData(); 825 826 for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) { 827 ICUResourceBundle item = (ICUResourceBundle) iter.next(); 828 /* 829 "*_*_*", 830 "*_*_*", 831 "96", 832 */ 833 // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" /> 834 boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3)); 835 defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway); 836 } 837 defaultWritten.freeze(); 838 } 839 840 /** 841 * @internal 842 * @deprecated This API is ICU internal only. 843 */ 844 @Deprecated 845 public static ICUResourceBundle getICUSupplementalData() { 846 ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance( 847 ICUData.ICU_BASE_NAME, 848 "supplementalData", 849 ICUResourceBundle.ICU_DATA_CLASS_LOADER); 850 return suppData; 851 } 852 853 /** 854 * @internal 855 * @deprecated This API is ICU internal only. 856 */ 857 @Deprecated 858 public static double match(ULocale a, ULocale b) { 859 final LocaleMatcher matcher = new LocaleMatcher(""); 860 return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b)); 861 } 862 863 transient XLocaleMatcher xLocaleMatcher = null; 864 transient ULocale xDefaultLanguage = null; 865 transient boolean xFavorScript = false; 866 867 /** 868 * Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch). 869 * The values are not necessarily symmetric. 870 * @param desired A locale desired by the user 871 * @param supported A locale supported by a program. 872 * @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance). 873 * A language is first maximized with add likely subtags, then compared. 874 * @internal 875 * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. 876 */ 877 @Deprecated 878 public int distance(ULocale desired, ULocale supported) { 879 return getLocaleMatcher().distance(desired, supported); 880 } 881 882 private synchronized XLocaleMatcher getLocaleMatcher() { 883 if (xLocaleMatcher == null) { 884 Builder builder = XLocaleMatcher.builder(); 885 builder.setSupportedLocales(languagePriorityList); 886 if (xDefaultLanguage != null) { 887 builder.setDefaultLanguage(xDefaultLanguage); 888 } 889 if (xFavorScript) { 890 builder.setDistanceOption(DistanceOption.SCRIPT_FIRST); 891 } 892 xLocaleMatcher = builder.build(); 893 } 894 return xLocaleMatcher; 895 } 896 897 /** 898 * Get the best match between the desired languages and supported languages 899 * This supports the new CLDR syntax to provide for better matches within 900 * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US) 901 * and also matching between regions and macroregions, such as comparing es-419 to es-AR). 902 * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first. 903 * @param outputBestDesired The one of the desired languages that matched best. 904 * Set to null if the best match was not below the threshold distance. 905 * @return best-match supported language 906 * @internal 907 * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. 908 */ 909 @Deprecated 910 public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) { 911 return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired); 912 } 913 914 /** 915 * Set the default language, with null = default = first supported language 916 * @param defaultLanguage Language to use in case the threshold for distance is exceeded. 917 * @return this, for chaining 918 * @internal 919 * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. 920 */ 921 @Deprecated 922 public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) { 923 this.xDefaultLanguage = defaultLanguage; 924 xLocaleMatcher = null; 925 return this; 926 } 927 928 /** 929 * If true, then the language differences are smaller than than script differences. 930 * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language. 931 * @param favorScript Set to true to treat script as most important. 932 * @return this, for chaining. 933 * @internal 934 * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. 935 */ 936 @Deprecated 937 public synchronized LocaleMatcher setFavorScript(boolean favorScript) { 938 this.xFavorScript = favorScript; 939 xLocaleMatcher = null; 940 return this; 941 } 942 } 943