// 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;

import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;

/**
 * Immutable class that picks best match between user's desired locales and application's supported locales.
 * @author markdavis
 */
public class XLocaleMatcher {
    private static final LSR UND = new LSR("und", "", "");
    private static final ULocale UND_LOCALE = new ULocale("und");

    // normally the default values, but can be set via constructor

    private final XLocaleDistance localeDistance;
    private final int thresholdDistance;
    private final int demotionPerAdditionalDesiredLocale;
    // NOTE(review): stays null when the builder never sets it; the single-locale getBestMatch
    // only takes its maximized-LSR shortcut when this is exactly DistanceOption.NORMAL.
    private final DistanceOption distanceOption;

    // built based on application's supported languages in constructor

    private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
    private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
    private final ULocale defaultLanguage;


    /**
     * Collects the optional parameters of an {@link XLocaleMatcher}. Every setter returns the
     * builder itself so that calls can be chained; {@link #build()} creates the matcher.
     */
    public static class Builder {
        private Set<ULocale> supportedLanguagesList;
        private int thresholdDistance = -1;
        private int demotionPerAdditionalDesiredLocale = -1;
        private ULocale defaultLanguage;
        private XLocaleDistance localeDistance;
        private DistanceOption distanceOption;

        /**
         * Sets the supported locales from a string that is parsed by {@link LocalePriorityList}.
         * @param languagePriorityList the languagePriorityList to set
         * @return this Builder object
         */
        public Builder setSupportedLocales(String languagePriorityList) {
            this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
            return this;
        }

        /**
         * Sets the supported locales from a priority list; iteration order is preserved.
         * @param languagePriorityList the languagePriorityList to set
         * @return this Builder object
         */
        public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
            this.supportedLanguagesList = asSet(languagePriorityList);
            return this;
        }

        /**
         * Sets the supported locales. The set is stored as given (not copied) and is read
         * when the matcher is constructed.
         * @param languagePriorityList the languagePriorityList to set
         * @return this Builder object
         */
        public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
            this.supportedLanguagesList = languagePriorityList;
            return this;
        }

        /**
         * @param thresholdDistance the thresholdDistance to set, with -1 = default
         * @return this Builder object
         */
        public Builder setThresholdDistance(int thresholdDistance) {
            this.thresholdDistance = thresholdDistance;
            return this;
        }

        /**
         * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
         * @return this Builder object
         */
        public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
            this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
            return this;
        }

        /**
         * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
         * @return this Builder object
         */
        public Builder setLocaleDistance(XLocaleDistance localeDistance) {
            this.localeDistance = localeDistance;
            return this;
        }

        /**
         * Set the default language, with null = default = first supported language
         * @param defaultLanguage the default language
         * @return this Builder object
         */
        public Builder setDefaultLanguage(ULocale defaultLanguage) {
            this.defaultLanguage = defaultLanguage;
            return this;
        }

        /**
         * Sets the distance option that is handed to the locale distance computation.
         * Per the original note on this setter, an option exists under which language differences
         * are smaller than script differences; this is used in situations (such as maps) where it
         * is better to fall back to the same script than a similar language.
         * @param distanceOption the distance option
         * @return this Builder object
         */
        public Builder setDistanceOption(DistanceOption distanceOption) {
            this.distanceOption = distanceOption;
            return this;
        }

        /**
         * Creates the matcher from the values collected so far.
         * @return a new XLocaleMatcher
         */
        public XLocaleMatcher build() {
            return new XLocaleMatcher(this);
        }
    }

    /**
     * Returns a builder used in chaining parameters for building a Locale Matcher.
     * @return a new Builder object
     */
    public static Builder builder() {
        return new Builder();
    }

    /** Convenience method */
    public XLocaleMatcher(String supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }

    /** Convenience method */
    public XLocaleMatcher(LocalePriorityList supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }

    /** Convenience method */
    public XLocaleMatcher(Set<ULocale> supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }

    /**
     * Create a locale matcher with the parameters collected in the builder. Unset values fall
     * back to defaults: the default locale distance, its default script distance as threshold,
     * its default region distance + 1 as demotion, and the first supported locale as default
     * language.
     * @param builder the source of all parameters
     * @throws NullPointerException if no supported locales were set on the builder
     */
    private XLocaleMatcher(Builder builder) {
        if (builder.supportedLanguagesList == null) {
            // Previously this surfaced as an anonymous NullPointerException while iterating the list.
            throw new NullPointerException("supported locales must not be null");
        }
        localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
                : builder.localeDistance;
        thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
                : builder.thresholdDistance;
        // only do AFTER above are set
        Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms());
        final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
        supportedLanguages = temp2.asMap();
        exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
        defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
                : supportedLanguages.isEmpty() ? null
                        : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
        demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0
                ? localeDistance.getDefaultRegionDistance() + 1
                : builder.demotionPerAdditionalDesiredLocale;
        distanceOption = builder.distanceOption;
    }

    /** Maximizes a locale to its LSR, with a hack for "und": that locale maps to the fixed UND LSR. */
    private static LSR toLsr(ULocale locale) {
        return locale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(locale);
    }

    // Result is not immutable!
    private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
        Set<LSR> result = new LinkedHashSet<LSR>();
        for (ULocale item : languagePriorityList) {
            result.add(toLsr(item));
        }
        return result;
    }

    /**
     * Groups the locales by their maximized LSR, keeping the input order. When priorities are
     * given and there is more than one entry, the first LSR stays first, followed by the LSRs
     * contained in priorities, followed by the rest.
     */
    private Multimap<LSR, ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
        Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
        for (ULocale item : languagePriorityList) {
            builder.put(toLsr(item), item);
        }
        if (builder.size() > 1 && priorities != null) {
            // for the supported list, we put any priorities before all others, except for the first.
            Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();

            // copy the long way so the priorities are in the same order as in the original
            boolean first = true;
            for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
                final LSR key = entry.getKey();
                if (first || priorities.contains(key)) {
                    builder2.putAll(key, entry.getValue());
                    first = false;
                }
            }
            // now copy the rest
            builder2.putAll(builder);
            // sanity check: reordering must not have added or dropped anything
            if (!builder2.equals(builder)) {
                throw new IllegalArgumentException();
            }
            builder = builder2;
        }
        return ImmutableMultimap.copyOf(builder);
    }


    /** Convenience method */
    public ULocale getBestMatch(ULocale ulocale) {
        return getBestMatch(ulocale, null);
    }

    /** Convenience method */
    public ULocale getBestMatch(String languageList) {
        return getBestMatch(LocalePriorityList.add(languageList).build(), null);
    }

    /** Convenience method */
    public ULocale getBestMatch(ULocale... locales) {
        return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
    }

    /** Convenience method */
    public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
        return getBestMatch(desiredLanguages, null);
    }

    /** Convenience method */
    public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
        return getBestMatch(desiredLanguages, null);
    }

    /** Convenience method */
    public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
        return getBestMatch(asSet(desiredLanguages), outputBestDesired);
    }

    // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
    private static Set<ULocale> asSet(LocalePriorityList languageList) {
        Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
        for (ULocale locale : languageList) {
            temp.add(locale);
        }
        return temp;
    }

    /** Stores value in the output holder, if the caller supplied one. */
    private static void report(Output<ULocale> outputBestDesired, ULocale value) {
        if (outputBestDesired != null) {
            outputBestDesired.value = value;
        }
    }

    /**
     * Shared tail of the getBestMatch implementations: turns the best candidate found by the
     * distance loop into the final answer.
     */
    private ULocale finishMatch(int bestDistance, ULocale bestDesiredLocale,
            Collection<ULocale> bestSupportedLocales, Output<ULocale> outputBestDesired) {
        // a distance equal to the threshold already counts as a failure
        if (bestDistance >= thresholdDistance) {
            report(outputBestDesired, null);
            return defaultLanguage;
        }
        report(outputBestDesired, bestDesiredLocale);
        // pick exact match if there is one
        if (bestSupportedLocales.contains(bestDesiredLocale)) {
            return bestDesiredLocale;
        }
        // otherwise return the first supported locale of the best group; variants and extensions
        // of the desired locale are NOT merged in here (callers can use combine() for that)
        return bestSupportedLocales.iterator().next();
    }

    /**
     * Get the best match between the desired languages and supported languages
     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
     * @param outputBestDesired The one of the desired languages that matched best.
     * Set to null if the best match was not below the threshold distance.
     * @return the best match, or the default language if nothing is below the threshold distance.
     */
    public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
        // fast path for singleton
        if (desiredLanguages.size() == 1) {
            return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
        }
        // TODO produce optimized version for single desired ULocale
        Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages, null);
        int bestDistance = Integer.MAX_VALUE;
        ULocale bestDesiredLocale = null;
        Collection<ULocale> bestSupportedLocales = null;
        int delta = 0; // penalty for the current desired locale; grows with each additional one
        mainLoop:
        for (final Entry<LSR, ULocale> desiredLsrAndLocale : desiredLSRs.entries()) {
            ULocale desiredLocale = desiredLsrAndLocale.getValue();
            LSR desiredLSR = desiredLsrAndLocale.getKey();
            // the shortcuts are only valid while a zero raw distance would still beat the best so far
            if (delta < bestDistance) {
                // quick check for exact match
                if (exactSupportedLocales.contains(desiredLocale)) {
                    report(outputBestDesired, desiredLocale);
                    return desiredLocale;
                }
                // quick check for maximized locale
                // NOTE(review): the single-locale overload only takes this shortcut when
                // distanceOption == DistanceOption.NORMAL; confirm whether the asymmetry is intended.
                Collection<ULocale> found = supportedLanguages.get(desiredLSR);
                if (found != null) {
                    // if we find one in the set, return first (lowest). We already know the exact one isn't there.
                    report(outputBestDesired, desiredLocale);
                    return found.iterator().next();
                }
            }
            for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
                int distance = delta + localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
                        thresholdDistance, distanceOption);
                if (distance < bestDistance) {
                    bestDistance = distance;
                    bestDesiredLocale = desiredLocale;
                    bestSupportedLocales = supportedLsrAndLocale.getValue();
                    if (distance == 0) {
                        break mainLoop;
                    }
                }
            }
            delta += demotionPerAdditionalDesiredLocale;
        }
        return finishMatch(bestDistance, bestDesiredLocale, bestSupportedLocales, outputBestDesired);
    }

    /**
     * Get the best match between the desired languages and supported languages
     * @param desiredLocale the supplied user's language.
     * @param outputBestDesired The one of the desired languages that matched best.
     * Set to null if the best match was not below the threshold distance.
     * @return the best match, or the default language if nothing is below the threshold distance.
     */
    public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
        int bestDistance = Integer.MAX_VALUE;
        ULocale bestDesiredLocale = null;
        Collection<ULocale> bestSupportedLocales = null;

        // maximize first (with hack for und), as in the original statement order
        final LSR desiredLSR = toLsr(desiredLocale);

        // quick check for exact match
        if (exactSupportedLocales.contains(desiredLocale)) {
            report(outputBestDesired, desiredLocale);
            return desiredLocale;
        }
        // quick check for maximized locale
        if (distanceOption == DistanceOption.NORMAL) {
            Collection<ULocale> found = supportedLanguages.get(desiredLSR);
            if (found != null) {
                // if we find one in the set, return first (lowest). We already know the exact one isn't there.
                report(outputBestDesired, desiredLocale);
                return found.iterator().next();
            }
        }
        for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
            int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
                    thresholdDistance, distanceOption);
            if (distance < bestDistance) {
                bestDistance = distance;
                bestDesiredLocale = desiredLocale;
                bestSupportedLocales = supportedLsrAndLocale.getValue();
                if (distance == 0) {
                    break;
                }
            }
        }
        return finishMatch(bestDistance, bestDesiredLocale, bestSupportedLocales, outputBestDesired);
    }

    /**
     * Combine features of the desired locale into those of the supported, and return result.
     * Region, variant and extensions of the desired locale replace those of the supported one
     * whenever the desired locale has them.
     * @param bestSupported the supported locale that serves as the base
     * @param bestDesired the desired locale whose features are copied; may be null
     * @return the combined locale, or bestSupported itself if bestDesired is null or equal to it
     */
    public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
        // for examples of extensions, variants, see
        // http://unicode.org/repos/cldr/tags/latest/common/bcp47/
        // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml

        if (bestDesired != null && !bestSupported.equals(bestDesired)) {
            // add region, variants, extensions
            ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);

            // copy the region from the desired, if there is one
            String region = bestDesired.getCountry();
            if (!region.isEmpty()) {
                b.setRegion(region);
            }

            // copy the variants from desired, if there is one
            // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "-fonupa" => "sco-fonupa" (nuking ulster)
            String variants = bestDesired.getVariant();
            if (!variants.isEmpty()) {
                b.setVariant(variants);
            }

            // copy the extensions from desired, if there are any
            // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "-u-nu-native" => "th-u-nu-native" (nuking calendar)
            for (char extensionKey : bestDesired.getExtensionKeys()) {
                b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
            }
            bestSupported = b.build();
        }
        return bestSupported;
    }

    /** Returns the distance between the two languages. The values are not necessarily symmetric.
     * @param desired A locale desired by the user
     * @param supported A locale supported by a program.
     * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
     * A language is first maximized with add likely subtags, then compared.
     */
    public int distance(ULocale desired, ULocale supported) {
        return localeDistance.distanceRaw(
                LSR.fromMaximalized(desired),
                LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
    }

    /** Convenience method: builds a ULocale from each string and returns their distance. */
    public int distance(String desiredLanguage, String supportedLanguage) {
        return distance(new ULocale(desiredLanguage), new ULocale(supportedLanguage));
    }

    @Override
    public String toString() {
        return exactSupportedLocales.toString();
    }

    /**
     * Returns the distance converted to a match fraction:
     * that is, (100-distance(desired, supported))/100.0
     */
    public double match(ULocale desired, ULocale supported) {
        return (100 - distance(desired, supported)) / 100.0;
    }

    /**
     * Returns a fraction between 0 and 1, where 1 means that the languages are a
     * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
     * <br>Note that
     * the precise values may change over time; no code should be made dependent
     * on the values remaining constant.
     * @param desired Desired locale
     * @param desiredMax Maximized locale (using likely subtags); ignored
     * @param supported Supported locale
     * @param supportedMax Maximized locale (using likely subtags); ignored
     * @return value between 0 and 1, inclusive.
     * @deprecated Use the form with 2 parameters instead.
     */
    @Deprecated
    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
        return match(desired, supported);
    }

    /**
     * Canonicalize a locale (language). Note that for now, it is canonicalizing
     * according to CLDR conventions (he vs iw, etc), since that is what is needed
     * for likelySubtags.
     * <br>Not implemented yet: this method currently always returns null.
     * @param ulocale language/locale code
     * @return ULocale with remapped subtags.
     * @stable ICU 4.4
     */
    public ULocale canonicalize(ULocale ulocale) {
        // TODO
        return null;
    }

    /**
     * @return the thresholdDistance. Any best distance at or above this value is treated as a
     * match failure by getBestMatch.
     */
    public int getThresholdDistance() {
        return thresholdDistance;
    }
}