1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.icu; 18 19 import java.util.Collections; 20 import java.util.HashMap; 21 import java.util.HashSet; 22 import java.util.LinkedHashSet; 23 import java.util.Locale; 24 import java.util.Map; 25 import java.util.Map.Entry; 26 import java.util.Set; 27 import libcore.util.BasicLruCache; 28 29 /** 30 * Makes ICU data accessible to Java. 31 */ 32 public final class ICU { 33 private static final BasicLruCache<String, String> CACHED_PATTERNS = 34 new BasicLruCache<String, String>(8); 35 36 private static Locale[] availableLocalesCache; 37 38 private static String[] isoCountries; 39 40 private static String[] isoLanguages; 41 42 /** 43 * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache. 44 */ 45 public static String[] getISOLanguages() { 46 if (isoLanguages == null) { 47 isoLanguages = getISOLanguagesNative(); 48 } 49 return isoLanguages.clone(); 50 } 51 52 /** 53 * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache. 54 */ 55 public static String[] getISOCountries() { 56 if (isoCountries == null) { 57 isoCountries = getISOCountriesNative(); 58 } 59 return isoCountries.clone(); 60 } 61 62 private static final int IDX_LANGUAGE = 0; 63 private static final int IDX_SCRIPT = 1; 64 private static final int IDX_REGION = 2; 65 private static final int IDX_VARIANT = 3; 66 67 /* 68 * Parse the {Language, Script, Region, Variant*} section of the ICU locale 69 * ID. This is the bit that appears before the keyword separate "@". The general 70 * structure is a series of ASCII alphanumeric strings (subtags) 71 * separated by underscores. 72 * 73 * Each subtag is interpreted according to its position in the list of subtags 74 * AND its length (groan...). The various cases are explained in comments 75 * below. 76 */ 77 private static void parseLangScriptRegionAndVariants(String string, 78 String[] outputArray) { 79 final int first = string.indexOf('_'); 80 final int second = string.indexOf('_', first + 1); 81 final int third = string.indexOf('_', second + 1); 82 83 if (first == -1) { 84 outputArray[IDX_LANGUAGE] = string; 85 } else if (second == -1) { 86 // Language and country ("ja_JP") OR 87 // Language and script ("en_Latn") OR 88 // Language and variant ("en_POSIX"). 89 90 outputArray[IDX_LANGUAGE] = string.substring(0, first); 91 final String secondString = string.substring(first + 1); 92 93 if (secondString.length() == 4) { 94 // 4 Letter ISO script code. 95 outputArray[IDX_SCRIPT] = secondString; 96 } else if (secondString.length() == 2 || secondString.length() == 3) { 97 // 2 or 3 Letter region code. 98 outputArray[IDX_REGION] = secondString; 99 } else { 100 // If we're here, the length of the second half is either 1 or greater 101 // than 5. Assume that ICU won't hand us malformed tags, and therefore 102 // assume the rest of the string is a series of variant tags. 103 outputArray[IDX_VARIANT] = secondString; 104 } 105 } else if (third == -1) { 106 // Language and country and variant ("ja_JP_TRADITIONAL") OR 107 // Language and script and variant ("en_Latn_POSIX") OR 108 // Language and script and region ("en_Latn_US"). OR 109 // Language and variant with multiple subtags ("en_POSIX_XISOP") 110 111 outputArray[IDX_LANGUAGE] = string.substring(0, first); 112 final String secondString = string.substring(first + 1, second); 113 final String thirdString = string.substring(second + 1); 114 115 if (secondString.length() == 4) { 116 // The second subtag is a script. 117 outputArray[IDX_SCRIPT] = secondString; 118 119 // The third subtag can be either a region or a variant, depending 120 // on its length. 121 if (thirdString.length() == 2 || thirdString.length() == 3 || 122 thirdString.isEmpty()) { 123 outputArray[IDX_REGION] = thirdString; 124 } else { 125 outputArray[IDX_VARIANT] = thirdString; 126 } 127 } else if (secondString.isEmpty() || 128 secondString.length() == 2 || secondString.length() == 3) { 129 // The second string is a region, and the third a variant. 130 outputArray[IDX_REGION] = secondString; 131 outputArray[IDX_VARIANT] = thirdString; 132 } else { 133 // Variant with multiple subtags. 134 outputArray[IDX_VARIANT] = string.substring(first + 1); 135 } 136 } else { 137 // Language, script, region and variant with 1 or more subtags 138 // ("en_Latn_US_POSIX") OR 139 // Language, region and variant with 2 or more subtags 140 // (en_US_POSIX_VARIANT). 141 outputArray[IDX_LANGUAGE] = string.substring(0, first); 142 final String secondString = string.substring(first + 1, second); 143 if (secondString.length() == 4) { 144 outputArray[IDX_SCRIPT] = secondString; 145 outputArray[IDX_REGION] = string.substring(second + 1, third); 146 outputArray[IDX_VARIANT] = string.substring(third + 1); 147 } else { 148 outputArray[IDX_REGION] = secondString; 149 outputArray[IDX_VARIANT] = string.substring(second + 1); 150 } 151 } 152 } 153 154 /** 155 * Returns the appropriate {@code Locale} given a {@code String} of the form returned 156 * by {@code toString}. This is very lenient, and doesn't care what's between the underscores: 157 * this method can parse strings that {@code Locale.toString} won't produce. 158 * Used to remove duplication. 159 */ 160 public static Locale localeFromIcuLocaleId(String localeId) { 161 // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h). 162 final int extensionsIndex = localeId.indexOf('@'); 163 164 Map<Character, String> extensionsMap = Collections.EMPTY_MAP; 165 Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP; 166 Set<String> unicodeAttributeSet = Collections.EMPTY_SET; 167 168 if (extensionsIndex != -1) { 169 extensionsMap = new HashMap<Character, String>(); 170 unicodeKeywordsMap = new HashMap<String, String>(); 171 unicodeAttributeSet = new HashSet<String>(); 172 173 // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string 174 // containing all "keywords" it could parse. An ICU keyword is a key-value pair 175 // separated by an "=" (ULOC_KEYWORD_ASSIGN). 176 // 177 // Each keyword item can be one of three things : 178 // - A unicode extension attribute list: In this case the item key is "attribute" 179 // and the value is a hyphen separated list of unicode attributes. 180 // - A unicode extension keyword: In this case, the item key will be larger than 181 // 1 char in length, and the value will be the unicode extension value. 182 // - A BCP-47 extension subtag: In this case, the item key will be exactly one 183 // char in length, and the value will be a sequence of unparsed subtags that 184 // represent the extension. 185 // 186 // Note that this implies that unicode extension keywords are "promoted" to 187 // to the same namespace as the top level extension subtags and their values. 188 // There can't be any collisions in practice because the BCP-47 spec imposes 189 // restrictions on their lengths. 190 final String extensionsString = localeId.substring(extensionsIndex + 1); 191 final String[] extensions = extensionsString.split(";"); 192 for (String extension : extensions) { 193 // This is the special key for the unicode attributes 194 if (extension.startsWith("attribute=")) { 195 String unicodeAttributeValues = extension.substring("attribute=".length()); 196 for (String unicodeAttribute : unicodeAttributeValues.split("-")) { 197 unicodeAttributeSet.add(unicodeAttribute); 198 } 199 } else { 200 final int separatorIndex = extension.indexOf('='); 201 202 if (separatorIndex == 1) { 203 // This is a BCP-47 extension subtag. 204 final String value = extension.substring(2); 205 final char extensionId = extension.charAt(0); 206 207 extensionsMap.put(extensionId, value); 208 } else { 209 // This is a unicode extension keyword. 210 unicodeKeywordsMap.put(extension.substring(0, separatorIndex), 211 extension.substring(separatorIndex + 1)); 212 } 213 } 214 } 215 } 216 217 final String[] outputArray = new String[] { "", "", "", "" }; 218 if (extensionsIndex == -1) { 219 parseLangScriptRegionAndVariants(localeId, outputArray); 220 } else { 221 parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex), 222 outputArray); 223 } 224 Locale.Builder builder = new Locale.Builder(); 225 builder.setLanguage(outputArray[IDX_LANGUAGE]); 226 builder.setRegion(outputArray[IDX_REGION]); 227 builder.setVariant(outputArray[IDX_VARIANT]); 228 builder.setScript(outputArray[IDX_SCRIPT]); 229 for (String attribute : unicodeAttributeSet) { 230 builder.addUnicodeLocaleAttribute(attribute); 231 } 232 for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) { 233 builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue()); 234 } 235 236 for (Entry<Character, String> extension : extensionsMap.entrySet()) { 237 builder.setExtension(extension.getKey(), extension.getValue()); 238 } 239 240 return builder.build(); 241 } 242 243 public static Locale[] localesFromStrings(String[] localeNames) { 244 // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera. 245 // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about 246 // both so that we never need to convert back when talking to it. 247 LinkedHashSet<Locale> set = new LinkedHashSet<Locale>(); 248 for (String localeName : localeNames) { 249 set.add(localeFromIcuLocaleId(localeName)); 250 } 251 return set.toArray(new Locale[set.size()]); 252 } 253 254 public static Locale[] getAvailableLocales() { 255 if (availableLocalesCache == null) { 256 availableLocalesCache = localesFromStrings(getAvailableLocalesNative()); 257 } 258 return availableLocalesCache.clone(); 259 } 260 261 public static Locale[] getAvailableBreakIteratorLocales() { 262 return localesFromStrings(getAvailableBreakIteratorLocalesNative()); 263 } 264 265 public static Locale[] getAvailableCalendarLocales() { 266 return localesFromStrings(getAvailableCalendarLocalesNative()); 267 } 268 269 public static Locale[] getAvailableCollatorLocales() { 270 return localesFromStrings(getAvailableCollatorLocalesNative()); 271 } 272 273 public static Locale[] getAvailableDateFormatLocales() { 274 return localesFromStrings(getAvailableDateFormatLocalesNative()); 275 } 276 277 public static Locale[] getAvailableDateFormatSymbolsLocales() { 278 return getAvailableDateFormatLocales(); 279 } 280 281 public static Locale[] getAvailableDecimalFormatSymbolsLocales() { 282 return getAvailableNumberFormatLocales(); 283 } 284 285 public static Locale[] getAvailableNumberFormatLocales() { 286 return localesFromStrings(getAvailableNumberFormatLocalesNative()); 287 } 288 289 public static String getBestDateTimePattern(String skeleton, Locale locale) { 290 String languageTag = locale.toLanguageTag(); 291 String key = skeleton + "\t" + languageTag; 292 synchronized (CACHED_PATTERNS) { 293 String pattern = CACHED_PATTERNS.get(key); 294 if (pattern == null) { 295 pattern = getBestDateTimePatternNative(skeleton, languageTag); 296 CACHED_PATTERNS.put(key, pattern); 297 } 298 return pattern; 299 } 300 } 301 302 private static native String getBestDateTimePatternNative(String skeleton, String languageTag); 303 304 public static char[] getDateFormatOrder(String pattern) { 305 char[] result = new char[3]; 306 int resultIndex = 0; 307 boolean sawDay = false; 308 boolean sawMonth = false; 309 boolean sawYear = false; 310 311 for (int i = 0; i < pattern.length(); ++i) { 312 char ch = pattern.charAt(i); 313 if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') { 314 if (ch == 'd' && !sawDay) { 315 result[resultIndex++] = 'd'; 316 sawDay = true; 317 } else if ((ch == 'L' || ch == 'M') && !sawMonth) { 318 result[resultIndex++] = 'M'; 319 sawMonth = true; 320 } else if ((ch == 'y') && !sawYear) { 321 result[resultIndex++] = 'y'; 322 sawYear = true; 323 } 324 } else if (ch == 'G') { 325 // Ignore the era specifier, if present. 326 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 327 throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern); 328 } else if (ch == '\'') { 329 if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') { 330 ++i; 331 } else { 332 i = pattern.indexOf('\'', i + 1); 333 if (i == -1) { 334 throw new IllegalArgumentException("Bad quoting in " + pattern); 335 } 336 ++i; 337 } 338 } else { 339 // Ignore spaces and punctuation. 340 } 341 } 342 return result; 343 } 344 345 /** 346 * Returns the version of the CLDR data in use, such as "22.1.1". 347 */ 348 public static native String getCldrVersion(); 349 350 /** 351 * Returns the icu4c version in use, such as "50.1.1". 352 */ 353 public static native String getIcuVersion(); 354 355 /** 356 * Returns the Unicode version our ICU supports, such as "6.2". 357 */ 358 public static native String getUnicodeVersion(); 359 360 // --- Case mapping. 361 362 public static String toLowerCase(String s, Locale locale) { 363 return toLowerCase(s, locale.toLanguageTag()); 364 } 365 366 private static native String toLowerCase(String s, String languageTag); 367 368 public static String toUpperCase(String s, Locale locale) { 369 return toUpperCase(s, locale.toLanguageTag()); 370 } 371 372 private static native String toUpperCase(String s, String languageTag); 373 374 // --- Errors. 375 376 // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU. 377 public static final int U_ZERO_ERROR = 0; 378 public static final int U_INVALID_CHAR_FOUND = 10; 379 public static final int U_TRUNCATED_CHAR_FOUND = 11; 380 public static final int U_ILLEGAL_CHAR_FOUND = 12; 381 public static final int U_BUFFER_OVERFLOW_ERROR = 15; 382 383 public static boolean U_FAILURE(int error) { 384 return error > U_ZERO_ERROR; 385 } 386 387 // --- Native methods accessing ICU's database. 388 389 private static native String[] getAvailableBreakIteratorLocalesNative(); 390 private static native String[] getAvailableCalendarLocalesNative(); 391 private static native String[] getAvailableCollatorLocalesNative(); 392 private static native String[] getAvailableDateFormatLocalesNative(); 393 private static native String[] getAvailableLocalesNative(); 394 private static native String[] getAvailableNumberFormatLocalesNative(); 395 396 public static native String[] getAvailableCurrencyCodes(); 397 public static native String getCurrencyCode(String countryCode); 398 399 public static String getCurrencyDisplayName(Locale locale, String currencyCode) { 400 return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode); 401 } 402 403 private static native String getCurrencyDisplayName(String languageTag, String currencyCode); 404 405 public static native int getCurrencyFractionDigits(String currencyCode); 406 public static native int getCurrencyNumericCode(String currencyCode); 407 408 public static String getCurrencySymbol(Locale locale, String currencyCode) { 409 return getCurrencySymbol(locale.toLanguageTag(), currencyCode); 410 } 411 412 private static native String getCurrencySymbol(String languageTag, String currencyCode); 413 414 public static String getDisplayCountry(Locale targetLocale, Locale locale) { 415 return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 416 } 417 418 private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag); 419 420 public static String getDisplayLanguage(Locale targetLocale, Locale locale) { 421 return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 422 } 423 424 private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag); 425 426 public static String getDisplayVariant(Locale targetLocale, Locale locale) { 427 return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 428 } 429 430 private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag); 431 432 public static String getDisplayScript(Locale targetLocale, Locale locale) { 433 return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 434 } 435 436 private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag); 437 438 public static native String getISO3Country(String languageTag); 439 440 public static native String getISO3Language(String languageTag); 441 442 public static Locale addLikelySubtags(Locale locale) { 443 return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-')); 444 } 445 446 /** 447 * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead. 448 */ 449 @Deprecated 450 public static native String addLikelySubtags(String locale); 451 452 /** 453 * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept 454 * around only for the support library. 455 */ 456 @Deprecated 457 public static native String getScript(String locale); 458 459 private static native String[] getISOLanguagesNative(); 460 private static native String[] getISOCountriesNative(); 461 462 static native boolean initLocaleDataNative(String languageTag, LocaleData result); 463 464 /** 465 * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US 466 */ 467 public static native void setDefaultLocale(String languageTag); 468 469 /** 470 * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US. 471 */ 472 public static native String getDefaultLocale(); 473 474 /** Returns the TZData version as reported by ICU4C. */ 475 public static native String getTZDataVersion(); 476 } 477