Home | History | Annotate | Download | only in icu
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package libcore.icu;
     18 
     19 import java.util.Collections;
     20 import java.util.HashMap;
     21 import java.util.HashSet;
     22 import java.util.LinkedHashSet;
     23 import java.util.Locale;
     24 import java.util.Map;
     25 import java.util.Map.Entry;
     26 import java.util.Set;
     27 import libcore.util.BasicLruCache;
     28 
     29 /**
     30  * Makes ICU data accessible to Java.
     31  */
     32 public final class ICU {
  /**
   * Cache of date/time patterns used by {@code getBestDateTimePattern}. Keys are built as
   * {@code skeleton + "\t" + languageTag}; every access in this class happens while holding
   * this object's monitor. The cache is constructed with an argument of 8 (presumably the
   * maximum number of entries -- confirm against BasicLruCache).
   */
  private static final BasicLruCache<String, String> CACHED_PATTERNS =
      new BasicLruCache<String, String>(8);

  /** Filled in on the first call to {@code getAvailableLocales}; {@code null} until then. */
  private static Locale[] availableLocalesCache;

  /** Filled in on the first call to {@code getISOCountries}; {@code null} until then. */
  private static String[] isoCountries;

  /** Filled in on the first call to {@code getISOLanguages}; {@code null} until then. */
  private static String[] isoLanguages;
     41 
     42   /**
     43    * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
     44    */
     45   public static String[] getISOLanguages() {
     46     if (isoLanguages == null) {
     47       isoLanguages = getISOLanguagesNative();
     48     }
     49     return isoLanguages.clone();
     50   }
     51 
     52   /**
     53    * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
     54    */
     55   public static String[] getISOCountries() {
     56     if (isoCountries == null) {
     57       isoCountries = getISOCountriesNative();
     58     }
     59     return isoCountries.clone();
     60   }
     61 
  // Slots of the four-element array that parseLangScriptRegionAndVariants fills in and
  // localeFromIcuLocaleId reads back.
  private static final int IDX_LANGUAGE = 0;
  private static final int IDX_SCRIPT = 1;
  private static final int IDX_REGION = 2;
  private static final int IDX_VARIANT = 3;
     66 
     67   /*
     68    * Parse the {Language, Script, Region, Variant*} section of the ICU locale
     69    * ID. This is the bit that appears before the keyword separate "@". The general
     70    * structure is a series of ASCII alphanumeric strings (subtags)
     71    * separated by underscores.
     72    *
     73    * Each subtag is interpreted according to its position in the list of subtags
     74    * AND its length (groan...). The various cases are explained in comments
     75    * below.
     76    */
     77   private static void parseLangScriptRegionAndVariants(String string,
     78           String[] outputArray) {
     79     final int first = string.indexOf('_');
     80     final int second = string.indexOf('_', first + 1);
     81     final int third = string.indexOf('_', second + 1);
     82 
     83     if (first == -1) {
     84       outputArray[IDX_LANGUAGE] = string;
     85     } else if (second == -1) {
     86       // Language and country ("ja_JP") OR
     87       // Language and script ("en_Latn") OR
     88       // Language and variant ("en_POSIX").
     89 
     90       outputArray[IDX_LANGUAGE] = string.substring(0, first);
     91       final String secondString = string.substring(first + 1);
     92 
     93       if (secondString.length() == 4) {
     94           // 4 Letter ISO script code.
     95           outputArray[IDX_SCRIPT] = secondString;
     96       } else if (secondString.length() == 2 || secondString.length() == 3) {
     97           // 2 or 3 Letter region code.
     98           outputArray[IDX_REGION] = secondString;
     99       } else {
    100           // If we're here, the length of the second half is either 1 or greater
    101           // than 5. Assume that ICU won't hand us malformed tags, and therefore
    102           // assume the rest of the string is a series of variant tags.
    103           outputArray[IDX_VARIANT] = secondString;
    104       }
    105     } else if (third == -1) {
    106       // Language and country and variant ("ja_JP_TRADITIONAL") OR
    107       // Language and script and variant ("en_Latn_POSIX") OR
    108       // Language and script and region ("en_Latn_US"). OR
    109       // Language and variant with multiple subtags ("en_POSIX_XISOP")
    110 
    111       outputArray[IDX_LANGUAGE] = string.substring(0, first);
    112       final String secondString = string.substring(first + 1, second);
    113       final String thirdString = string.substring(second + 1);
    114 
    115       if (secondString.length() == 4) {
    116           // The second subtag is a script.
    117           outputArray[IDX_SCRIPT] = secondString;
    118 
    119           // The third subtag can be either a region or a variant, depending
    120           // on its length.
    121           if (thirdString.length() == 2 || thirdString.length() == 3 ||
    122                   thirdString.isEmpty()) {
    123               outputArray[IDX_REGION] = thirdString;
    124           } else {
    125               outputArray[IDX_VARIANT] = thirdString;
    126           }
    127       } else if (secondString.isEmpty() ||
    128               secondString.length() == 2 || secondString.length() == 3) {
    129           // The second string is a region, and the third a variant.
    130           outputArray[IDX_REGION] = secondString;
    131           outputArray[IDX_VARIANT] = thirdString;
    132       } else {
    133           // Variant with multiple subtags.
    134           outputArray[IDX_VARIANT] = string.substring(first + 1);
    135       }
    136     } else {
    137       // Language, script, region and variant with 1 or more subtags
    138       // ("en_Latn_US_POSIX") OR
    139       // Language, region and variant with 2 or more subtags
    140       // (en_US_POSIX_VARIANT).
    141       outputArray[IDX_LANGUAGE] = string.substring(0, first);
    142       final String secondString = string.substring(first + 1, second);
    143       if (secondString.length() == 4) {
    144           outputArray[IDX_SCRIPT] = secondString;
    145           outputArray[IDX_REGION] = string.substring(second + 1, third);
    146           outputArray[IDX_VARIANT] = string.substring(third + 1);
    147       } else {
    148           outputArray[IDX_REGION] = secondString;
    149           outputArray[IDX_VARIANT] = string.substring(second + 1);
    150       }
    151     }
    152   }
    153 
    154   /**
    155    * Returns the appropriate {@code Locale} given a {@code String} of the form returned
    156    * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
    157    * this method can parse strings that {@code Locale.toString} won't produce.
    158    * Used to remove duplication.
    159    */
    160   public static Locale localeFromIcuLocaleId(String localeId) {
    161     // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
    162     final int extensionsIndex = localeId.indexOf('@');
    163 
    164     Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
    165     Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
    166     Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
    167 
    168     if (extensionsIndex != -1) {
    169       extensionsMap = new HashMap<Character, String>();
    170       unicodeKeywordsMap = new HashMap<String, String>();
    171       unicodeAttributeSet = new HashSet<String>();
    172 
    173       // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
    174       // containing all "keywords" it could parse. An ICU keyword is a key-value pair
    175       // separated by an "=" (ULOC_KEYWORD_ASSIGN).
    176       //
    177       // Each keyword item can be one of three things :
    178       // - A unicode extension attribute list: In this case the item key is "attribute"
    179       //   and the value is a hyphen separated list of unicode attributes.
    180       // - A unicode extension keyword: In this case, the item key will be larger than
    181       //   1 char in length, and the value will be the unicode extension value.
    182       // - A BCP-47 extension subtag: In this case, the item key will be exactly one
    183       //   char in length, and the value will be a sequence of unparsed subtags that
    184       //   represent the extension.
    185       //
    186       // Note that this implies that unicode extension keywords are "promoted" to
    187       // to the same namespace as the top level extension subtags and their values.
    188       // There can't be any collisions in practice because the BCP-47 spec imposes
    189       // restrictions on their lengths.
    190       final String extensionsString = localeId.substring(extensionsIndex + 1);
    191       final String[] extensions = extensionsString.split(";");
    192       for (String extension : extensions) {
    193         // This is the special key for the unicode attributes
    194         if (extension.startsWith("attribute=")) {
    195           String unicodeAttributeValues = extension.substring("attribute=".length());
    196           for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
    197             unicodeAttributeSet.add(unicodeAttribute);
    198           }
    199         } else {
    200           final int separatorIndex = extension.indexOf('=');
    201 
    202           if (separatorIndex == 1) {
    203             // This is a BCP-47 extension subtag.
    204             final String value = extension.substring(2);
    205             final char extensionId = extension.charAt(0);
    206 
    207             extensionsMap.put(extensionId, value);
    208           } else {
    209             // This is a unicode extension keyword.
    210             unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
    211             extension.substring(separatorIndex + 1));
    212           }
    213         }
    214       }
    215     }
    216 
    217     final String[] outputArray = new String[] { "", "", "", "" };
    218     if (extensionsIndex == -1) {
    219       parseLangScriptRegionAndVariants(localeId, outputArray);
    220     } else {
    221       parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
    222           outputArray);
    223     }
    224     Locale.Builder builder = new Locale.Builder();
    225     builder.setLanguage(outputArray[IDX_LANGUAGE]);
    226     builder.setRegion(outputArray[IDX_REGION]);
    227     builder.setVariant(outputArray[IDX_VARIANT]);
    228     builder.setScript(outputArray[IDX_SCRIPT]);
    229     for (String attribute : unicodeAttributeSet) {
    230       builder.addUnicodeLocaleAttribute(attribute);
    231     }
    232     for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) {
    233       builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue());
    234     }
    235 
    236     for (Entry<Character, String> extension : extensionsMap.entrySet()) {
    237       builder.setExtension(extension.getKey(), extension.getValue());
    238     }
    239 
    240     return builder.build();
    241   }
    242 
    243   public static Locale[] localesFromStrings(String[] localeNames) {
    244     // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
    245     // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
    246     // both so that we never need to convert back when talking to it.
    247     LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
    248     for (String localeName : localeNames) {
    249       set.add(localeFromIcuLocaleId(localeName));
    250     }
    251     return set.toArray(new Locale[set.size()]);
    252   }
    253 
    254   public static Locale[] getAvailableLocales() {
    255     if (availableLocalesCache == null) {
    256       availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
    257     }
    258     return availableLocalesCache.clone();
    259   }
    260 
    261   public static Locale[] getAvailableBreakIteratorLocales() {
    262     return localesFromStrings(getAvailableBreakIteratorLocalesNative());
    263   }
    264 
    265   public static Locale[] getAvailableCalendarLocales() {
    266     return localesFromStrings(getAvailableCalendarLocalesNative());
    267   }
    268 
    269   public static Locale[] getAvailableCollatorLocales() {
    270     return localesFromStrings(getAvailableCollatorLocalesNative());
    271   }
    272 
    273   public static Locale[] getAvailableDateFormatLocales() {
    274     return localesFromStrings(getAvailableDateFormatLocalesNative());
    275   }
    276 
    277   public static Locale[] getAvailableDateFormatSymbolsLocales() {
    278     return getAvailableDateFormatLocales();
    279   }
    280 
    281   public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
    282     return getAvailableNumberFormatLocales();
    283   }
    284 
    285   public static Locale[] getAvailableNumberFormatLocales() {
    286     return localesFromStrings(getAvailableNumberFormatLocalesNative());
    287   }
    288 
    289   public static String getBestDateTimePattern(String skeleton, Locale locale) {
    290     String languageTag = locale.toLanguageTag();
    291     String key = skeleton + "\t" + languageTag;
    292     synchronized (CACHED_PATTERNS) {
    293       String pattern = CACHED_PATTERNS.get(key);
    294       if (pattern == null) {
    295         pattern = getBestDateTimePatternNative(skeleton, languageTag);
    296         CACHED_PATTERNS.put(key, pattern);
    297       }
    298       return pattern;
    299     }
    300   }
    301 
    302   private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
    303 
    304   public static char[] getDateFormatOrder(String pattern) {
    305     char[] result = new char[3];
    306     int resultIndex = 0;
    307     boolean sawDay = false;
    308     boolean sawMonth = false;
    309     boolean sawYear = false;
    310 
    311     for (int i = 0; i < pattern.length(); ++i) {
    312       char ch = pattern.charAt(i);
    313       if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
    314         if (ch == 'd' && !sawDay) {
    315           result[resultIndex++] = 'd';
    316           sawDay = true;
    317         } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
    318           result[resultIndex++] = 'M';
    319           sawMonth = true;
    320         } else if ((ch == 'y') && !sawYear) {
    321           result[resultIndex++] = 'y';
    322           sawYear = true;
    323         }
    324       } else if (ch == 'G') {
    325         // Ignore the era specifier, if present.
    326       } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
    327         throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
    328       } else if (ch == '\'') {
    329         if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
    330           ++i;
    331         } else {
    332           i = pattern.indexOf('\'', i + 1);
    333           if (i == -1) {
    334             throw new IllegalArgumentException("Bad quoting in " + pattern);
    335           }
    336           ++i;
    337         }
    338       } else {
    339         // Ignore spaces and punctuation.
    340       }
    341     }
    342     return result;
    343   }
    344 
  /**
   * Returns the version of the CLDR data in use, such as "22.1.1".
   * Implemented in native code.
   */
  public static native String getCldrVersion();

  /**
   * Returns the icu4c version in use, such as "50.1.1".
   * Implemented in native code.
   */
  public static native String getIcuVersion();

  /**
   * Returns the Unicode version our ICU supports, such as "6.2".
   * Implemented in native code.
   */
  public static native String getUnicodeVersion();
    359 
    360   // --- Case mapping.
    361 
    362   public static String toLowerCase(String s, Locale locale) {
    363     return toLowerCase(s, locale.toLanguageTag());
    364   }
    365 
    366   private static native String toLowerCase(String s, String languageTag);
    367 
    368   public static String toUpperCase(String s, Locale locale) {
    369     return toUpperCase(s, locale.toLanguageTag());
    370   }
    371 
    372   private static native String toUpperCase(String s, String languageTag);
    373 
    374   // --- Errors.
    375 
    376   // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
    377   public static final int U_ZERO_ERROR = 0;
    378   public static final int U_INVALID_CHAR_FOUND = 10;
    379   public static final int U_TRUNCATED_CHAR_FOUND = 11;
    380   public static final int U_ILLEGAL_CHAR_FOUND = 12;
    381   public static final int U_BUFFER_OVERFLOW_ERROR = 15;
    382 
    383   public static boolean U_FAILURE(int error) {
    384     return error > U_ZERO_ERROR;
    385   }
    386 
    387   // --- Native methods accessing ICU's database.
    388 
  // Each of these returns locale IDs as strings; the matching public getAvailable*Locales
  // method in this class converts them to Locale objects with localesFromStrings.
  private static native String[] getAvailableBreakIteratorLocalesNative();
  private static native String[] getAvailableCalendarLocalesNative();
  private static native String[] getAvailableCollatorLocalesNative();
  private static native String[] getAvailableDateFormatLocalesNative();
  private static native String[] getAvailableLocalesNative();
  private static native String[] getAvailableNumberFormatLocalesNative();

  // Currency lookups implemented in native code. NOTE(review): judging by the names these
  // return the known currency codes and the currency code for a country code -- confirm
  // against the native implementation.
  public static native String[] getAvailableCurrencyCodes();
  public static native String getCurrencyCode(String countryCode);
    398 
    399   public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
    400     return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
    401   }
    402 
    403   private static native String getCurrencyDisplayName(String languageTag, String currencyCode);
    404 
  // Per-currency lookups implemented in native code, keyed by currency code.
  public static native int getCurrencyFractionDigits(String currencyCode);
  public static native int getCurrencyNumericCode(String currencyCode);
    407 
    408   public static String getCurrencySymbol(Locale locale, String currencyCode) {
    409     return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
    410   }
    411 
    412   private static native String getCurrencySymbol(String languageTag, String currencyCode);
    413 
    414   public static String getDisplayCountry(Locale targetLocale, Locale locale) {
    415     return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    416   }
    417 
    418   private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);
    419 
    420   public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
    421     return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    422   }
    423 
    424   private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);
    425 
    426   public static String getDisplayVariant(Locale targetLocale, Locale locale) {
    427     return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    428   }
    429 
    430   private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);
    431 
    432   public static String getDisplayScript(Locale targetLocale, Locale locale) {
    433     return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    434   }
    435 
    436   private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
    437 
  // Implemented in native code; takes a language tag. NOTE(review): presumably returns the
  // three-letter ISO country code for the tag -- confirm against the native implementation.
  public static native String getISO3Country(String languageTag);

  // Implemented in native code; takes a language tag. NOTE(review): presumably returns the
  // three-letter ISO language code for the tag -- confirm against the native implementation.
  public static native String getISO3Language(String languageTag);
    441 
    442   public static Locale addLikelySubtags(Locale locale) {
    443       return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-'));
    444   }
    445 
    446   /**
    447    * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead.
    448    */
    449   @Deprecated
    450   public static native String addLikelySubtags(String locale);
    451 
  /**
   * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
   *     around only for the support library.
   */
  @Deprecated
  public static native String getScript(String locale);

  // Native sources of the arrays cached by getISOLanguages and getISOCountries.
  private static native String[] getISOLanguagesNative();
  private static native String[] getISOCountriesNative();

  // Package-private native hook taking a language tag and a LocaleData to work on.
  // NOTE(review): presumably populates `result` and returns whether that succeeded --
  // confirm against the native implementation and LocaleData.
  static native boolean initLocaleDataNative(String languageTag, LocaleData result);

  /**
   * Takes a BCP-47 language tag (Locale.toLanguageTag()), e.g. en-US, not en_US.
   */
  public static native void setDefaultLocale(String languageTag);

  /**
   * Returns a locale name, not a BCP-47 language tag, e.g. en_US, not en-US.
   */
  public static native String getDefaultLocale();

  /** Returns the TZData version as reported by ICU4C. */
  public static native String getTZDataVersion();
    476 }
    477