Home | History | Annotate | Download | only in coll
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1996-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * CollationLoader.java, ported from ucol_res.cpp
     12 *
     13 * created by: Markus W. Scherer
     14 */
     15 
     16 package com.ibm.icu.impl.coll;
     17 
     18 import java.io.IOException;
     19 import java.nio.ByteBuffer;
     20 import java.util.MissingResourceException;
     21 
     22 import com.ibm.icu.impl.ICUData;
     23 import com.ibm.icu.impl.ICUResourceBundle;
     24 import com.ibm.icu.util.ICUUncheckedIOException;
     25 import com.ibm.icu.util.Output;
     26 import com.ibm.icu.util.ULocale;
     27 import com.ibm.icu.util.UResourceBundle;
     28 
     29 /**
     30  * Loads collation rule strings and tailorings from the ICU collation resource bundles.
     31  */
     32 public final class CollationLoader {
     33 
     34     // not implemented, all methods are static
     35     private CollationLoader() {
     36     }
     37 
     38     private static volatile String rootRules = null;
     39 
     40     private static void loadRootRules() {
     41         if (rootRules != null) {
     42             return;
     43         }
     44         synchronized(CollationLoader.class) {
     45             if (rootRules == null) {
     46                 UResourceBundle rootBundle = UResourceBundle.getBundleInstance(
     47                         ICUData.ICU_COLLATION_BASE_NAME, ULocale.ROOT);
     48                 rootRules = rootBundle.getString("UCARules");
     49             }
     50         }
     51     }
     52 
     53     // C++: static void appendRootRules(UnicodeString &s)
     54     public static String getRootRules() {
     55         loadRootRules();
     56         return rootRules;
     57     }
     58 
     59     /**
     60      * Simpler/faster methods for ASCII than ones based on Unicode data.
     61      * TODO: There should be code like this somewhere already??
     62      */
     63     private static final class ASCII {
     64         static String toLowerCase(String s) {
     65             for (int i = 0; i < s.length(); ++i) {
     66                 char c = s.charAt(i);
     67                 if ('A' <= c && c <= 'Z') {
     68                     StringBuilder sb = new StringBuilder(s.length());
     69                     sb.append(s, 0, i).append((char)(c + 0x20));
     70                     while (++i < s.length()) {
     71                         c = s.charAt(i);
     72                         if ('A' <= c && c <= 'Z') { c = (char)(c + 0x20); }
     73                         sb.append(c);
     74                     }
     75                     return sb.toString();
     76                 }
     77             }
     78             return s;
     79         }
     80     }
     81 
     82     static String loadRules(ULocale locale, String collationType) {
     83         UResourceBundle bundle = UResourceBundle.getBundleInstance(
     84                 ICUData.ICU_COLLATION_BASE_NAME, locale);
     85         UResourceBundle data = ((ICUResourceBundle)bundle).getWithFallback(
     86                 "collations/" + ASCII.toLowerCase(collationType));
     87         String rules = data.getString("Sequence");
     88         return rules;
     89     }
     90 
     91     private static final UResourceBundle findWithFallback(UResourceBundle table, String entryName) {
     92         return ((ICUResourceBundle)table).findWithFallback(entryName);
     93     }
     94 
     95     public static CollationTailoring loadTailoring(ULocale locale, Output<ULocale> outValidLocale) {
     96 
     97         // Java porting note: ICU4J getWithFallback/getStringWithFallback currently does not
     98         // work well when alias table is involved in a resource path, unless full path is specified.
     99         // For now, collation resources does not contain such data, so the code below should work fine.
    100 
    101         CollationTailoring root = CollationRoot.getRoot();
    102         String localeName = locale.getName();
    103         if (localeName.length() == 0 || localeName.equals("root")) {
    104             outValidLocale.value = ULocale.ROOT;
    105             return root;
    106         }
    107 
    108         UResourceBundle bundle = null;
    109         try {
    110             bundle = ICUResourceBundle.getBundleInstance(
    111                     ICUData.ICU_COLLATION_BASE_NAME, locale,
    112                     ICUResourceBundle.OpenType.LOCALE_ROOT);
    113         } catch (MissingResourceException e) {
    114             outValidLocale.value = ULocale.ROOT;
    115             return root;
    116         }
    117 
    118         ULocale validLocale = bundle.getULocale();
    119         // Normalize the root locale. See
    120         // http://bugs.icu-project.org/trac/ticket/10715
    121         String validLocaleName = validLocale.getName();
    122         if (validLocaleName.length() == 0 || validLocaleName.equals("root")) {
    123             validLocale = ULocale.ROOT;
    124         }
    125         outValidLocale.value = validLocale;
    126 
    127         // There are zero or more tailorings in the collations table.
    128         UResourceBundle collations;
    129         try {
    130             collations = bundle.get("collations");
    131             if (collations == null) {
    132                 return root;
    133             }
    134         } catch(MissingResourceException ignored) {
    135             return root;
    136         }
    137 
    138         // Fetch the collation type from the locale ID and the default type from the data.
    139         String type = locale.getKeywordValue("collation");
    140         String defaultType = "standard";
    141 
    142         String defT = ((ICUResourceBundle)collations).findStringWithFallback("default");
    143         if (defT != null) {
    144             defaultType = defT;
    145         }
    146 
    147         if (type == null || type.equals("default")) {
    148             type = defaultType;
    149         } else {
    150             type = ASCII.toLowerCase(type);
    151         }
    152 
    153         // Load the collations/type tailoring, with type fallback.
    154 
    155         // Java porting note: typeFallback is used for setting U_USING_DEFAULT_WARNING in
    156         // ICU4C, but not used by ICU4J
    157 
    158         // boolean typeFallback = false;
    159         UResourceBundle data = findWithFallback(collations, type);
    160         if (data == null &&
    161                 type.length() > 6 && type.startsWith("search")) {
    162             // fall back from something like "searchjl" to "search"
    163             // typeFallback = true;
    164             type = "search";
    165             data = findWithFallback(collations, type);
    166         }
    167 
    168         if (data == null && !type.equals(defaultType)) {
    169             // fall back to the default type
    170             // typeFallback = true;
    171             type = defaultType;
    172             data = findWithFallback(collations, type);
    173         }
    174 
    175         if (data == null && !type.equals("standard")) {
    176             // fall back to the "standard" type
    177             // typeFallback = true;
    178             type = "standard";
    179             data = findWithFallback(collations, type);
    180         }
    181 
    182         if (data == null) {
    183             return root;
    184         }
    185 
    186         // Is this the same as the root collator? If so, then use that instead.
    187         ULocale actualLocale = data.getULocale();
    188         // http://bugs.icu-project.org/trac/ticket/10715 ICUResourceBundle(root).getULocale() != ULocale.ROOT
    189         // Therefore not just if (actualLocale.equals(ULocale.ROOT) && type.equals("standard")) {
    190         String actualLocaleName = actualLocale.getName();
    191         if (actualLocaleName.length() == 0 || actualLocaleName.equals("root")) {
    192             actualLocale = ULocale.ROOT;
    193             if (type.equals("standard")) {
    194                 return root;
    195             }
    196         }
    197 
    198         CollationTailoring t = new CollationTailoring(root.settings);
    199         t.actualLocale = actualLocale;
    200 
    201         // deserialize
    202         UResourceBundle binary = data.get("%%CollationBin");
    203         ByteBuffer inBytes = binary.getBinary();
    204         try {
    205             CollationDataReader.read(root, inBytes, t);
    206         } catch (IOException e) {
    207             throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:"
    208                     + actualLocale + " type:" + type, e);
    209         }
    210 
    211         // Try to fetch the optional rules string.
    212         try {
    213             t.setRulesResource(data.get("Sequence"));
    214         } catch(MissingResourceException ignored) {
    215         }
    216 
    217         // Set the collation types on the informational locales,
    218         // except when they match the default types (for brevity and backwards compatibility).
    219         // For the valid locale, suppress the default type.
    220         if (!type.equals(defaultType)) {
    221             outValidLocale.value = validLocale.setKeywordValue("collation", type);
    222         }
    223 
    224         // For the actual locale, suppress the default type *according to the actual locale*.
    225         // For example, zh has default=pinyin and contains all of the Chinese tailorings.
    226         // zh_Hant has default=stroke but has no other data.
    227         // For the valid locale "zh_Hant" we need to suppress stroke.
    228         // For the actual locale "zh" we need to suppress pinyin instead.
    229         if (!actualLocale.equals(validLocale)) {
    230             // Opening a bundle for the actual locale should always succeed.
    231             UResourceBundle actualBundle = UResourceBundle.getBundleInstance(
    232                     ICUData.ICU_COLLATION_BASE_NAME, actualLocale);
    233             defT = ((ICUResourceBundle)actualBundle).findStringWithFallback("collations/default");
    234             if (defT != null) {
    235                 defaultType = defT;
    236             }
    237         }
    238 
    239         if (!type.equals(defaultType)) {
    240             t.actualLocale = t.actualLocale.setKeywordValue("collation", type);
    241         }
    242 
    243         // if (typeFallback) {
    244         //     ICU4C implementation sets U_USING_DEFAULT_WARNING here
    245         // }
    246 
    247         return t;
    248     }
    249 }
    250