Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 1996-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  ucol_res.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 * Description:
     12 * This file contains dependencies that the collation run-time doesn't normally
     13 * need. This mainly contains resource bundle usage and collation meta information
     14 *
     15 * Modification history
     16 * Date        Name      Comments
     17 * 1996-1999   various members of ICU team maintained C API for collation framework
     18 * 02/16/2001  synwee    Added internal method getPrevSpecialCE
     19 * 03/01/2001  synwee    Added maxexpansion functionality.
     20 * 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
     21 * 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
     22 * 2012-2014   markus    Rewritten in C++ again.
     23 */
     24 
     25 #include "unicode/utypes.h"
     26 
     27 #if !UCONFIG_NO_COLLATION
     28 
     29 #include "unicode/coll.h"
     30 #include "unicode/localpointer.h"
     31 #include "unicode/locid.h"
     32 #include "unicode/tblcoll.h"
     33 #include "unicode/ucol.h"
     34 #include "unicode/uloc.h"
     35 #include "unicode/unistr.h"
     36 #include "unicode/ures.h"
     37 #include "cmemory.h"
     38 #include "cstring.h"
     39 #include "collationdatareader.h"
     40 #include "collationroot.h"
     41 #include "collationtailoring.h"
     42 #include "putilimp.h"
     43 #include "uassert.h"
     44 #include "ucln_in.h"
     45 #include "ucol_imp.h"
     46 #include "uenumimp.h"
     47 #include "ulist.h"
     48 #include "umutex.h"
     49 #include "uresimp.h"
     50 #include "ustrenum.h"
     51 #include "utracimp.h"
     52 
// Element count of an array, cast to int32_t.
// Only meaningful for true arrays: sizeof on a pointer would yield the pointer size.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     54 
     55 U_NAMESPACE_BEGIN
     56 
namespace {

// Root collation rule string and its length, filled in by
// CollationLoader::loadRootRules() from the "UCARules" entry of rootBundle.
static const UChar *rootRules = NULL;
static int32_t rootRulesLength = 0;
// Root collation resource bundle opened by loadRootRules() and closed by ucol_res_cleanup().
static UResourceBundle *rootBundle = NULL;
// One-time-initialization state used by appendRootRules() to run loadRootRules() once.
static UInitOnce gInitOnce = U_INITONCE_INITIALIZER;

}  // namespace
     65 
     66 U_CDECL_BEGIN
     67 
     68 static UBool U_CALLCONV
     69 ucol_res_cleanup() {
     70     rootRules = NULL;
     71     rootRulesLength = 0;
     72     ures_close(rootBundle);
     73     rootBundle = NULL;
     74     gInitOnce.reset();
     75     return TRUE;
     76 }
     77 
     78 U_CDECL_END
     79 
     80 void
     81 CollationLoader::loadRootRules(UErrorCode &errorCode) {
     82     if(U_FAILURE(errorCode)) { return; }
     83     rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode);
     84     if(U_FAILURE(errorCode)) { return; }
     85     rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode);
     86     if(U_FAILURE(errorCode)) {
     87         ures_close(rootBundle);
     88         rootBundle = NULL;
     89         return;
     90     }
     91     ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
     92 }
     93 
     94 void
     95 CollationLoader::appendRootRules(UnicodeString &s) {
     96     UErrorCode errorCode = U_ZERO_ERROR;
     97     umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode);
     98     if(U_SUCCESS(errorCode)) {
     99         s.append(rootRules, rootRulesLength);
    100     }
    101 }
    102 
    103 UnicodeString *
    104 CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) {
    105     if(U_FAILURE(errorCode)) { return NULL; }
    106     U_ASSERT(collationType != NULL && *collationType != 0);
    107     // Copy the type for lowercasing.
    108     char type[16];
    109     int32_t typeLength = uprv_strlen(collationType);
    110     if(typeLength >= LENGTHOF(type)) {
    111         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    112         return NULL;
    113     }
    114     uprv_memcpy(type, collationType, typeLength + 1);
    115     T_CString_toLowerCase(type);
    116 
    117     LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode));
    118     LocalUResourceBundlePointer collations(
    119             ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
    120     LocalUResourceBundlePointer data(
    121             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
    122     int32_t length;
    123     const UChar *s =  ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode);
    124     if(U_FAILURE(errorCode)) { return NULL; }
    125 
    126     // No string pointer aliasing so that we need not hold onto the resource bundle.
    127     UnicodeString *rules = new UnicodeString(s, length);
    128     if(rules == NULL) {
    129         errorCode = U_MEMORY_ALLOCATION_ERROR;
    130         return NULL;
    131     }
    132     return rules;
    133 }
    134 
    135 const CollationTailoring *
    136 CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) {
    137     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    138     if(U_FAILURE(errorCode)) { return NULL; }
    139     const char *name = locale.getName();
    140     if(*name == 0 || uprv_strcmp(name, "root") == 0) {
    141         validLocale = Locale::getRoot();
    142         return root;
    143     }
    144 
    145     LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode));
    146     if(errorCode == U_MISSING_RESOURCE_ERROR) {
    147         errorCode = U_USING_DEFAULT_WARNING;
    148         validLocale = Locale::getRoot();
    149         return root;
    150     }
    151     const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
    152     if(U_FAILURE(errorCode)) { return NULL; }
    153     validLocale = Locale(vLocale);
    154 
    155     // There are zero or more tailorings in the collations table.
    156     LocalUResourceBundlePointer collations(
    157             ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
    158     if(errorCode == U_MISSING_RESOURCE_ERROR) {
    159         errorCode = U_USING_DEFAULT_WARNING;
    160         return root;
    161     }
    162     if(U_FAILURE(errorCode)) { return NULL; }
    163 
    164     // Fetch the collation type from the locale ID and the default type from the data.
    165     char type[16];
    166     int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode);
    167     if(U_FAILURE(errorCode)) {
    168         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    169         return NULL;
    170     }
    171     type[typeLength] = 0;  // in case of U_NOT_TERMINATED_WARNING
    172     char defaultType[16];
    173     {
    174         UErrorCode internalErrorCode = U_ZERO_ERROR;
    175         LocalUResourceBundlePointer def(
    176                 ures_getByKeyWithFallback(collations.getAlias(), "default", NULL,
    177                                           &internalErrorCode));
    178         int32_t length;
    179         const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
    180         if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
    181             u_UCharsToChars(s, defaultType, length + 1);
    182         } else {
    183             uprv_strcpy(defaultType, "standard");
    184         }
    185     }
    186     if(typeLength == 0 || uprv_strcmp(type, "default") == 0) {
    187         uprv_strcpy(type, defaultType);
    188     } else {
    189         T_CString_toLowerCase(type);
    190     }
    191 
    192     // Load the collations/type tailoring, with type fallback.
    193     UBool typeFallback = FALSE;
    194     LocalUResourceBundlePointer data(
    195             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
    196     if(errorCode == U_MISSING_RESOURCE_ERROR &&
    197             typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) {
    198         // fall back from something like "searchjl" to "search"
    199         typeFallback = TRUE;
    200         type[6] = 0;
    201         errorCode = U_ZERO_ERROR;
    202         data.adoptInstead(
    203             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
    204     }
    205     if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) {
    206         // fall back to the default type
    207         typeFallback = TRUE;
    208         uprv_strcpy(type, defaultType);
    209         errorCode = U_ZERO_ERROR;
    210         data.adoptInstead(
    211             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
    212     }
    213     if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) {
    214         // fall back to the "standard" type
    215         typeFallback = TRUE;
    216         uprv_strcpy(type, "standard");
    217         errorCode = U_ZERO_ERROR;
    218         data.adoptInstead(
    219             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
    220     }
    221     if(errorCode == U_MISSING_RESOURCE_ERROR) {
    222         errorCode = U_USING_DEFAULT_WARNING;
    223         return root;
    224     }
    225     if(U_FAILURE(errorCode)) { return NULL; }
    226 
    227     LocalPointer<CollationTailoring> t(new CollationTailoring(root->settings));
    228     if(t.isNull() || t->isBogus()) {
    229         errorCode = U_MEMORY_ALLOCATION_ERROR;
    230         return NULL;
    231     }
    232 
    233     // Is this the same as the root collator? If so, then use that instead.
    234     const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
    235     if(U_FAILURE(errorCode)) { return NULL; }
    236     if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) &&
    237             uprv_strcmp(type, "standard") == 0) {
    238         if(typeFallback) {
    239             errorCode = U_USING_DEFAULT_WARNING;
    240         }
    241         return root;
    242     }
    243     t->actualLocale = Locale(actualLocale);
    244 
    245     // deserialize
    246     LocalUResourceBundlePointer binary(
    247             ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode));
    248     // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available
    249     // but that created undesirable dependencies.
    250     int32_t length;
    251     const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode);
    252     if(U_FAILURE(errorCode)) { return NULL; }
    253     CollationDataReader::read(root, inBytes, length, *t, errorCode);
    254     // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available
    255     // but that created undesirable dependencies.
    256     if(U_FAILURE(errorCode)) { return NULL; }
    257 
    258     // Try to fetch the optional rules string.
    259     {
    260         UErrorCode internalErrorCode = U_ZERO_ERROR;
    261         int32_t length;
    262         const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length,
    263                                              &internalErrorCode);
    264         if(U_SUCCESS(errorCode)) {
    265             t->rules.setTo(TRUE, s, length);
    266         }
    267     }
    268 
    269     // Set the collation types on the informational locales,
    270     // except when they match the default types (for brevity and backwards compatibility).
    271     // For the valid locale, suppress the default type.
    272     if(uprv_strcmp(type, defaultType) != 0) {
    273         validLocale.setKeywordValue("collation", type, errorCode);
    274         if(U_FAILURE(errorCode)) { return NULL; }
    275     }
    276 
    277     // For the actual locale, suppress the default type *according to the actual locale*.
    278     // For example, zh has default=pinyin and contains all of the Chinese tailorings.
    279     // zh_Hant has default=stroke but has no other data.
    280     // For the valid locale "zh_Hant" we need to suppress stroke.
    281     // For the actual locale "zh" we need to suppress pinyin instead.
    282     if(uprv_strcmp(actualLocale, vLocale) != 0) {
    283         // Opening a bundle for the actual locale should always succeed.
    284         LocalUResourceBundlePointer actualBundle(
    285                 ures_open(U_ICUDATA_COLL, actualLocale, &errorCode));
    286         if(U_FAILURE(errorCode)) { return NULL; }
    287         UErrorCode internalErrorCode = U_ZERO_ERROR;
    288         LocalUResourceBundlePointer def(
    289                 ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL,
    290                                           &internalErrorCode));
    291         int32_t length;
    292         const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
    293         if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
    294             u_UCharsToChars(s, defaultType, length + 1);
    295         } else {
    296             uprv_strcpy(defaultType, "standard");
    297         }
    298     }
    299     if(uprv_strcmp(type, defaultType) != 0) {
    300         t->actualLocale.setKeywordValue("collation", type, errorCode);
    301         if(U_FAILURE(errorCode)) { return NULL; }
    302     }
    303 
    304     if(typeFallback) {
    305         errorCode = U_USING_DEFAULT_WARNING;
    306     }
    307     t->bundle = bundle.orphan();
    308     return t.orphan();
    309 }
    310 
    311 U_NAMESPACE_END
    312 
    313 U_NAMESPACE_USE
    314 
    315 U_CAPI UCollator*
    316 ucol_open(const char *loc,
    317           UErrorCode *status)
    318 {
    319     U_NAMESPACE_USE
    320 
    321     UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
    322     UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
    323     UCollator *result = NULL;
    324 
    325     Collator *coll = Collator::createInstance(loc, *status);
    326     if(U_SUCCESS(*status)) {
    327         result = coll->toUCollator();
    328     }
    329     UTRACE_EXIT_PTR_STATUS(result, *status);
    330     return result;
    331 }
    332 
    333 
    334 U_CAPI int32_t U_EXPORT2
    335 ucol_getDisplayName(    const    char        *objLoc,
    336                     const    char        *dispLoc,
    337                     UChar             *result,
    338                     int32_t         resultLength,
    339                     UErrorCode        *status)
    340 {
    341     U_NAMESPACE_USE
    342 
    343     if(U_FAILURE(*status)) return -1;
    344     UnicodeString dst;
    345     if(!(result==NULL && resultLength==0)) {
    346         // NULL destination for pure preflighting: empty dummy string
    347         // otherwise, alias the destination buffer
    348         dst.setTo(result, 0, resultLength);
    349     }
    350     Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
    351     return dst.extract(result, resultLength, *status);
    352 }
    353 
    354 U_CAPI const char* U_EXPORT2
    355 ucol_getAvailable(int32_t index)
    356 {
    357     int32_t count = 0;
    358     const Locale *loc = Collator::getAvailableLocales(count);
    359     if (loc != NULL && index < count) {
    360         return loc[index].getName();
    361     }
    362     return NULL;
    363 }
    364 
    365 U_CAPI int32_t U_EXPORT2
    366 ucol_countAvailable()
    367 {
    368     int32_t count = 0;
    369     Collator::getAvailableLocales(count);
    370     return count;
    371 }
    372 
    373 #if !UCONFIG_NO_SERVICE
    374 U_CAPI UEnumeration* U_EXPORT2
    375 ucol_openAvailableLocales(UErrorCode *status) {
    376     U_NAMESPACE_USE
    377 
    378     // This is a wrapper over Collator::getAvailableLocales()
    379     if (U_FAILURE(*status)) {
    380         return NULL;
    381     }
    382     StringEnumeration *s = icu::Collator::getAvailableLocales();
    383     if (s == NULL) {
    384         *status = U_MEMORY_ALLOCATION_ERROR;
    385         return NULL;
    386     }
    387     return uenum_openFromStringEnumeration(s, status);
    388 }
    389 #endif
    390 
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// The resource table is named "collations" (plural) while the locale keyword
// is "collation" (singular); the two must not be used interchangeably.

// Name of the resource table that holds the collation tailorings.
static const char RESOURCE_NAME[] = "collations";

// Locale keywords relevant to collation; currently exactly one.
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT LENGTHOF(KEYWORDS)
    398 
    399 U_CAPI UEnumeration* U_EXPORT2
    400 ucol_getKeywords(UErrorCode *status) {
    401     UEnumeration *result = NULL;
    402     if (U_SUCCESS(*status)) {
    403         return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
    404     }
    405     return result;
    406 }
    407 
    408 U_CAPI UEnumeration* U_EXPORT2
    409 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
    410     if (U_FAILURE(*status)) {
    411         return NULL;
    412     }
    413     // hard-coded to accept exactly one collation keyword
    414     // modify if additional collation keyword is added later
    415     if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
    416     {
    417         *status = U_ILLEGAL_ARGUMENT_ERROR;
    418         return NULL;
    419     }
    420     return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
    421 }
    422 
// Template UEnumeration that ucol_getKeywordValuesForLocale() copies into each
// enumeration it returns. The two leading pointer members start out NULL;
// that function then points the copy's context at its results list.
// The remaining members are the ulist-based keyword-value callbacks
// (plus uenum_unextDefault).
static const UEnumeration defaultKeywordValues = {
    NULL,
    NULL,
    ulist_close_keyword_values_iterator,
    ulist_count_keyword_values,
    uenum_unextDefault,
    ulist_next_keyword_value,
    ulist_reset_keyword_values_iterator
};
    432 
    433 #include <stdio.h>
    434 
    435 U_CAPI UEnumeration* U_EXPORT2
    436 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
    437                                UBool /*commonlyUsed*/, UErrorCode* status) {
    438     /* Get the locale base name. */
    439     char localeBuffer[ULOC_FULLNAME_CAPACITY] = "";
    440     uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status);
    441 
    442     /* Create the 2 lists
    443      * -values is the temp location for the keyword values
    444      * -results hold the actual list used by the UEnumeration object
    445      */
    446     UList *values = ulist_createEmptyList(status);
    447     UList *results = ulist_createEmptyList(status);
    448     UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
    449     if (U_FAILURE(*status) || en == NULL) {
    450         if (en == NULL) {
    451             *status = U_MEMORY_ALLOCATION_ERROR;
    452         } else {
    453             uprv_free(en);
    454         }
    455         ulist_deleteList(values);
    456         ulist_deleteList(results);
    457         return NULL;
    458     }
    459 
    460     memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
    461     en->context = results;
    462 
    463     /* Open the resource bundle for collation with the given locale. */
    464     UResourceBundle bundle, collations, collres, defres;
    465     ures_initStackObject(&bundle);
    466     ures_initStackObject(&collations);
    467     ures_initStackObject(&collres);
    468     ures_initStackObject(&defres);
    469 
    470     ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
    471 
    472     while (U_SUCCESS(*status)) {
    473         ures_getByKey(&bundle, RESOURCE_NAME, &collations, status);
    474         ures_resetIterator(&collations);
    475         while (U_SUCCESS(*status) && ures_hasNext(&collations)) {
    476             ures_getNextResource(&collations, &collres, status);
    477             const char *key = ures_getKey(&collres);
    478             /* If the key is default, get the string and store it in results list only
    479              * if results list is empty.
    480              */
    481             if (uprv_strcmp(key, "default") == 0) {
    482                 if (ulist_getListSize(results) == 0) {
    483                     char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
    484                     int32_t defcollLength = ULOC_KEYWORDS_CAPACITY;
    485 
    486                     ures_getNextResource(&collres, &defres, status);
    487 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    488 			/* optimize - use the utf-8 string */
    489                     ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status);
    490 #else
    491                     {
    492                        const UChar* defString = ures_getString(&defres, &defcollLength, status);
    493                        if(U_SUCCESS(*status)) {
    494 			   if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) {
    495 				*status = U_BUFFER_OVERFLOW_ERROR;
    496 			   } else {
    497                            	u_UCharsToChars(defString, defcoll, defcollLength+1);
    498 			   }
    499                        }
    500                     }
    501 #endif
    502 
    503                     ulist_addItemBeginList(results, defcoll, TRUE, status);
    504                 }
    505             } else {
    506                 ulist_addItemEndList(values, key, FALSE, status);
    507             }
    508         }
    509 
    510         /* If the locale is "" this is root so exit. */
    511         if (uprv_strlen(localeBuffer) == 0) {
    512             break;
    513         }
    514         /* Get the parent locale and open a new resource bundle. */
    515         uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status);
    516         ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
    517     }
    518 
    519     ures_close(&defres);
    520     ures_close(&collres);
    521     ures_close(&collations);
    522     ures_close(&bundle);
    523 
    524     if (U_SUCCESS(*status)) {
    525         char *value = NULL;
    526         ulist_resetList(values);
    527         while ((value = (char *)ulist_getNext(values)) != NULL) {
    528             if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) {
    529                 ulist_addItemEndList(results, value, FALSE, status);
    530                 if (U_FAILURE(*status)) {
    531                     break;
    532                 }
    533             }
    534         }
    535     }
    536 
    537     ulist_deleteList(values);
    538 
    539     if (U_FAILURE(*status)){
    540         uenum_close(en);
    541         en = NULL;
    542     } else {
    543         ulist_resetList(results);
    544     }
    545 
    546     return en;
    547 }
    548 
    549 U_CAPI int32_t U_EXPORT2
    550 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
    551                              const char* keyword, const char* locale,
    552                              UBool* isAvailable, UErrorCode* status)
    553 {
    554     // N.B.: Resource name is "collations" but keyword is "collation"
    555     return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
    556         "collations", keyword, locale,
    557         isAvailable, TRUE, status);
    558 }
    559 
    560 #endif /* #if !UCONFIG_NO_COLLATION */
    561