Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * loadednormalizer2impl.cpp
      9 *
     10 * created on: 2014sep03
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_NORMALIZATION
     17 
     18 #include "unicode/udata.h"
     19 #include "unicode/localpointer.h"
     20 #include "unicode/normalizer2.h"
     21 #include "unicode/unistr.h"
     22 #include "unicode/unorm.h"
     23 #include "cstring.h"
     24 #include "mutex.h"
     25 #include "norm2allmodes.h"
     26 #include "normalizer2impl.h"
     27 #include "uassert.h"
     28 #include "ucln_cmn.h"
     29 #include "uhash.h"
     30 
     31 U_NAMESPACE_BEGIN
     32 
     33 class LoadedNormalizer2Impl : public Normalizer2Impl {
     34 public:
     35     LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
     36     virtual ~LoadedNormalizer2Impl();
     37 
     38     void load(const char *packageName, const char *name, UErrorCode &errorCode);
     39 
     40 private:
     41     static UBool U_CALLCONV
     42     isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
     43 
     44     UDataMemory *memory;
     45     UTrie2 *ownedTrie;
     46 };
     47 
     48 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
     49     udata_close(memory);
     50     utrie2_close(ownedTrie);
     51 }
     52 
     53 UBool U_CALLCONV
     54 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
     55                                     const char * /* type */, const char * /*name*/,
     56                                     const UDataInfo *pInfo) {
     57     if(
     58         pInfo->size>=20 &&
     59         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     60         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     61         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
     62         pInfo->dataFormat[1]==0x72 &&
     63         pInfo->dataFormat[2]==0x6d &&
     64         pInfo->dataFormat[3]==0x32 &&
     65         pInfo->formatVersion[0]==3
     66     ) {
     67         // Normalizer2Impl *me=(Normalizer2Impl *)context;
     68         // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
     69         return TRUE;
     70     } else {
     71         return FALSE;
     72     }
     73 }
     74 
     75 void
     76 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
     77     if(U_FAILURE(errorCode)) {
     78         return;
     79     }
     80     memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
     81     if(U_FAILURE(errorCode)) {
     82         return;
     83     }
     84     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
     85     const int32_t *inIndexes=(const int32_t *)inBytes;
     86     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
     87     if(indexesLength<=IX_MIN_LCCC_CP) {
     88         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
     89         return;
     90     }
     91 
     92     int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
     93     int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
     94     ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
     95                                         inBytes+offset, nextOffset-offset, NULL,
     96                                         &errorCode);
     97     if(U_FAILURE(errorCode)) {
     98         return;
     99     }
    100 
    101     offset=nextOffset;
    102     nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
    103     const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
    104 
    105     // smallFCD: new in formatVersion 2
    106     offset=nextOffset;
    107     const uint8_t *inSmallFCD=inBytes+offset;
    108 
    109     init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
    110 }
    111 
    112 // instance cache ---------------------------------------------------------- ***
    113 
    114 Norm2AllModes *
    115 Norm2AllModes::createInstance(const char *packageName,
    116                               const char *name,
    117                               UErrorCode &errorCode) {
    118     if(U_FAILURE(errorCode)) {
    119         return NULL;
    120     }
    121     LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
    122     if(impl==NULL) {
    123         errorCode=U_MEMORY_ALLOCATION_ERROR;
    124         return NULL;
    125     }
    126     impl->load(packageName, name, errorCode);
    127     return createInstance(impl, errorCode);
    128 }
    129 
    130 U_CDECL_BEGIN
    131 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
    132 U_CDECL_END
    133 
    134 static Norm2AllModes *nfkcSingleton;
    135 static Norm2AllModes *nfkc_cfSingleton;
    136 static UHashtable    *cache=NULL;
    137 
    138 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
    139 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
    140 
    141 // UInitOnce singleton initialization function
    142 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
    143     if (uprv_strcmp(what, "nfkc") == 0) {
    144         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
    145     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
    146         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
    147     } else {
    148         U_ASSERT(FALSE);   // Unknown singleton
    149     }
    150     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    151 }
    152 
    153 U_CDECL_BEGIN
    154 
    155 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    156     delete (Norm2AllModes *)allModes;
    157 }
    158 
    159 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    160     delete nfkcSingleton;
    161     nfkcSingleton = NULL;
    162     delete nfkc_cfSingleton;
    163     nfkc_cfSingleton = NULL;
    164     uhash_close(cache);
    165     cache=NULL;
    166     nfkcInitOnce.reset();
    167     nfkc_cfInitOnce.reset();
    168     return TRUE;
    169 }
    170 
    171 U_CDECL_END
    172 
    173 const Norm2AllModes *
    174 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    175     if(U_FAILURE(errorCode)) { return NULL; }
    176     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    177     return nfkcSingleton;
    178 }
    179 
    180 const Norm2AllModes *
    181 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    182     if(U_FAILURE(errorCode)) { return NULL; }
    183     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    184     return nfkc_cfSingleton;
    185 }
    186 
    187 const Normalizer2 *
    188 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    189     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    190     return allModes!=NULL ? &allModes->comp : NULL;
    191 }
    192 
    193 const Normalizer2 *
    194 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    195     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    196     return allModes!=NULL ? &allModes->decomp : NULL;
    197 }
    198 
    199 const Normalizer2 *
    200 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    201     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    202     return allModes!=NULL ? &allModes->comp : NULL;
    203 }
    204 
    205 const Normalizer2 *
    206 Normalizer2::getInstance(const char *packageName,
    207                          const char *name,
    208                          UNormalization2Mode mode,
    209                          UErrorCode &errorCode) {
    210     if(U_FAILURE(errorCode)) {
    211         return NULL;
    212     }
    213     if(name==NULL || *name==0) {
    214         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    215         return NULL;
    216     }
    217     const Norm2AllModes *allModes=NULL;
    218     if(packageName==NULL) {
    219         if(0==uprv_strcmp(name, "nfc")) {
    220             allModes=Norm2AllModes::getNFCInstance(errorCode);
    221         } else if(0==uprv_strcmp(name, "nfkc")) {
    222             allModes=Norm2AllModes::getNFKCInstance(errorCode);
    223         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    224             allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    225         }
    226     }
    227     if(allModes==NULL && U_SUCCESS(errorCode)) {
    228         {
    229             Mutex lock;
    230             if(cache!=NULL) {
    231                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    232             }
    233         }
    234         if(allModes==NULL) {
    235             ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    236             LocalPointer<Norm2AllModes> localAllModes(
    237                 Norm2AllModes::createInstance(packageName, name, errorCode));
    238             if(U_SUCCESS(errorCode)) {
    239                 Mutex lock;
    240                 if(cache==NULL) {
    241                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    242                     if(U_FAILURE(errorCode)) {
    243                         return NULL;
    244                     }
    245                     uhash_setKeyDeleter(cache, uprv_free);
    246                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    247                 }
    248                 void *temp=uhash_get(cache, name);
    249                 if(temp==NULL) {
    250                     int32_t keyLength=uprv_strlen(name)+1;
    251                     char *nameCopy=(char *)uprv_malloc(keyLength);
    252                     if(nameCopy==NULL) {
    253                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    254                         return NULL;
    255                     }
    256                     uprv_memcpy(nameCopy, name, keyLength);
    257                     allModes=localAllModes.getAlias();
    258                     uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
    259                 } else {
    260                     // race condition
    261                     allModes=(Norm2AllModes *)temp;
    262                 }
    263             }
    264         }
    265     }
    266     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    267         switch(mode) {
    268         case UNORM2_COMPOSE:
    269             return &allModes->comp;
    270         case UNORM2_DECOMPOSE:
    271             return &allModes->decomp;
    272         case UNORM2_FCD:
    273             return &allModes->fcd;
    274         case UNORM2_COMPOSE_CONTIGUOUS:
    275             return &allModes->fcc;
    276         default:
    277             break;  // do nothing
    278         }
    279     }
    280     return NULL;
    281 }
    282 
    283 const Normalizer2 *
    284 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    285     if(U_FAILURE(errorCode)) {
    286         return NULL;
    287     }
    288     switch(mode) {
    289     case UNORM_NFD:
    290         return Normalizer2::getNFDInstance(errorCode);
    291     case UNORM_NFKD:
    292         return Normalizer2::getNFKDInstance(errorCode);
    293     case UNORM_NFC:
    294         return Normalizer2::getNFCInstance(errorCode);
    295     case UNORM_NFKC:
    296         return Normalizer2::getNFKCInstance(errorCode);
    297     case UNORM_FCD:
    298         return getFCDInstance(errorCode);
    299     default:  // UNORM_NONE
    300         return getNoopInstance(errorCode);
    301     }
    302 }
    303 
    304 const Normalizer2Impl *
    305 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    306     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    307     return allModes!=NULL ? allModes->impl : NULL;
    308 }
    309 
    310 const Normalizer2Impl *
    311 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    312     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    313     return allModes!=NULL ? allModes->impl : NULL;
    314 }
    315 
    316 U_NAMESPACE_END
    317 
    318 // C API ------------------------------------------------------------------- ***
    319 
    320 U_NAMESPACE_USE
    321 
    322 U_CAPI const UNormalizer2 * U_EXPORT2
    323 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    324     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    325 }
    326 
    327 U_CAPI const UNormalizer2 * U_EXPORT2
    328 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    329     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    330 }
    331 
    332 U_CAPI const UNormalizer2 * U_EXPORT2
    333 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    334     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    335 }
    336 
    337 U_CAPI const UNormalizer2 * U_EXPORT2
    338 unorm2_getInstance(const char *packageName,
    339                    const char *name,
    340                    UNormalization2Mode mode,
    341                    UErrorCode *pErrorCode) {
    342     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    343 }
    344 
    345 U_CFUNC UNormalizationCheckResult
    346 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    347     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    348         return UNORM_YES;
    349     }
    350     UErrorCode errorCode=U_ZERO_ERROR;
    351     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    352     if(U_SUCCESS(errorCode)) {
    353         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    354     } else {
    355         return UNORM_MAYBE;
    356     }
    357 }
    358 
    359 #endif  // !UCONFIG_NO_NORMALIZATION
    360