Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * loadednormalizer2impl.cpp
      9 *
     10 * created on: 2014sep03
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_NORMALIZATION
     17 
     18 #include "unicode/udata.h"
     19 #include "unicode/localpointer.h"
     20 #include "unicode/normalizer2.h"
     21 #include "unicode/ucptrie.h"
     22 #include "unicode/unistr.h"
     23 #include "unicode/unorm.h"
     24 #include "cstring.h"
     25 #include "mutex.h"
     26 #include "norm2allmodes.h"
     27 #include "normalizer2impl.h"
     28 #include "uassert.h"
     29 #include "ucln_cmn.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 class LoadedNormalizer2Impl : public Normalizer2Impl {
     35 public:
     36     LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
     37     virtual ~LoadedNormalizer2Impl();
     38 
     39     void load(const char *packageName, const char *name, UErrorCode &errorCode);
     40 
     41 private:
     42     static UBool U_CALLCONV
     43     isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
     44 
     45     UDataMemory *memory;
     46     UCPTrie *ownedTrie;
     47 };
     48 
     49 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
     50     udata_close(memory);
     51     ucptrie_close(ownedTrie);
     52 }
     53 
     54 UBool U_CALLCONV
     55 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
     56                                     const char * /* type */, const char * /*name*/,
     57                                     const UDataInfo *pInfo) {
     58     if(
     59         pInfo->size>=20 &&
     60         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     61         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     62         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
     63         pInfo->dataFormat[1]==0x72 &&
     64         pInfo->dataFormat[2]==0x6d &&
     65         pInfo->dataFormat[3]==0x32 &&
     66         pInfo->formatVersion[0]==4
     67     ) {
     68         // Normalizer2Impl *me=(Normalizer2Impl *)context;
     69         // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
     70         return TRUE;
     71     } else {
     72         return FALSE;
     73     }
     74 }
     75 
     76 void
     77 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
     78     if(U_FAILURE(errorCode)) {
     79         return;
     80     }
     81     memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
     82     if(U_FAILURE(errorCode)) {
     83         return;
     84     }
     85     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
     86     const int32_t *inIndexes=(const int32_t *)inBytes;
     87     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
     88     if(indexesLength<=IX_MIN_LCCC_CP) {
     89         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
     90         return;
     91     }
     92 
     93     int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
     94     int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
     95     ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
     96                                      inBytes+offset, nextOffset-offset, NULL,
     97                                      &errorCode);
     98     if(U_FAILURE(errorCode)) {
     99         return;
    100     }
    101 
    102     offset=nextOffset;
    103     nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
    104     const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
    105 
    106     // smallFCD: new in formatVersion 2
    107     offset=nextOffset;
    108     const uint8_t *inSmallFCD=inBytes+offset;
    109 
    110     init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
    111 }
    112 
    113 // instance cache ---------------------------------------------------------- ***
    114 
    115 Norm2AllModes *
    116 Norm2AllModes::createInstance(const char *packageName,
    117                               const char *name,
    118                               UErrorCode &errorCode) {
    119     if(U_FAILURE(errorCode)) {
    120         return NULL;
    121     }
    122     LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
    123     if(impl==NULL) {
    124         errorCode=U_MEMORY_ALLOCATION_ERROR;
    125         return NULL;
    126     }
    127     impl->load(packageName, name, errorCode);
    128     return createInstance(impl, errorCode);
    129 }
    130 
    131 U_CDECL_BEGIN
    132 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
    133 U_CDECL_END
    134 
    135 #if !NORM2_HARDCODE_NFC_DATA
    136 static Norm2AllModes *nfcSingleton;
    137 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
    138 #endif
    139 
    140 static Norm2AllModes *nfkcSingleton;
    141 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
    142 
    143 static Norm2AllModes *nfkc_cfSingleton;
    144 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
    145 
    146 static UHashtable    *cache=NULL;
    147 
    148 // UInitOnce singleton initialization function
    149 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
    150 #if !NORM2_HARDCODE_NFC_DATA
    151     if (uprv_strcmp(what, "nfc") == 0) {
    152         nfcSingleton    = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
    153     } else
    154 #endif
    155     if (uprv_strcmp(what, "nfkc") == 0) {
    156         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
    157     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
    158         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
    159     } else {
    160         U_ASSERT(FALSE);   // Unknown singleton
    161     }
    162     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    163 }
    164 
    165 U_CDECL_BEGIN
    166 
    167 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    168     delete (Norm2AllModes *)allModes;
    169 }
    170 
    171 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    172 #if !NORM2_HARDCODE_NFC_DATA
    173     delete nfcSingleton;
    174     nfcSingleton = NULL;
    175     nfcInitOnce.reset();
    176 #endif
    177 
    178     delete nfkcSingleton;
    179     nfkcSingleton = NULL;
    180     nfkcInitOnce.reset();
    181 
    182     delete nfkc_cfSingleton;
    183     nfkc_cfSingleton = NULL;
    184     nfkc_cfInitOnce.reset();
    185 
    186     uhash_close(cache);
    187     cache=NULL;
    188     return TRUE;
    189 }
    190 
    191 U_CDECL_END
    192 
    193 #if !NORM2_HARDCODE_NFC_DATA
    194 const Norm2AllModes *
    195 Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    196     if(U_FAILURE(errorCode)) { return NULL; }
    197     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    198     return nfcSingleton;
    199 }
    200 #endif
    201 
    202 const Norm2AllModes *
    203 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    204     if(U_FAILURE(errorCode)) { return NULL; }
    205     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    206     return nfkcSingleton;
    207 }
    208 
    209 const Norm2AllModes *
    210 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    211     if(U_FAILURE(errorCode)) { return NULL; }
    212     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    213     return nfkc_cfSingleton;
    214 }
    215 
    216 #if !NORM2_HARDCODE_NFC_DATA
    217 const Normalizer2 *
    218 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    219     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    220     return allModes!=NULL ? &allModes->comp : NULL;
    221 }
    222 
    223 const Normalizer2 *
    224 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    225     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    226     return allModes!=NULL ? &allModes->decomp : NULL;
    227 }
    228 
    229 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    230     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    231     return allModes!=NULL ? &allModes->fcd : NULL;
    232 }
    233 
    234 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    235     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    236     return allModes!=NULL ? &allModes->fcc : NULL;
    237 }
    238 
    239 const Normalizer2Impl *
    240 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    241     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    242     return allModes!=NULL ? allModes->impl : NULL;
    243 }
    244 #endif
    245 
    246 const Normalizer2 *
    247 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    248     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    249     return allModes!=NULL ? &allModes->comp : NULL;
    250 }
    251 
    252 const Normalizer2 *
    253 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    254     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    255     return allModes!=NULL ? &allModes->decomp : NULL;
    256 }
    257 
    258 const Normalizer2 *
    259 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    260     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    261     return allModes!=NULL ? &allModes->comp : NULL;
    262 }
    263 
    264 const Normalizer2 *
    265 Normalizer2::getInstance(const char *packageName,
    266                          const char *name,
    267                          UNormalization2Mode mode,
    268                          UErrorCode &errorCode) {
    269     if(U_FAILURE(errorCode)) {
    270         return NULL;
    271     }
    272     if(name==NULL || *name==0) {
    273         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    274         return NULL;
    275     }
    276     const Norm2AllModes *allModes=NULL;
    277     if(packageName==NULL) {
    278         if(0==uprv_strcmp(name, "nfc")) {
    279             allModes=Norm2AllModes::getNFCInstance(errorCode);
    280         } else if(0==uprv_strcmp(name, "nfkc")) {
    281             allModes=Norm2AllModes::getNFKCInstance(errorCode);
    282         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    283             allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    284         }
    285     }
    286     if(allModes==NULL && U_SUCCESS(errorCode)) {
    287         {
    288             Mutex lock;
    289             if(cache!=NULL) {
    290                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    291             }
    292         }
    293         if(allModes==NULL) {
    294             ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    295             LocalPointer<Norm2AllModes> localAllModes(
    296                 Norm2AllModes::createInstance(packageName, name, errorCode));
    297             if(U_SUCCESS(errorCode)) {
    298                 Mutex lock;
    299                 if(cache==NULL) {
    300                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    301                     if(U_FAILURE(errorCode)) {
    302                         return NULL;
    303                     }
    304                     uhash_setKeyDeleter(cache, uprv_free);
    305                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    306                 }
    307                 void *temp=uhash_get(cache, name);
    308                 if(temp==NULL) {
    309                     int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
    310                     char *nameCopy=(char *)uprv_malloc(keyLength);
    311                     if(nameCopy==NULL) {
    312                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    313                         return NULL;
    314                     }
    315                     uprv_memcpy(nameCopy, name, keyLength);
    316                     allModes=localAllModes.getAlias();
    317                     uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
    318                 } else {
    319                     // race condition
    320                     allModes=(Norm2AllModes *)temp;
    321                 }
    322             }
    323         }
    324     }
    325     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    326         switch(mode) {
    327         case UNORM2_COMPOSE:
    328             return &allModes->comp;
    329         case UNORM2_DECOMPOSE:
    330             return &allModes->decomp;
    331         case UNORM2_FCD:
    332             return &allModes->fcd;
    333         case UNORM2_COMPOSE_CONTIGUOUS:
    334             return &allModes->fcc;
    335         default:
    336             break;  // do nothing
    337         }
    338     }
    339     return NULL;
    340 }
    341 
    342 const Normalizer2 *
    343 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    344     if(U_FAILURE(errorCode)) {
    345         return NULL;
    346     }
    347     switch(mode) {
    348     case UNORM_NFD:
    349         return Normalizer2::getNFDInstance(errorCode);
    350     case UNORM_NFKD:
    351         return Normalizer2::getNFKDInstance(errorCode);
    352     case UNORM_NFC:
    353         return Normalizer2::getNFCInstance(errorCode);
    354     case UNORM_NFKC:
    355         return Normalizer2::getNFKCInstance(errorCode);
    356     case UNORM_FCD:
    357         return getFCDInstance(errorCode);
    358     default:  // UNORM_NONE
    359         return getNoopInstance(errorCode);
    360     }
    361 }
    362 
    363 const Normalizer2Impl *
    364 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    365     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    366     return allModes!=NULL ? allModes->impl : NULL;
    367 }
    368 
    369 const Normalizer2Impl *
    370 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    371     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    372     return allModes!=NULL ? allModes->impl : NULL;
    373 }
    374 
    375 U_NAMESPACE_END
    376 
    377 // C API ------------------------------------------------------------------- ***
    378 
    379 U_NAMESPACE_USE
    380 
    381 U_CAPI const UNormalizer2 * U_EXPORT2
    382 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    383     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    384 }
    385 
    386 U_CAPI const UNormalizer2 * U_EXPORT2
    387 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    388     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    389 }
    390 
    391 U_CAPI const UNormalizer2 * U_EXPORT2
    392 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    393     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    394 }
    395 
    396 U_CAPI const UNormalizer2 * U_EXPORT2
    397 unorm2_getInstance(const char *packageName,
    398                    const char *name,
    399                    UNormalization2Mode mode,
    400                    UErrorCode *pErrorCode) {
    401     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    402 }
    403 
    404 U_CFUNC UNormalizationCheckResult
    405 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    406     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    407         return UNORM_YES;
    408     }
    409     UErrorCode errorCode=U_ZERO_ERROR;
    410     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    411     if(U_SUCCESS(errorCode)) {
    412         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    413     } else {
    414         return UNORM_MAYBE;
    415     }
    416 }
    417 
    418 #endif  // !UCONFIG_NO_NORMALIZATION
    419