Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * loadednormalizer2impl.cpp
      7 *
      8 * created on: 2014sep03
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_NORMALIZATION
     15 
     16 #include "unicode/udata.h"
     17 #include "unicode/localpointer.h"
     18 #include "unicode/normalizer2.h"
     19 #include "unicode/unistr.h"
     20 #include "unicode/unorm.h"
     21 #include "cstring.h"
     22 #include "mutex.h"
     23 #include "norm2allmodes.h"
     24 #include "normalizer2impl.h"
     25 #include "uassert.h"
     26 #include "ucln_cmn.h"
     27 #include "uhash.h"
     28 
     29 U_NAMESPACE_BEGIN
     30 
     31 class LoadedNormalizer2Impl : public Normalizer2Impl {
     32 public:
     33     LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
     34     virtual ~LoadedNormalizer2Impl();
     35 
     36     void load(const char *packageName, const char *name, UErrorCode &errorCode);
     37 
     38 private:
     39     static UBool U_CALLCONV
     40     isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
     41 
     42     UDataMemory *memory;
     43     UTrie2 *ownedTrie;
     44 };
     45 
     46 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
     47     udata_close(memory);
     48     utrie2_close(ownedTrie);
     49 }
     50 
     51 UBool U_CALLCONV
     52 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
     53                                     const char * /* type */, const char * /*name*/,
     54                                     const UDataInfo *pInfo) {
     55     if(
     56         pInfo->size>=20 &&
     57         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     58         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     59         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
     60         pInfo->dataFormat[1]==0x72 &&
     61         pInfo->dataFormat[2]==0x6d &&
     62         pInfo->dataFormat[3]==0x32 &&
     63         pInfo->formatVersion[0]==2
     64     ) {
     65         // Normalizer2Impl *me=(Normalizer2Impl *)context;
     66         // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
     67         return TRUE;
     68     } else {
     69         return FALSE;
     70     }
     71 }
     72 
     73 void
     74 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
     75     if(U_FAILURE(errorCode)) {
     76         return;
     77     }
     78     memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
     79     if(U_FAILURE(errorCode)) {
     80         return;
     81     }
     82     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
     83     const int32_t *inIndexes=(const int32_t *)inBytes;
     84     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
     85     if(indexesLength<=IX_MIN_MAYBE_YES) {
     86         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
     87         return;
     88     }
     89 
     90     int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
     91     int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
     92     ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
     93                                         inBytes+offset, nextOffset-offset, NULL,
     94                                         &errorCode);
     95     if(U_FAILURE(errorCode)) {
     96         return;
     97     }
     98 
     99     offset=nextOffset;
    100     nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
    101     const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
    102 
    103     // smallFCD: new in formatVersion 2
    104     offset=nextOffset;
    105     const uint8_t *inSmallFCD=inBytes+offset;
    106 
    107     init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
    108 }
    109 
    110 // instance cache ---------------------------------------------------------- ***
    111 
    112 Norm2AllModes *
    113 Norm2AllModes::createInstance(const char *packageName,
    114                               const char *name,
    115                               UErrorCode &errorCode) {
    116     if(U_FAILURE(errorCode)) {
    117         return NULL;
    118     }
    119     LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
    120     if(impl==NULL) {
    121         errorCode=U_MEMORY_ALLOCATION_ERROR;
    122         return NULL;
    123     }
    124     impl->load(packageName, name, errorCode);
    125     return createInstance(impl, errorCode);
    126 }
    127 
    128 U_CDECL_BEGIN
    129 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
    130 U_CDECL_END
    131 
    132 static Norm2AllModes *nfkcSingleton;
    133 static Norm2AllModes *nfkc_cfSingleton;
    134 static UHashtable    *cache=NULL;
    135 
    136 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
    137 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
    138 
    139 // UInitOnce singleton initialization function
    140 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
    141     if (uprv_strcmp(what, "nfkc") == 0) {
    142         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
    143     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
    144         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
    145     } else {
    146         U_ASSERT(FALSE);   // Unknown singleton
    147     }
    148     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    149 }
    150 
    151 U_CDECL_BEGIN
    152 
    153 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    154     delete (Norm2AllModes *)allModes;
    155 }
    156 
    157 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    158     delete nfkcSingleton;
    159     nfkcSingleton = NULL;
    160     delete nfkc_cfSingleton;
    161     nfkc_cfSingleton = NULL;
    162     uhash_close(cache);
    163     cache=NULL;
    164     nfkcInitOnce.reset();
    165     nfkc_cfInitOnce.reset();
    166     return TRUE;
    167 }
    168 
    169 U_CDECL_END
    170 
    171 const Norm2AllModes *
    172 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    173     if(U_FAILURE(errorCode)) { return NULL; }
    174     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    175     return nfkcSingleton;
    176 }
    177 
    178 const Norm2AllModes *
    179 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    180     if(U_FAILURE(errorCode)) { return NULL; }
    181     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    182     return nfkc_cfSingleton;
    183 }
    184 
    185 const Normalizer2 *
    186 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    187     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    188     return allModes!=NULL ? &allModes->comp : NULL;
    189 }
    190 
    191 const Normalizer2 *
    192 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    193     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    194     return allModes!=NULL ? &allModes->decomp : NULL;
    195 }
    196 
    197 const Normalizer2 *
    198 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    199     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    200     return allModes!=NULL ? &allModes->comp : NULL;
    201 }
    202 
    203 const Normalizer2 *
    204 Normalizer2::getInstance(const char *packageName,
    205                          const char *name,
    206                          UNormalization2Mode mode,
    207                          UErrorCode &errorCode) {
    208     if(U_FAILURE(errorCode)) {
    209         return NULL;
    210     }
    211     if(name==NULL || *name==0) {
    212         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    213         return NULL;
    214     }
    215     const Norm2AllModes *allModes=NULL;
    216     if(packageName==NULL) {
    217         if(0==uprv_strcmp(name, "nfc")) {
    218             allModes=Norm2AllModes::getNFCInstance(errorCode);
    219         } else if(0==uprv_strcmp(name, "nfkc")) {
    220             allModes=Norm2AllModes::getNFKCInstance(errorCode);
    221         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    222             allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    223         }
    224     }
    225     if(allModes==NULL && U_SUCCESS(errorCode)) {
    226         {
    227             Mutex lock;
    228             if(cache!=NULL) {
    229                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    230             }
    231         }
    232         if(allModes==NULL) {
    233             LocalPointer<Norm2AllModes> localAllModes(
    234                 Norm2AllModes::createInstance(packageName, name, errorCode));
    235             if(U_SUCCESS(errorCode)) {
    236                 Mutex lock;
    237                 if(cache==NULL) {
    238                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    239                     if(U_FAILURE(errorCode)) {
    240                         return NULL;
    241                     }
    242                     uhash_setKeyDeleter(cache, uprv_free);
    243                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    244                 }
    245                 void *temp=uhash_get(cache, name);
    246                 if(temp==NULL) {
    247                     int32_t keyLength=uprv_strlen(name)+1;
    248                     char *nameCopy=(char *)uprv_malloc(keyLength);
    249                     if(nameCopy==NULL) {
    250                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    251                         return NULL;
    252                     }
    253                     uprv_memcpy(nameCopy, name, keyLength);
    254                     allModes=localAllModes.getAlias();
    255                     uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
    256                 } else {
    257                     // race condition
    258                     allModes=(Norm2AllModes *)temp;
    259                 }
    260             }
    261         }
    262     }
    263     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    264         switch(mode) {
    265         case UNORM2_COMPOSE:
    266             return &allModes->comp;
    267         case UNORM2_DECOMPOSE:
    268             return &allModes->decomp;
    269         case UNORM2_FCD:
    270             return &allModes->fcd;
    271         case UNORM2_COMPOSE_CONTIGUOUS:
    272             return &allModes->fcc;
    273         default:
    274             break;  // do nothing
    275         }
    276     }
    277     return NULL;
    278 }
    279 
    280 const Normalizer2 *
    281 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    282     if(U_FAILURE(errorCode)) {
    283         return NULL;
    284     }
    285     switch(mode) {
    286     case UNORM_NFD:
    287         return Normalizer2::getNFDInstance(errorCode);
    288     case UNORM_NFKD:
    289         return Normalizer2::getNFKDInstance(errorCode);
    290     case UNORM_NFC:
    291         return Normalizer2::getNFCInstance(errorCode);
    292     case UNORM_NFKC:
    293         return Normalizer2::getNFKCInstance(errorCode);
    294     case UNORM_FCD:
    295         return getFCDInstance(errorCode);
    296     default:  // UNORM_NONE
    297         return getNoopInstance(errorCode);
    298     }
    299 }
    300 
    301 const Normalizer2Impl *
    302 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    303     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    304     return allModes!=NULL ? allModes->impl : NULL;
    305 }
    306 
    307 const Normalizer2Impl *
    308 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    309     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    310     return allModes!=NULL ? allModes->impl : NULL;
    311 }
    312 
    313 U_NAMESPACE_END
    314 
    315 // C API ------------------------------------------------------------------- ***
    316 
    317 U_NAMESPACE_USE
    318 
    319 U_CAPI const UNormalizer2 * U_EXPORT2
    320 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    321     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    322 }
    323 
    324 U_CAPI const UNormalizer2 * U_EXPORT2
    325 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    326     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    327 }
    328 
    329 U_CAPI const UNormalizer2 * U_EXPORT2
    330 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    331     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    332 }
    333 
    334 U_CAPI const UNormalizer2 * U_EXPORT2
    335 unorm2_getInstance(const char *packageName,
    336                    const char *name,
    337                    UNormalization2Mode mode,
    338                    UErrorCode *pErrorCode) {
    339     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    340 }
    341 
    342 U_CFUNC UNormalizationCheckResult
    343 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    344     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    345         return UNORM_YES;
    346     }
    347     UErrorCode errorCode=U_ZERO_ERROR;
    348     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    349     if(U_SUCCESS(errorCode)) {
    350         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    351     } else {
    352         return UNORM_MAYBE;
    353     }
    354 }
    355 
    356 #endif  // !UCONFIG_NO_NORMALIZATION
    357