1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * loadednormalizer2impl.cpp 9 * 10 * created on: 2014sep03 11 * created by: Markus W. Scherer 12 */ 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_NORMALIZATION 17 18 #include "unicode/udata.h" 19 #include "unicode/localpointer.h" 20 #include "unicode/normalizer2.h" 21 #include "unicode/ucptrie.h" 22 #include "unicode/unistr.h" 23 #include "unicode/unorm.h" 24 #include "cstring.h" 25 #include "mutex.h" 26 #include "norm2allmodes.h" 27 #include "normalizer2impl.h" 28 #include "uassert.h" 29 #include "ucln_cmn.h" 30 #include "uhash.h" 31 32 U_NAMESPACE_BEGIN 33 34 class LoadedNormalizer2Impl : public Normalizer2Impl { 35 public: 36 LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {} 37 virtual ~LoadedNormalizer2Impl(); 38 39 void load(const char *packageName, const char *name, UErrorCode &errorCode); 40 41 private: 42 static UBool U_CALLCONV 43 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); 44 45 UDataMemory *memory; 46 UCPTrie *ownedTrie; 47 }; 48 49 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { 50 udata_close(memory); 51 ucptrie_close(ownedTrie); 52 } 53 54 UBool U_CALLCONV 55 LoadedNormalizer2Impl::isAcceptable(void * /*context*/, 56 const char * /* type */, const char * /*name*/, 57 const UDataInfo *pInfo) { 58 if( 59 pInfo->size>=20 && 60 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 61 pInfo->charsetFamily==U_CHARSET_FAMILY && 62 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 63 pInfo->dataFormat[1]==0x72 && 64 pInfo->dataFormat[2]==0x6d && 65 pInfo->dataFormat[3]==0x32 && 66 pInfo->formatVersion[0]==4 67 ) { 68 // Normalizer2Impl *me=(Normalizer2Impl *)context; 69 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); 70 return TRUE; 71 } else { 72 return FALSE; 73 } 74 } 75 76 void 77 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 78 if(U_FAILURE(errorCode)) { 79 return; 80 } 81 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); 82 if(U_FAILURE(errorCode)) { 83 return; 84 } 85 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); 86 const int32_t *inIndexes=(const int32_t *)inBytes; 87 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; 88 if(indexesLength<=IX_MIN_LCCC_CP) { 89 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. 90 return; 91 } 92 93 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 94 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; 95 ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, 96 inBytes+offset, nextOffset-offset, NULL, 97 &errorCode); 98 if(U_FAILURE(errorCode)) { 99 return; 100 } 101 102 offset=nextOffset; 103 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; 104 const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset); 105 106 // smallFCD: new in formatVersion 2 107 offset=nextOffset; 108 const uint8_t *inSmallFCD=inBytes+offset; 109 110 init(inIndexes, ownedTrie, inExtraData, inSmallFCD); 111 } 112 113 // instance cache ---------------------------------------------------------- *** 114 115 Norm2AllModes * 116 Norm2AllModes::createInstance(const char *packageName, 117 const char *name, 118 UErrorCode &errorCode) { 119 if(U_FAILURE(errorCode)) { 120 return NULL; 121 } 122 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; 123 if(impl==NULL) { 124 errorCode=U_MEMORY_ALLOCATION_ERROR; 125 return NULL; 126 } 127 impl->load(packageName, name, errorCode); 128 return createInstance(impl, errorCode); 129 } 130 131 U_CDECL_BEGIN 132 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); 133 U_CDECL_END 134 135 #if !NORM2_HARDCODE_NFC_DATA 136 static Norm2AllModes *nfcSingleton; 137 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; 138 #endif 139 140 static Norm2AllModes *nfkcSingleton; 141 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; 142 143 static Norm2AllModes *nfkc_cfSingleton; 144 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; 145 146 static UHashtable *cache=NULL; 147 148 // UInitOnce singleton initialization function 149 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { 150 #if !NORM2_HARDCODE_NFC_DATA 151 if (uprv_strcmp(what, "nfc") == 0) { 152 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); 153 } else 154 #endif 155 if (uprv_strcmp(what, "nfkc") == 0) { 156 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); 157 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { 158 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); 159 } else { 160 U_ASSERT(FALSE); // Unknown singleton 161 } 162 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 163 } 164 165 U_CDECL_BEGIN 166 167 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 168 delete (Norm2AllModes *)allModes; 169 } 170 171 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { 172 #if !NORM2_HARDCODE_NFC_DATA 173 delete nfcSingleton; 174 nfcSingleton = NULL; 175 nfcInitOnce.reset(); 176 #endif 177 178 delete nfkcSingleton; 179 nfkcSingleton = NULL; 180 nfkcInitOnce.reset(); 181 182 delete nfkc_cfSingleton; 183 nfkc_cfSingleton = NULL; 184 nfkc_cfInitOnce.reset(); 185 186 uhash_close(cache); 187 cache=NULL; 188 return TRUE; 189 } 190 191 U_CDECL_END 192 193 #if !NORM2_HARDCODE_NFC_DATA 194 const Norm2AllModes * 195 Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { 196 if(U_FAILURE(errorCode)) { return NULL; } 197 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 198 return nfcSingleton; 199 } 200 #endif 201 202 const Norm2AllModes * 203 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { 204 if(U_FAILURE(errorCode)) { return NULL; } 205 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 206 return nfkcSingleton; 207 } 208 209 const Norm2AllModes * 210 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { 211 if(U_FAILURE(errorCode)) { return NULL; } 212 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 213 return nfkc_cfSingleton; 214 } 215 216 #if !NORM2_HARDCODE_NFC_DATA 217 const Normalizer2 * 218 Normalizer2::getNFCInstance(UErrorCode &errorCode) { 219 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 220 return allModes!=NULL ? &allModes->comp : NULL; 221 } 222 223 const Normalizer2 * 224 Normalizer2::getNFDInstance(UErrorCode &errorCode) { 225 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 226 return allModes!=NULL ? &allModes->decomp : NULL; 227 } 228 229 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 230 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 231 return allModes!=NULL ? &allModes->fcd : NULL; 232 } 233 234 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 235 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 236 return allModes!=NULL ? &allModes->fcc : NULL; 237 } 238 239 const Normalizer2Impl * 240 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 241 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 242 return allModes!=NULL ? allModes->impl : NULL; 243 } 244 #endif 245 246 const Normalizer2 * 247 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 248 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 249 return allModes!=NULL ? &allModes->comp : NULL; 250 } 251 252 const Normalizer2 * 253 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 254 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 255 return allModes!=NULL ? &allModes->decomp : NULL; 256 } 257 258 const Normalizer2 * 259 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 260 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 261 return allModes!=NULL ? &allModes->comp : NULL; 262 } 263 264 const Normalizer2 * 265 Normalizer2::getInstance(const char *packageName, 266 const char *name, 267 UNormalization2Mode mode, 268 UErrorCode &errorCode) { 269 if(U_FAILURE(errorCode)) { 270 return NULL; 271 } 272 if(name==NULL || *name==0) { 273 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 274 return NULL; 275 } 276 const Norm2AllModes *allModes=NULL; 277 if(packageName==NULL) { 278 if(0==uprv_strcmp(name, "nfc")) { 279 allModes=Norm2AllModes::getNFCInstance(errorCode); 280 } else if(0==uprv_strcmp(name, "nfkc")) { 281 allModes=Norm2AllModes::getNFKCInstance(errorCode); 282 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 283 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 284 } 285 } 286 if(allModes==NULL && U_SUCCESS(errorCode)) { 287 { 288 Mutex lock; 289 if(cache!=NULL) { 290 allModes=(Norm2AllModes *)uhash_get(cache, name); 291 } 292 } 293 if(allModes==NULL) { 294 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 295 LocalPointer<Norm2AllModes> localAllModes( 296 Norm2AllModes::createInstance(packageName, name, errorCode)); 297 if(U_SUCCESS(errorCode)) { 298 Mutex lock; 299 if(cache==NULL) { 300 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 301 if(U_FAILURE(errorCode)) { 302 return NULL; 303 } 304 uhash_setKeyDeleter(cache, uprv_free); 305 uhash_setValueDeleter(cache, deleteNorm2AllModes); 306 } 307 void *temp=uhash_get(cache, name); 308 if(temp==NULL) { 309 int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1); 310 char *nameCopy=(char *)uprv_malloc(keyLength); 311 if(nameCopy==NULL) { 312 errorCode=U_MEMORY_ALLOCATION_ERROR; 313 return NULL; 314 } 315 uprv_memcpy(nameCopy, name, keyLength); 316 allModes=localAllModes.getAlias(); 317 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); 318 } else { 319 // race condition 320 allModes=(Norm2AllModes *)temp; 321 } 322 } 323 } 324 } 325 if(allModes!=NULL && U_SUCCESS(errorCode)) { 326 switch(mode) { 327 case UNORM2_COMPOSE: 328 return &allModes->comp; 329 case UNORM2_DECOMPOSE: 330 return &allModes->decomp; 331 case UNORM2_FCD: 332 return &allModes->fcd; 333 case UNORM2_COMPOSE_CONTIGUOUS: 334 return &allModes->fcc; 335 default: 336 break; // do nothing 337 } 338 } 339 return NULL; 340 } 341 342 const Normalizer2 * 343 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 344 if(U_FAILURE(errorCode)) { 345 return NULL; 346 } 347 switch(mode) { 348 case UNORM_NFD: 349 return Normalizer2::getNFDInstance(errorCode); 350 case UNORM_NFKD: 351 return Normalizer2::getNFKDInstance(errorCode); 352 case UNORM_NFC: 353 return Normalizer2::getNFCInstance(errorCode); 354 case UNORM_NFKC: 355 return Normalizer2::getNFKCInstance(errorCode); 356 case UNORM_FCD: 357 return getFCDInstance(errorCode); 358 default: // UNORM_NONE 359 return getNoopInstance(errorCode); 360 } 361 } 362 363 const Normalizer2Impl * 364 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 365 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 366 return allModes!=NULL ? allModes->impl : NULL; 367 } 368 369 const Normalizer2Impl * 370 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 371 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 372 return allModes!=NULL ? allModes->impl : NULL; 373 } 374 375 U_NAMESPACE_END 376 377 // C API ------------------------------------------------------------------- *** 378 379 U_NAMESPACE_USE 380 381 U_CAPI const UNormalizer2 * U_EXPORT2 382 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 383 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 384 } 385 386 U_CAPI const UNormalizer2 * U_EXPORT2 387 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 388 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 389 } 390 391 U_CAPI const UNormalizer2 * U_EXPORT2 392 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 393 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 394 } 395 396 U_CAPI const UNormalizer2 * U_EXPORT2 397 unorm2_getInstance(const char *packageName, 398 const char *name, 399 UNormalization2Mode mode, 400 UErrorCode *pErrorCode) { 401 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 402 } 403 404 U_CFUNC UNormalizationCheckResult 405 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 406 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 407 return UNORM_YES; 408 } 409 UErrorCode errorCode=U_ZERO_ERROR; 410 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 411 if(U_SUCCESS(errorCode)) { 412 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 413 } else { 414 return UNORM_MAYBE; 415 } 416 } 417 418 #endif // !UCONFIG_NO_NORMALIZATION 419