1 /* 2 ******************************************************************************* 3 * Copyright (C) 2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * loadednormalizer2impl.cpp 7 * 8 * created on: 2014sep03 9 * created by: Markus W. Scherer 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_NORMALIZATION 15 16 #include "unicode/udata.h" 17 #include "unicode/localpointer.h" 18 #include "unicode/normalizer2.h" 19 #include "unicode/unistr.h" 20 #include "unicode/unorm.h" 21 #include "cstring.h" 22 #include "mutex.h" 23 #include "norm2allmodes.h" 24 #include "normalizer2impl.h" 25 #include "uassert.h" 26 #include "ucln_cmn.h" 27 #include "uhash.h" 28 29 U_NAMESPACE_BEGIN 30 31 class LoadedNormalizer2Impl : public Normalizer2Impl { 32 public: 33 LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {} 34 virtual ~LoadedNormalizer2Impl(); 35 36 void load(const char *packageName, const char *name, UErrorCode &errorCode); 37 38 private: 39 static UBool U_CALLCONV 40 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); 41 42 UDataMemory *memory; 43 UTrie2 *ownedTrie; 44 }; 45 46 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { 47 udata_close(memory); 48 utrie2_close(ownedTrie); 49 } 50 51 UBool U_CALLCONV 52 LoadedNormalizer2Impl::isAcceptable(void * /*context*/, 53 const char * /* type */, const char * /*name*/, 54 const UDataInfo *pInfo) { 55 if( 56 pInfo->size>=20 && 57 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 58 pInfo->charsetFamily==U_CHARSET_FAMILY && 59 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 60 pInfo->dataFormat[1]==0x72 && 61 pInfo->dataFormat[2]==0x6d && 62 pInfo->dataFormat[3]==0x32 && 63 pInfo->formatVersion[0]==2 64 ) { 65 // Normalizer2Impl *me=(Normalizer2Impl *)context; 66 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); 67 return TRUE; 68 } else { 69 return FALSE; 70 } 71 } 72 73 void 74 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 75 if(U_FAILURE(errorCode)) { 76 return; 77 } 78 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); 79 if(U_FAILURE(errorCode)) { 80 return; 81 } 82 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); 83 const int32_t *inIndexes=(const int32_t *)inBytes; 84 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; 85 if(indexesLength<=IX_MIN_MAYBE_YES) { 86 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. 87 return; 88 } 89 90 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 91 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; 92 ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 93 inBytes+offset, nextOffset-offset, NULL, 94 &errorCode); 95 if(U_FAILURE(errorCode)) { 96 return; 97 } 98 99 offset=nextOffset; 100 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; 101 const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset); 102 103 // smallFCD: new in formatVersion 2 104 offset=nextOffset; 105 const uint8_t *inSmallFCD=inBytes+offset; 106 107 init(inIndexes, ownedTrie, inExtraData, inSmallFCD); 108 } 109 110 // instance cache ---------------------------------------------------------- *** 111 112 Norm2AllModes * 113 Norm2AllModes::createInstance(const char *packageName, 114 const char *name, 115 UErrorCode &errorCode) { 116 if(U_FAILURE(errorCode)) { 117 return NULL; 118 } 119 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; 120 if(impl==NULL) { 121 errorCode=U_MEMORY_ALLOCATION_ERROR; 122 return NULL; 123 } 124 impl->load(packageName, name, errorCode); 125 return createInstance(impl, errorCode); 126 } 127 128 U_CDECL_BEGIN 129 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); 130 U_CDECL_END 131 132 static Norm2AllModes *nfkcSingleton; 133 static Norm2AllModes *nfkc_cfSingleton; 134 static UHashtable *cache=NULL; 135 136 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; 137 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; 138 139 // UInitOnce singleton initialization function 140 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { 141 if (uprv_strcmp(what, "nfkc") == 0) { 142 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); 143 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { 144 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); 145 } else { 146 U_ASSERT(FALSE); // Unknown singleton 147 } 148 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 149 } 150 151 U_CDECL_BEGIN 152 153 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 154 delete (Norm2AllModes *)allModes; 155 } 156 157 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { 158 delete nfkcSingleton; 159 nfkcSingleton = NULL; 160 delete nfkc_cfSingleton; 161 nfkc_cfSingleton = NULL; 162 uhash_close(cache); 163 cache=NULL; 164 nfkcInitOnce.reset(); 165 nfkc_cfInitOnce.reset(); 166 return TRUE; 167 } 168 169 U_CDECL_END 170 171 const Norm2AllModes * 172 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { 173 if(U_FAILURE(errorCode)) { return NULL; } 174 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 175 return nfkcSingleton; 176 } 177 178 const Norm2AllModes * 179 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { 180 if(U_FAILURE(errorCode)) { return NULL; } 181 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 182 return nfkc_cfSingleton; 183 } 184 185 const Normalizer2 * 186 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 187 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 188 return allModes!=NULL ? &allModes->comp : NULL; 189 } 190 191 const Normalizer2 * 192 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 193 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 194 return allModes!=NULL ? &allModes->decomp : NULL; 195 } 196 197 const Normalizer2 * 198 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 199 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 200 return allModes!=NULL ? &allModes->comp : NULL; 201 } 202 203 const Normalizer2 * 204 Normalizer2::getInstance(const char *packageName, 205 const char *name, 206 UNormalization2Mode mode, 207 UErrorCode &errorCode) { 208 if(U_FAILURE(errorCode)) { 209 return NULL; 210 } 211 if(name==NULL || *name==0) { 212 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 213 return NULL; 214 } 215 const Norm2AllModes *allModes=NULL; 216 if(packageName==NULL) { 217 if(0==uprv_strcmp(name, "nfc")) { 218 allModes=Norm2AllModes::getNFCInstance(errorCode); 219 } else if(0==uprv_strcmp(name, "nfkc")) { 220 allModes=Norm2AllModes::getNFKCInstance(errorCode); 221 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 222 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 223 } 224 } 225 if(allModes==NULL && U_SUCCESS(errorCode)) { 226 { 227 Mutex lock; 228 if(cache!=NULL) { 229 allModes=(Norm2AllModes *)uhash_get(cache, name); 230 } 231 } 232 if(allModes==NULL) { 233 LocalPointer<Norm2AllModes> localAllModes( 234 Norm2AllModes::createInstance(packageName, name, errorCode)); 235 if(U_SUCCESS(errorCode)) { 236 Mutex lock; 237 if(cache==NULL) { 238 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 239 if(U_FAILURE(errorCode)) { 240 return NULL; 241 } 242 uhash_setKeyDeleter(cache, uprv_free); 243 uhash_setValueDeleter(cache, deleteNorm2AllModes); 244 } 245 void *temp=uhash_get(cache, name); 246 if(temp==NULL) { 247 int32_t keyLength=uprv_strlen(name)+1; 248 char *nameCopy=(char *)uprv_malloc(keyLength); 249 if(nameCopy==NULL) { 250 errorCode=U_MEMORY_ALLOCATION_ERROR; 251 return NULL; 252 } 253 uprv_memcpy(nameCopy, name, keyLength); 254 allModes=localAllModes.getAlias(); 255 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); 256 } else { 257 // race condition 258 allModes=(Norm2AllModes *)temp; 259 } 260 } 261 } 262 } 263 if(allModes!=NULL && U_SUCCESS(errorCode)) { 264 switch(mode) { 265 case UNORM2_COMPOSE: 266 return &allModes->comp; 267 case UNORM2_DECOMPOSE: 268 return &allModes->decomp; 269 case UNORM2_FCD: 270 return &allModes->fcd; 271 case UNORM2_COMPOSE_CONTIGUOUS: 272 return &allModes->fcc; 273 default: 274 break; // do nothing 275 } 276 } 277 return NULL; 278 } 279 280 const Normalizer2 * 281 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 282 if(U_FAILURE(errorCode)) { 283 return NULL; 284 } 285 switch(mode) { 286 case UNORM_NFD: 287 return Normalizer2::getNFDInstance(errorCode); 288 case UNORM_NFKD: 289 return Normalizer2::getNFKDInstance(errorCode); 290 case UNORM_NFC: 291 return Normalizer2::getNFCInstance(errorCode); 292 case UNORM_NFKC: 293 return Normalizer2::getNFKCInstance(errorCode); 294 case UNORM_FCD: 295 return getFCDInstance(errorCode); 296 default: // UNORM_NONE 297 return getNoopInstance(errorCode); 298 } 299 } 300 301 const Normalizer2Impl * 302 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 303 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 304 return allModes!=NULL ? allModes->impl : NULL; 305 } 306 307 const Normalizer2Impl * 308 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 309 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 310 return allModes!=NULL ? allModes->impl : NULL; 311 } 312 313 U_NAMESPACE_END 314 315 // C API ------------------------------------------------------------------- *** 316 317 U_NAMESPACE_USE 318 319 U_CAPI const UNormalizer2 * U_EXPORT2 320 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 321 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 322 } 323 324 U_CAPI const UNormalizer2 * U_EXPORT2 325 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 326 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 327 } 328 329 U_CAPI const UNormalizer2 * U_EXPORT2 330 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 331 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 332 } 333 334 U_CAPI const UNormalizer2 * U_EXPORT2 335 unorm2_getInstance(const char *packageName, 336 const char *name, 337 UNormalization2Mode mode, 338 UErrorCode *pErrorCode) { 339 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 340 } 341 342 U_CFUNC UNormalizationCheckResult 343 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 344 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 345 return UNORM_YES; 346 } 347 UErrorCode errorCode=U_ZERO_ERROR; 348 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 349 if(U_SUCCESS(errorCode)) { 350 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 351 } else { 352 return UNORM_MAYBE; 353 } 354 } 355 356 #endif // !UCONFIG_NO_NORMALIZATION 357