1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: normalizer2.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009nov22 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/localpointer.h" 22 #include "unicode/normalizer2.h" 23 #include "unicode/unistr.h" 24 #include "unicode/unorm.h" 25 #include "cpputils.h" 26 #include "cstring.h" 27 #include "mutex.h" 28 #include "normalizer2impl.h" 29 #include "ucln_cmn.h" 30 #include "uhash.h" 31 32 U_NAMESPACE_BEGIN 33 34 // Public API dispatch via Normalizer2 subclasses -------------------------- *** 35 36 // Normalizer2 implementation for the old UNORM_NONE. 37 class NoopNormalizer2 : public Normalizer2 { 38 virtual UnicodeString & 39 normalize(const UnicodeString &src, 40 UnicodeString &dest, 41 UErrorCode &errorCode) const { 42 if(U_SUCCESS(errorCode)) { 43 if(&dest!=&src) { 44 dest=src; 45 } else { 46 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 47 } 48 } 49 return dest; 50 } 51 virtual UnicodeString & 52 normalizeSecondAndAppend(UnicodeString &first, 53 const UnicodeString &second, 54 UErrorCode &errorCode) const { 55 if(U_SUCCESS(errorCode)) { 56 if(&first!=&second) { 57 first.append(second); 58 } else { 59 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 60 } 61 } 62 return first; 63 } 64 virtual UnicodeString & 65 append(UnicodeString &first, 66 const UnicodeString &second, 67 UErrorCode &errorCode) const { 68 if(U_SUCCESS(errorCode)) { 69 if(&first!=&second) { 70 first.append(second); 71 } else { 72 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 73 } 74 } 75 return first; 76 } 77 virtual UBool 78 getDecomposition(UChar32, UnicodeString &) const { 79 return FALSE; 80 } 81 virtual UBool 82 isNormalized(const UnicodeString &, UErrorCode &) const { 83 return TRUE; 84 } 85 virtual UNormalizationCheckResult 86 quickCheck(const UnicodeString &, UErrorCode &) const { 87 return UNORM_YES; 88 } 89 virtual int32_t 90 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 91 return s.length(); 92 } 93 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 94 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 95 virtual UBool isInert(UChar32) const { return TRUE; } 96 }; 97 98 // Intermediate class: 99 // Has Normalizer2Impl and does boilerplate argument checking and setup. 100 class Normalizer2WithImpl : public Normalizer2 { 101 public: 102 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 103 104 // normalize 105 virtual UnicodeString & 106 normalize(const UnicodeString &src, 107 UnicodeString &dest, 108 UErrorCode &errorCode) const { 109 if(U_FAILURE(errorCode)) { 110 dest.setToBogus(); 111 return dest; 112 } 113 const UChar *sArray=src.getBuffer(); 114 if(&dest==&src || sArray==NULL) { 115 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 116 dest.setToBogus(); 117 return dest; 118 } 119 dest.remove(); 120 ReorderingBuffer buffer(impl, dest); 121 if(buffer.init(src.length(), errorCode)) { 122 normalize(sArray, sArray+src.length(), buffer, errorCode); 123 } 124 return dest; 125 } 126 virtual void 127 normalize(const UChar *src, const UChar *limit, 128 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 129 130 // normalize and append 131 virtual UnicodeString & 132 normalizeSecondAndAppend(UnicodeString &first, 133 const UnicodeString &second, 134 UErrorCode &errorCode) const { 135 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 136 } 137 virtual UnicodeString & 138 append(UnicodeString &first, 139 const UnicodeString &second, 140 UErrorCode &errorCode) const { 141 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 142 } 143 UnicodeString & 144 normalizeSecondAndAppend(UnicodeString &first, 145 const UnicodeString &second, 146 UBool doNormalize, 147 UErrorCode &errorCode) const { 148 uprv_checkCanGetBuffer(first, errorCode); 149 if(U_FAILURE(errorCode)) { 150 return first; 151 } 152 const UChar *secondArray=second.getBuffer(); 153 if(&first==&second || secondArray==NULL) { 154 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 155 return first; 156 } 157 ReorderingBuffer buffer(impl, first); 158 if(buffer.init(first.length()+second.length(), errorCode)) { 159 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 160 buffer, errorCode); 161 } 162 return first; 163 } 164 virtual void 165 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 166 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 167 virtual UBool 168 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 169 UChar buffer[4]; 170 int32_t length; 171 const UChar *d=impl.getDecomposition(c, buffer, length); 172 if(d==NULL) { 173 return FALSE; 174 } 175 if(d==buffer) { 176 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 177 } else { 178 decomposition.setTo(FALSE, d, length); // read-only alias 179 } 180 return TRUE; 181 } 182 183 // quick checks 184 virtual UBool 185 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 186 if(U_FAILURE(errorCode)) { 187 return FALSE; 188 } 189 const UChar *sArray=s.getBuffer(); 190 if(sArray==NULL) { 191 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 192 return FALSE; 193 } 194 const UChar *sLimit=sArray+s.length(); 195 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 196 } 197 virtual UNormalizationCheckResult 198 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 199 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 200 } 201 virtual int32_t 202 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 203 if(U_FAILURE(errorCode)) { 204 return 0; 205 } 206 const UChar *sArray=s.getBuffer(); 207 if(sArray==NULL) { 208 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 209 return 0; 210 } 211 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 212 } 213 virtual const UChar * 214 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 215 216 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 217 return UNORM_YES; 218 } 219 220 const Normalizer2Impl &impl; 221 }; 222 223 class DecomposeNormalizer2 : public Normalizer2WithImpl { 224 public: 225 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 226 227 private: 228 virtual void 229 normalize(const UChar *src, const UChar *limit, 230 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 231 impl.decompose(src, limit, &buffer, errorCode); 232 } 233 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 234 virtual void 235 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 236 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 237 impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode); 238 } 239 virtual const UChar * 240 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 241 return impl.decompose(src, limit, NULL, errorCode); 242 } 243 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 244 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 245 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 246 } 247 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 248 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 249 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 250 }; 251 252 class ComposeNormalizer2 : public Normalizer2WithImpl { 253 public: 254 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 255 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 256 257 private: 258 virtual void 259 normalize(const UChar *src, const UChar *limit, 260 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 261 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 262 } 263 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 264 virtual void 265 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 266 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 267 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode); 268 } 269 270 virtual UBool 271 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 272 if(U_FAILURE(errorCode)) { 273 return FALSE; 274 } 275 const UChar *sArray=s.getBuffer(); 276 if(sArray==NULL) { 277 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 278 return FALSE; 279 } 280 UnicodeString temp; 281 ReorderingBuffer buffer(impl, temp); 282 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 283 return FALSE; 284 } 285 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 286 } 287 virtual UNormalizationCheckResult 288 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 289 if(U_FAILURE(errorCode)) { 290 return UNORM_MAYBE; 291 } 292 const UChar *sArray=s.getBuffer(); 293 if(sArray==NULL) { 294 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 295 return UNORM_MAYBE; 296 } 297 UNormalizationCheckResult qcResult=UNORM_YES; 298 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 299 return qcResult; 300 } 301 virtual const UChar * 302 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 303 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 304 } 305 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 306 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 307 return impl.getCompQuickCheck(impl.getNorm16(c)); 308 } 309 virtual UBool hasBoundaryBefore(UChar32 c) const { 310 return impl.hasCompBoundaryBefore(c); 311 } 312 virtual UBool hasBoundaryAfter(UChar32 c) const { 313 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 314 } 315 virtual UBool isInert(UChar32 c) const { 316 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 317 } 318 319 const UBool onlyContiguous; 320 }; 321 322 class FCDNormalizer2 : public Normalizer2WithImpl { 323 public: 324 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 325 326 private: 327 virtual void 328 normalize(const UChar *src, const UChar *limit, 329 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 330 impl.makeFCD(src, limit, &buffer, errorCode); 331 } 332 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 333 virtual void 334 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 335 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 336 impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode); 337 } 338 virtual const UChar * 339 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 340 return impl.makeFCD(src, limit, NULL, errorCode); 341 } 342 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 343 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 344 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 345 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 346 }; 347 348 // instance cache ---------------------------------------------------------- *** 349 350 struct Norm2AllModes : public UMemory { 351 static Norm2AllModes *createInstance(const char *packageName, 352 const char *name, 353 UErrorCode &errorCode); 354 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 355 356 Normalizer2Impl impl; 357 ComposeNormalizer2 comp; 358 DecomposeNormalizer2 decomp; 359 FCDNormalizer2 fcd; 360 ComposeNormalizer2 fcc; 361 }; 362 363 Norm2AllModes * 364 Norm2AllModes::createInstance(const char *packageName, 365 const char *name, 366 UErrorCode &errorCode) { 367 if(U_FAILURE(errorCode)) { 368 return NULL; 369 } 370 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 371 if(allModes.isNull()) { 372 errorCode=U_MEMORY_ALLOCATION_ERROR; 373 return NULL; 374 } 375 allModes->impl.load(packageName, name, errorCode); 376 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; 377 } 378 379 U_CDECL_BEGIN 380 static UBool U_CALLCONV uprv_normalizer2_cleanup(); 381 U_CDECL_END 382 383 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { 384 public: 385 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : 386 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} 387 Norm2AllModes *getInstance(UErrorCode &errorCode) { 388 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); 389 } 390 private: 391 static void *createInstance(const void *context, UErrorCode &errorCode) { 392 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 393 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); 394 } 395 396 const char *name; 397 }; 398 399 STATIC_TRI_STATE_SINGLETON(nfcSingleton); 400 STATIC_TRI_STATE_SINGLETON(nfkcSingleton); 401 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); 402 403 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { 404 public: 405 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} 406 Normalizer2 *getInstance(UErrorCode &errorCode) { 407 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); 408 } 409 private: 410 static void *createInstance(const void *, UErrorCode &errorCode) { 411 Normalizer2 *noop=new NoopNormalizer2; 412 if(noop==NULL) { 413 errorCode=U_MEMORY_ALLOCATION_ERROR; 414 } 415 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 416 return noop; 417 } 418 }; 419 420 STATIC_SIMPLE_SINGLETON(noopSingleton); 421 422 static UHashtable *cache=NULL; 423 424 U_CDECL_BEGIN 425 426 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 427 delete (Norm2AllModes *)allModes; 428 } 429 430 static UBool U_CALLCONV uprv_normalizer2_cleanup() { 431 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); 432 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); 433 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); 434 Norm2Singleton(noopSingleton).deleteInstance(); 435 uhash_close(cache); 436 cache=NULL; 437 return TRUE; 438 } 439 440 U_CDECL_END 441 442 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 443 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 444 return allModes!=NULL ? &allModes->comp : NULL; 445 } 446 447 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 448 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 449 return allModes!=NULL ? &allModes->decomp : NULL; 450 } 451 452 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 453 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 454 if(allModes!=NULL) { 455 allModes->impl.getFCDTrie(errorCode); 456 return &allModes->fcd; 457 } else { 458 return NULL; 459 } 460 } 461 462 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 463 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 464 return allModes!=NULL ? &allModes->fcc : NULL; 465 } 466 467 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 468 Norm2AllModes *allModes= 469 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 470 return allModes!=NULL ? &allModes->comp : NULL; 471 } 472 473 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 474 Norm2AllModes *allModes= 475 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 476 return allModes!=NULL ? &allModes->decomp : NULL; 477 } 478 479 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 480 Norm2AllModes *allModes= 481 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 482 return allModes!=NULL ? &allModes->comp : NULL; 483 } 484 485 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 486 return Norm2Singleton(noopSingleton).getInstance(errorCode); 487 } 488 489 const Normalizer2 * 490 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 491 if(U_FAILURE(errorCode)) { 492 return NULL; 493 } 494 switch(mode) { 495 case UNORM_NFD: 496 return getNFDInstance(errorCode); 497 case UNORM_NFKD: 498 return getNFKDInstance(errorCode); 499 case UNORM_NFC: 500 return getNFCInstance(errorCode); 501 case UNORM_NFKC: 502 return getNFKCInstance(errorCode); 503 case UNORM_FCD: 504 return getFCDInstance(errorCode); 505 default: // UNORM_NONE 506 return getNoopInstance(errorCode); 507 } 508 } 509 510 const Normalizer2Impl * 511 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 512 Norm2AllModes *allModes= 513 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 514 return allModes!=NULL ? &allModes->impl : NULL; 515 } 516 517 const Normalizer2Impl * 518 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 519 Norm2AllModes *allModes= 520 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 521 return allModes!=NULL ? &allModes->impl : NULL; 522 } 523 524 const Normalizer2Impl * 525 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 526 Norm2AllModes *allModes= 527 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 528 return allModes!=NULL ? &allModes->impl : NULL; 529 } 530 531 const Normalizer2Impl * 532 Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 533 return &((Normalizer2WithImpl *)norm2)->impl; 534 } 535 536 const UTrie2 * 537 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { 538 Norm2AllModes *allModes= 539 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 540 if(allModes!=NULL) { 541 return allModes->impl.getFCDTrie(errorCode); 542 } else { 543 return NULL; 544 } 545 } 546 547 const Normalizer2 * 548 Normalizer2::getInstance(const char *packageName, 549 const char *name, 550 UNormalization2Mode mode, 551 UErrorCode &errorCode) { 552 if(U_FAILURE(errorCode)) { 553 return NULL; 554 } 555 if(name==NULL || *name==0) { 556 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 557 } 558 Norm2AllModes *allModes=NULL; 559 if(packageName==NULL) { 560 if(0==uprv_strcmp(name, "nfc")) { 561 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 562 } else if(0==uprv_strcmp(name, "nfkc")) { 563 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 564 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 565 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 566 } 567 } 568 if(allModes==NULL && U_SUCCESS(errorCode)) { 569 { 570 Mutex lock; 571 if(cache!=NULL) { 572 allModes=(Norm2AllModes *)uhash_get(cache, name); 573 } 574 } 575 if(allModes==NULL) { 576 LocalPointer<Norm2AllModes> localAllModes( 577 Norm2AllModes::createInstance(packageName, name, errorCode)); 578 if(U_SUCCESS(errorCode)) { 579 Mutex lock; 580 if(cache==NULL) { 581 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 582 if(U_FAILURE(errorCode)) { 583 return NULL; 584 } 585 uhash_setKeyDeleter(cache, uprv_free); 586 uhash_setValueDeleter(cache, deleteNorm2AllModes); 587 } 588 void *temp=uhash_get(cache, name); 589 if(temp==NULL) { 590 int32_t keyLength=uprv_strlen(name)+1; 591 char *nameCopy=(char *)uprv_malloc(keyLength); 592 if(nameCopy==NULL) { 593 errorCode=U_MEMORY_ALLOCATION_ERROR; 594 return NULL; 595 } 596 uprv_memcpy(nameCopy, name, keyLength); 597 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 598 } else { 599 // race condition 600 allModes=(Norm2AllModes *)temp; 601 } 602 } 603 } 604 } 605 if(allModes!=NULL && U_SUCCESS(errorCode)) { 606 switch(mode) { 607 case UNORM2_COMPOSE: 608 return &allModes->comp; 609 case UNORM2_DECOMPOSE: 610 return &allModes->decomp; 611 case UNORM2_FCD: 612 allModes->impl.getFCDTrie(errorCode); 613 return &allModes->fcd; 614 case UNORM2_COMPOSE_CONTIGUOUS: 615 return &allModes->fcc; 616 default: 617 break; // do nothing 618 } 619 } 620 return NULL; 621 } 622 623 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) 624 625 U_NAMESPACE_END 626 627 // C API ------------------------------------------------------------------- *** 628 629 U_NAMESPACE_USE 630 631 U_DRAFT const UNormalizer2 * U_EXPORT2 632 unorm2_getInstance(const char *packageName, 633 const char *name, 634 UNormalization2Mode mode, 635 UErrorCode *pErrorCode) { 636 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 637 } 638 639 U_DRAFT void U_EXPORT2 640 unorm2_close(UNormalizer2 *norm2) { 641 delete (Normalizer2 *)norm2; 642 } 643 644 U_DRAFT int32_t U_EXPORT2 645 unorm2_normalize(const UNormalizer2 *norm2, 646 const UChar *src, int32_t length, 647 UChar *dest, int32_t capacity, 648 UErrorCode *pErrorCode) { 649 if(U_FAILURE(*pErrorCode)) { 650 return 0; 651 } 652 if( (src==NULL ? length!=0 : length<-1) || 653 (dest==NULL ? capacity!=0 : capacity<0) || 654 (src==dest && src!=NULL) 655 ) { 656 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 657 return 0; 658 } 659 UnicodeString destString(dest, 0, capacity); 660 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 661 if(length!=0) { 662 const Normalizer2 *n2=(const Normalizer2 *)norm2; 663 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 664 if(n2wi!=NULL) { 665 // Avoid duplicate argument checking and support NUL-terminated src. 666 ReorderingBuffer buffer(n2wi->impl, destString); 667 if(buffer.init(length, *pErrorCode)) { 668 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); 669 } 670 } else { 671 UnicodeString srcString(length<0, src, length); 672 n2->normalize(srcString, destString, *pErrorCode); 673 } 674 } 675 return destString.extract(dest, capacity, *pErrorCode); 676 } 677 678 static int32_t 679 normalizeSecondAndAppend(const UNormalizer2 *norm2, 680 UChar *first, int32_t firstLength, int32_t firstCapacity, 681 const UChar *second, int32_t secondLength, 682 UBool doNormalize, 683 UErrorCode *pErrorCode) { 684 if(U_FAILURE(*pErrorCode)) { 685 return 0; 686 } 687 if( (second==NULL ? secondLength!=0 : secondLength<-1) || 688 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : 689 (firstCapacity<0 || firstLength<-1)) || 690 (first==second && first!=NULL) 691 ) { 692 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 693 return 0; 694 } 695 UnicodeString firstString(first, firstLength, firstCapacity); 696 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 697 if(secondLength!=0) { 698 const Normalizer2 *n2=(const Normalizer2 *)norm2; 699 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 700 if(n2wi!=NULL) { 701 // Avoid duplicate argument checking and support NUL-terminated src. 702 ReorderingBuffer buffer(n2wi->impl, firstString); 703 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 704 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 705 doNormalize, buffer, *pErrorCode); 706 } 707 } else { 708 UnicodeString secondString(secondLength<0, second, secondLength); 709 if(doNormalize) { 710 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 711 } else { 712 n2->append(firstString, secondString, *pErrorCode); 713 } 714 } 715 } 716 return firstString.extract(first, firstCapacity, *pErrorCode); 717 } 718 719 U_DRAFT int32_t U_EXPORT2 720 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 721 UChar *first, int32_t firstLength, int32_t firstCapacity, 722 const UChar *second, int32_t secondLength, 723 UErrorCode *pErrorCode) { 724 return normalizeSecondAndAppend(norm2, 725 first, firstLength, firstCapacity, 726 second, secondLength, 727 TRUE, pErrorCode); 728 } 729 730 U_DRAFT int32_t U_EXPORT2 731 unorm2_append(const UNormalizer2 *norm2, 732 UChar *first, int32_t firstLength, int32_t firstCapacity, 733 const UChar *second, int32_t secondLength, 734 UErrorCode *pErrorCode) { 735 return normalizeSecondAndAppend(norm2, 736 first, firstLength, firstCapacity, 737 second, secondLength, 738 FALSE, pErrorCode); 739 } 740 741 U_DRAFT int32_t U_EXPORT2 742 unorm2_getDecomposition(const UNormalizer2 *norm2, 743 UChar32 c, UChar *decomposition, int32_t capacity, 744 UErrorCode *pErrorCode) { 745 if(U_FAILURE(*pErrorCode)) { 746 return 0; 747 } 748 if(decomposition==NULL ? capacity!=0 : capacity<0) { 749 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 750 return 0; 751 } 752 UnicodeString destString(decomposition, 0, capacity); 753 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { 754 return destString.extract(decomposition, capacity, *pErrorCode); 755 } else { 756 return -1; 757 } 758 } 759 760 U_DRAFT UBool U_EXPORT2 761 unorm2_isNormalized(const UNormalizer2 *norm2, 762 const UChar *s, int32_t length, 763 UErrorCode *pErrorCode) { 764 if(U_FAILURE(*pErrorCode)) { 765 return 0; 766 } 767 if((s==NULL && length!=0) || length<-1) { 768 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 769 return 0; 770 } 771 UnicodeString sString(length<0, s, length); 772 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 773 } 774 775 U_DRAFT UNormalizationCheckResult U_EXPORT2 776 unorm2_quickCheck(const UNormalizer2 *norm2, 777 const UChar *s, int32_t length, 778 UErrorCode *pErrorCode) { 779 if(U_FAILURE(*pErrorCode)) { 780 return UNORM_NO; 781 } 782 if((s==NULL && length!=0) || length<-1) { 783 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 784 return UNORM_NO; 785 } 786 UnicodeString sString(length<0, s, length); 787 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 788 } 789 790 U_DRAFT int32_t U_EXPORT2 791 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 792 const UChar *s, int32_t length, 793 UErrorCode *pErrorCode) { 794 if(U_FAILURE(*pErrorCode)) { 795 return 0; 796 } 797 if((s==NULL && length!=0) || length<-1) { 798 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 799 return 0; 800 } 801 UnicodeString sString(length<0, s, length); 802 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 803 } 804 805 U_DRAFT UBool U_EXPORT2 806 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 807 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 808 } 809 810 U_DRAFT UBool U_EXPORT2 811 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 812 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 813 } 814 815 U_DRAFT UBool U_EXPORT2 816 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 817 return ((const Normalizer2 *)norm2)->isInert(c); 818 } 819 820 // Some properties APIs ---------------------------------------------------- *** 821 822 U_CFUNC UNormalizationCheckResult U_EXPORT2 823 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 824 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 825 return UNORM_YES; 826 } 827 UErrorCode errorCode=U_ZERO_ERROR; 828 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 829 if(U_SUCCESS(errorCode)) { 830 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 831 } else { 832 return UNORM_MAYBE; 833 } 834 } 835 836 U_CAPI const uint16_t * U_EXPORT2 837 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { 838 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); 839 if(U_SUCCESS(*pErrorCode)) { 840 fcdHighStart=trie->highStart; 841 return trie->index; 842 } else { 843 return NULL; 844 } 845 } 846 847 #endif // !UCONFIG_NO_NORMALIZATION 848