1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: normalizer2.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009nov22 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/localpointer.h" 22 #include "unicode/normalizer2.h" 23 #include "unicode/unistr.h" 24 #include "unicode/unorm.h" 25 #include "cpputils.h" 26 #include "cstring.h" 27 #include "mutex.h" 28 #include "normalizer2impl.h" 29 #include "ucln_cmn.h" 30 #include "uhash.h" 31 32 U_NAMESPACE_BEGIN 33 34 // Public API dispatch via Normalizer2 subclasses -------------------------- *** 35 36 // Normalizer2 implementation for the old UNORM_NONE. 37 class NoopNormalizer2 : public Normalizer2 { 38 virtual UnicodeString & 39 normalize(const UnicodeString &src, 40 UnicodeString &dest, 41 UErrorCode &errorCode) const { 42 if(U_SUCCESS(errorCode)) { 43 if(&dest!=&src) { 44 dest=src; 45 } else { 46 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 47 } 48 } 49 return dest; 50 } 51 virtual UnicodeString & 52 normalizeSecondAndAppend(UnicodeString &first, 53 const UnicodeString &second, 54 UErrorCode &errorCode) const { 55 if(U_SUCCESS(errorCode)) { 56 if(&first!=&second) { 57 first.append(second); 58 } else { 59 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 60 } 61 } 62 return first; 63 } 64 virtual UnicodeString & 65 append(UnicodeString &first, 66 const UnicodeString &second, 67 UErrorCode &errorCode) const { 68 if(U_SUCCESS(errorCode)) { 69 if(&first!=&second) { 70 first.append(second); 71 } else { 72 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 73 } 74 } 75 return first; 76 } 77 virtual UBool 78 getDecomposition(UChar32, UnicodeString &) const { 79 return FALSE; 80 } 81 virtual UBool 82 isNormalized(const UnicodeString &, UErrorCode &) const { 83 return TRUE; 84 } 85 virtual UNormalizationCheckResult 86 quickCheck(const UnicodeString &, UErrorCode &) const { 87 return UNORM_YES; 88 } 89 virtual int32_t 90 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 91 return s.length(); 92 } 93 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 94 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 95 virtual UBool isInert(UChar32) const { return TRUE; } 96 }; 97 98 // Intermediate class: 99 // Has Normalizer2Impl and does boilerplate argument checking and setup. 100 class Normalizer2WithImpl : public Normalizer2 { 101 public: 102 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 103 104 // normalize 105 virtual UnicodeString & 106 normalize(const UnicodeString &src, 107 UnicodeString &dest, 108 UErrorCode &errorCode) const { 109 if(U_FAILURE(errorCode)) { 110 dest.setToBogus(); 111 return dest; 112 } 113 const UChar *sArray=src.getBuffer(); 114 if(&dest==&src || sArray==NULL) { 115 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 116 dest.setToBogus(); 117 return dest; 118 } 119 dest.remove(); 120 ReorderingBuffer buffer(impl, dest); 121 if(buffer.init(src.length(), errorCode)) { 122 normalize(sArray, sArray+src.length(), buffer, errorCode); 123 } 124 return dest; 125 } 126 virtual void 127 normalize(const UChar *src, const UChar *limit, 128 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 129 130 // normalize and append 131 virtual UnicodeString & 132 normalizeSecondAndAppend(UnicodeString &first, 133 const UnicodeString &second, 134 UErrorCode &errorCode) const { 135 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 136 } 137 virtual UnicodeString & 138 append(UnicodeString &first, 139 const UnicodeString &second, 140 UErrorCode &errorCode) const { 141 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 142 } 143 UnicodeString & 144 normalizeSecondAndAppend(UnicodeString &first, 145 const UnicodeString &second, 146 UBool doNormalize, 147 UErrorCode &errorCode) const { 148 uprv_checkCanGetBuffer(first, errorCode); 149 if(U_FAILURE(errorCode)) { 150 return first; 151 } 152 const UChar *secondArray=second.getBuffer(); 153 if(&first==&second || secondArray==NULL) { 154 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 155 return first; 156 } 157 int32_t firstLength=first.length(); 158 UnicodeString safeMiddle; 159 { 160 ReorderingBuffer buffer(impl, first); 161 if(buffer.init(firstLength+second.length(), errorCode)) { 162 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 163 safeMiddle, buffer, errorCode); 164 } 165 } // The ReorderingBuffer destructor finalizes the first string. 166 if(U_FAILURE(errorCode)) { 167 // Restore the modified suffix of the first string. 168 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 169 } 170 return first; 171 } 172 virtual void 173 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 174 UnicodeString &safeMiddle, 175 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 176 virtual UBool 177 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 178 UChar buffer[4]; 179 int32_t length; 180 const UChar *d=impl.getDecomposition(c, buffer, length); 181 if(d==NULL) { 182 return FALSE; 183 } 184 if(d==buffer) { 185 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 186 } else { 187 decomposition.setTo(FALSE, d, length); // read-only alias 188 } 189 return TRUE; 190 } 191 192 // quick checks 193 virtual UBool 194 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 195 if(U_FAILURE(errorCode)) { 196 return FALSE; 197 } 198 const UChar *sArray=s.getBuffer(); 199 if(sArray==NULL) { 200 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 201 return FALSE; 202 } 203 const UChar *sLimit=sArray+s.length(); 204 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 205 } 206 virtual UNormalizationCheckResult 207 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 208 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 209 } 210 virtual int32_t 211 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 212 if(U_FAILURE(errorCode)) { 213 return 0; 214 } 215 const UChar *sArray=s.getBuffer(); 216 if(sArray==NULL) { 217 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 218 return 0; 219 } 220 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 221 } 222 virtual const UChar * 223 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 224 225 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 226 return UNORM_YES; 227 } 228 229 const Normalizer2Impl &impl; 230 }; 231 232 class DecomposeNormalizer2 : public Normalizer2WithImpl { 233 public: 234 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 235 236 private: 237 virtual void 238 normalize(const UChar *src, const UChar *limit, 239 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 240 impl.decompose(src, limit, &buffer, errorCode); 241 } 242 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 243 virtual void 244 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 245 UnicodeString &safeMiddle, 246 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 247 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 248 } 249 virtual const UChar * 250 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 251 return impl.decompose(src, limit, NULL, errorCode); 252 } 253 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 254 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 255 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 256 } 257 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 258 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 259 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 260 }; 261 262 class ComposeNormalizer2 : public Normalizer2WithImpl { 263 public: 264 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 265 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 266 267 private: 268 virtual void 269 normalize(const UChar *src, const UChar *limit, 270 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 271 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 272 } 273 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 274 virtual void 275 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 276 UnicodeString &safeMiddle, 277 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 278 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 279 } 280 281 virtual UBool 282 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 283 if(U_FAILURE(errorCode)) { 284 return FALSE; 285 } 286 const UChar *sArray=s.getBuffer(); 287 if(sArray==NULL) { 288 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 289 return FALSE; 290 } 291 UnicodeString temp; 292 ReorderingBuffer buffer(impl, temp); 293 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 294 return FALSE; 295 } 296 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 297 } 298 virtual UNormalizationCheckResult 299 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 300 if(U_FAILURE(errorCode)) { 301 return UNORM_MAYBE; 302 } 303 const UChar *sArray=s.getBuffer(); 304 if(sArray==NULL) { 305 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 306 return UNORM_MAYBE; 307 } 308 UNormalizationCheckResult qcResult=UNORM_YES; 309 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 310 return qcResult; 311 } 312 virtual const UChar * 313 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 314 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 315 } 316 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 317 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 318 return impl.getCompQuickCheck(impl.getNorm16(c)); 319 } 320 virtual UBool hasBoundaryBefore(UChar32 c) const { 321 return impl.hasCompBoundaryBefore(c); 322 } 323 virtual UBool hasBoundaryAfter(UChar32 c) const { 324 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 325 } 326 virtual UBool isInert(UChar32 c) const { 327 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 328 } 329 330 const UBool onlyContiguous; 331 }; 332 333 class FCDNormalizer2 : public Normalizer2WithImpl { 334 public: 335 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 336 337 private: 338 virtual void 339 normalize(const UChar *src, const UChar *limit, 340 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 341 impl.makeFCD(src, limit, &buffer, errorCode); 342 } 343 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 344 virtual void 345 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 346 UnicodeString &safeMiddle, 347 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 348 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 349 } 350 virtual const UChar * 351 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 352 return impl.makeFCD(src, limit, NULL, errorCode); 353 } 354 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 355 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 356 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 357 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 358 }; 359 360 // instance cache ---------------------------------------------------------- *** 361 362 struct Norm2AllModes : public UMemory { 363 static Norm2AllModes *createInstance(const char *packageName, 364 const char *name, 365 UErrorCode &errorCode); 366 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 367 368 Normalizer2Impl impl; 369 ComposeNormalizer2 comp; 370 DecomposeNormalizer2 decomp; 371 FCDNormalizer2 fcd; 372 ComposeNormalizer2 fcc; 373 }; 374 375 Norm2AllModes * 376 Norm2AllModes::createInstance(const char *packageName, 377 const char *name, 378 UErrorCode &errorCode) { 379 if(U_FAILURE(errorCode)) { 380 return NULL; 381 } 382 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 383 if(allModes.isNull()) { 384 errorCode=U_MEMORY_ALLOCATION_ERROR; 385 return NULL; 386 } 387 allModes->impl.load(packageName, name, errorCode); 388 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; 389 } 390 391 U_CDECL_BEGIN 392 static UBool U_CALLCONV uprv_normalizer2_cleanup(); 393 U_CDECL_END 394 395 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { 396 public: 397 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : 398 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} 399 Norm2AllModes *getInstance(UErrorCode &errorCode) { 400 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); 401 } 402 private: 403 static void *createInstance(const void *context, UErrorCode &errorCode) { 404 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 405 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); 406 } 407 408 const char *name; 409 }; 410 411 STATIC_TRI_STATE_SINGLETON(nfcSingleton); 412 STATIC_TRI_STATE_SINGLETON(nfkcSingleton); 413 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); 414 415 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { 416 public: 417 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} 418 Normalizer2 *getInstance(UErrorCode &errorCode) { 419 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); 420 } 421 private: 422 static void *createInstance(const void *, UErrorCode &errorCode) { 423 Normalizer2 *noop=new NoopNormalizer2; 424 if(noop==NULL) { 425 errorCode=U_MEMORY_ALLOCATION_ERROR; 426 } 427 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 428 return noop; 429 } 430 }; 431 432 STATIC_SIMPLE_SINGLETON(noopSingleton); 433 434 static UHashtable *cache=NULL; 435 436 U_CDECL_BEGIN 437 438 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 439 delete (Norm2AllModes *)allModes; 440 } 441 442 static UBool U_CALLCONV uprv_normalizer2_cleanup() { 443 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); 444 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); 445 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); 446 Norm2Singleton(noopSingleton).deleteInstance(); 447 uhash_close(cache); 448 cache=NULL; 449 return TRUE; 450 } 451 452 U_CDECL_END 453 454 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 455 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 456 return allModes!=NULL ? &allModes->comp : NULL; 457 } 458 459 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 460 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 461 return allModes!=NULL ? &allModes->decomp : NULL; 462 } 463 464 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 465 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 466 if(allModes!=NULL) { 467 allModes->impl.getFCDTrie(errorCode); 468 return &allModes->fcd; 469 } else { 470 return NULL; 471 } 472 } 473 474 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 475 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 476 return allModes!=NULL ? &allModes->fcc : NULL; 477 } 478 479 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 480 Norm2AllModes *allModes= 481 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 482 return allModes!=NULL ? &allModes->comp : NULL; 483 } 484 485 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 486 Norm2AllModes *allModes= 487 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 488 return allModes!=NULL ? &allModes->decomp : NULL; 489 } 490 491 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 492 Norm2AllModes *allModes= 493 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 494 return allModes!=NULL ? &allModes->comp : NULL; 495 } 496 497 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 498 return Norm2Singleton(noopSingleton).getInstance(errorCode); 499 } 500 501 const Normalizer2 * 502 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 503 if(U_FAILURE(errorCode)) { 504 return NULL; 505 } 506 switch(mode) { 507 case UNORM_NFD: 508 return getNFDInstance(errorCode); 509 case UNORM_NFKD: 510 return getNFKDInstance(errorCode); 511 case UNORM_NFC: 512 return getNFCInstance(errorCode); 513 case UNORM_NFKC: 514 return getNFKCInstance(errorCode); 515 case UNORM_FCD: 516 return getFCDInstance(errorCode); 517 default: // UNORM_NONE 518 return getNoopInstance(errorCode); 519 } 520 } 521 522 const Normalizer2Impl * 523 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 524 Norm2AllModes *allModes= 525 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 526 return allModes!=NULL ? &allModes->impl : NULL; 527 } 528 529 const Normalizer2Impl * 530 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 531 Norm2AllModes *allModes= 532 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 533 return allModes!=NULL ? &allModes->impl : NULL; 534 } 535 536 const Normalizer2Impl * 537 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 538 Norm2AllModes *allModes= 539 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 540 return allModes!=NULL ? &allModes->impl : NULL; 541 } 542 543 const Normalizer2Impl * 544 Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 545 return &((Normalizer2WithImpl *)norm2)->impl; 546 } 547 548 const UTrie2 * 549 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { 550 Norm2AllModes *allModes= 551 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 552 if(allModes!=NULL) { 553 return allModes->impl.getFCDTrie(errorCode); 554 } else { 555 return NULL; 556 } 557 } 558 559 const Normalizer2 * 560 Normalizer2::getInstance(const char *packageName, 561 const char *name, 562 UNormalization2Mode mode, 563 UErrorCode &errorCode) { 564 if(U_FAILURE(errorCode)) { 565 return NULL; 566 } 567 if(name==NULL || *name==0) { 568 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 569 } 570 Norm2AllModes *allModes=NULL; 571 if(packageName==NULL) { 572 if(0==uprv_strcmp(name, "nfc")) { 573 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 574 } else if(0==uprv_strcmp(name, "nfkc")) { 575 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 576 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 577 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 578 } 579 } 580 if(allModes==NULL && U_SUCCESS(errorCode)) { 581 { 582 Mutex lock; 583 if(cache!=NULL) { 584 allModes=(Norm2AllModes *)uhash_get(cache, name); 585 } 586 } 587 if(allModes==NULL) { 588 LocalPointer<Norm2AllModes> localAllModes( 589 Norm2AllModes::createInstance(packageName, name, errorCode)); 590 if(U_SUCCESS(errorCode)) { 591 Mutex lock; 592 if(cache==NULL) { 593 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 594 if(U_FAILURE(errorCode)) { 595 return NULL; 596 } 597 uhash_setKeyDeleter(cache, uprv_free); 598 uhash_setValueDeleter(cache, deleteNorm2AllModes); 599 } 600 void *temp=uhash_get(cache, name); 601 if(temp==NULL) { 602 int32_t keyLength=uprv_strlen(name)+1; 603 char *nameCopy=(char *)uprv_malloc(keyLength); 604 if(nameCopy==NULL) { 605 errorCode=U_MEMORY_ALLOCATION_ERROR; 606 return NULL; 607 } 608 uprv_memcpy(nameCopy, name, keyLength); 609 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 610 } else { 611 // race condition 612 allModes=(Norm2AllModes *)temp; 613 } 614 } 615 } 616 } 617 if(allModes!=NULL && U_SUCCESS(errorCode)) { 618 switch(mode) { 619 case UNORM2_COMPOSE: 620 return &allModes->comp; 621 case UNORM2_DECOMPOSE: 622 return &allModes->decomp; 623 case UNORM2_FCD: 624 allModes->impl.getFCDTrie(errorCode); 625 return &allModes->fcd; 626 case UNORM2_COMPOSE_CONTIGUOUS: 627 return &allModes->fcc; 628 default: 629 break; // do nothing 630 } 631 } 632 return NULL; 633 } 634 635 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) 636 637 U_NAMESPACE_END 638 639 // C API ------------------------------------------------------------------- *** 640 641 U_NAMESPACE_USE 642 643 U_DRAFT const UNormalizer2 * U_EXPORT2 644 unorm2_getInstance(const char *packageName, 645 const char *name, 646 UNormalization2Mode mode, 647 UErrorCode *pErrorCode) { 648 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 649 } 650 651 U_DRAFT void U_EXPORT2 652 unorm2_close(UNormalizer2 *norm2) { 653 delete (Normalizer2 *)norm2; 654 } 655 656 U_DRAFT int32_t U_EXPORT2 657 unorm2_normalize(const UNormalizer2 *norm2, 658 const UChar *src, int32_t length, 659 UChar *dest, int32_t capacity, 660 UErrorCode *pErrorCode) { 661 if(U_FAILURE(*pErrorCode)) { 662 return 0; 663 } 664 if( (src==NULL ? length!=0 : length<-1) || 665 (dest==NULL ? capacity!=0 : capacity<0) || 666 (src==dest && src!=NULL) 667 ) { 668 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 669 return 0; 670 } 671 UnicodeString destString(dest, 0, capacity); 672 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 673 if(length!=0) { 674 const Normalizer2 *n2=(const Normalizer2 *)norm2; 675 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 676 if(n2wi!=NULL) { 677 // Avoid duplicate argument checking and support NUL-terminated src. 678 ReorderingBuffer buffer(n2wi->impl, destString); 679 if(buffer.init(length, *pErrorCode)) { 680 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); 681 } 682 } else { 683 UnicodeString srcString(length<0, src, length); 684 n2->normalize(srcString, destString, *pErrorCode); 685 } 686 } 687 return destString.extract(dest, capacity, *pErrorCode); 688 } 689 690 static int32_t 691 normalizeSecondAndAppend(const UNormalizer2 *norm2, 692 UChar *first, int32_t firstLength, int32_t firstCapacity, 693 const UChar *second, int32_t secondLength, 694 UBool doNormalize, 695 UErrorCode *pErrorCode) { 696 if(U_FAILURE(*pErrorCode)) { 697 return 0; 698 } 699 if( (second==NULL ? secondLength!=0 : secondLength<-1) || 700 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : 701 (firstCapacity<0 || firstLength<-1)) || 702 (first==second && first!=NULL) 703 ) { 704 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 705 return 0; 706 } 707 UnicodeString firstString(first, firstLength, firstCapacity); 708 firstLength=firstString.length(); // In case it was -1. 709 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 710 if(secondLength!=0) { 711 const Normalizer2 *n2=(const Normalizer2 *)norm2; 712 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 713 if(n2wi!=NULL) { 714 // Avoid duplicate argument checking and support NUL-terminated src. 715 UnicodeString safeMiddle; 716 { 717 ReorderingBuffer buffer(n2wi->impl, firstString); 718 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 719 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 720 doNormalize, safeMiddle, buffer, *pErrorCode); 721 } 722 } // The ReorderingBuffer destructor finalizes firstString. 723 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { 724 // Restore the modified suffix of the first string. 725 // This does not restore first[] array contents between firstLength and firstCapacity. 726 // (That might be uninitialized memory, as far as we know.) 727 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); 728 if(firstLength<firstCapacity) { 729 first[firstLength]=0; // NUL-terminate in case it was originally. 730 } 731 } 732 } else { 733 UnicodeString secondString(secondLength<0, second, secondLength); 734 if(doNormalize) { 735 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 736 } else { 737 n2->append(firstString, secondString, *pErrorCode); 738 } 739 } 740 } 741 return firstString.extract(first, firstCapacity, *pErrorCode); 742 } 743 744 U_DRAFT int32_t U_EXPORT2 745 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 746 UChar *first, int32_t firstLength, int32_t firstCapacity, 747 const UChar *second, int32_t secondLength, 748 UErrorCode *pErrorCode) { 749 return normalizeSecondAndAppend(norm2, 750 first, firstLength, firstCapacity, 751 second, secondLength, 752 TRUE, pErrorCode); 753 } 754 755 U_DRAFT int32_t U_EXPORT2 756 unorm2_append(const UNormalizer2 *norm2, 757 UChar *first, int32_t firstLength, int32_t firstCapacity, 758 const UChar *second, int32_t secondLength, 759 UErrorCode *pErrorCode) { 760 return normalizeSecondAndAppend(norm2, 761 first, firstLength, firstCapacity, 762 second, secondLength, 763 FALSE, pErrorCode); 764 } 765 766 U_DRAFT int32_t U_EXPORT2 767 unorm2_getDecomposition(const UNormalizer2 *norm2, 768 UChar32 c, UChar *decomposition, int32_t capacity, 769 UErrorCode *pErrorCode) { 770 if(U_FAILURE(*pErrorCode)) { 771 return 0; 772 } 773 if(decomposition==NULL ? capacity!=0 : capacity<0) { 774 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 775 return 0; 776 } 777 UnicodeString destString(decomposition, 0, capacity); 778 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { 779 return destString.extract(decomposition, capacity, *pErrorCode); 780 } else { 781 return -1; 782 } 783 } 784 785 U_DRAFT UBool U_EXPORT2 786 unorm2_isNormalized(const UNormalizer2 *norm2, 787 const UChar *s, int32_t length, 788 UErrorCode *pErrorCode) { 789 if(U_FAILURE(*pErrorCode)) { 790 return 0; 791 } 792 if((s==NULL && length!=0) || length<-1) { 793 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 794 return 0; 795 } 796 UnicodeString sString(length<0, s, length); 797 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 798 } 799 800 U_DRAFT UNormalizationCheckResult U_EXPORT2 801 unorm2_quickCheck(const UNormalizer2 *norm2, 802 const UChar *s, int32_t length, 803 UErrorCode *pErrorCode) { 804 if(U_FAILURE(*pErrorCode)) { 805 return UNORM_NO; 806 } 807 if((s==NULL && length!=0) || length<-1) { 808 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 809 return UNORM_NO; 810 } 811 UnicodeString sString(length<0, s, length); 812 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 813 } 814 815 U_DRAFT int32_t U_EXPORT2 816 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 817 const UChar *s, int32_t length, 818 UErrorCode *pErrorCode) { 819 if(U_FAILURE(*pErrorCode)) { 820 return 0; 821 } 822 if((s==NULL && length!=0) || length<-1) { 823 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 824 return 0; 825 } 826 UnicodeString sString(length<0, s, length); 827 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 828 } 829 830 U_DRAFT UBool U_EXPORT2 831 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 832 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 833 } 834 835 U_DRAFT UBool U_EXPORT2 836 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 837 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 838 } 839 840 U_DRAFT UBool U_EXPORT2 841 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 842 return ((const Normalizer2 *)norm2)->isInert(c); 843 } 844 845 // Some properties APIs ---------------------------------------------------- *** 846 847 U_CFUNC UNormalizationCheckResult U_EXPORT2 848 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 849 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 850 return UNORM_YES; 851 } 852 UErrorCode errorCode=U_ZERO_ERROR; 853 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 854 if(U_SUCCESS(errorCode)) { 855 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 856 } else { 857 return UNORM_MAYBE; 858 } 859 } 860 861 U_CAPI const uint16_t * U_EXPORT2 862 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { 863 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); 864 if(U_SUCCESS(*pErrorCode)) { 865 fcdHighStart=trie->highStart; 866 return trie->index; 867 } else { 868 return NULL; 869 } 870 } 871 872 #endif // !UCONFIG_NO_NORMALIZATION 873