1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: normalizer2.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009nov22 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/localpointer.h" 22 #include "unicode/normalizer2.h" 23 #include "unicode/unistr.h" 24 #include "unicode/unorm.h" 25 #include "cpputils.h" 26 #include "cstring.h" 27 #include "mutex.h" 28 #include "normalizer2impl.h" 29 #include "uassert.h" 30 #include "ucln_cmn.h" 31 #include "uhash.h" 32 33 U_NAMESPACE_BEGIN 34 35 // Public API dispatch via Normalizer2 subclasses -------------------------- *** 36 37 Normalizer2::~Normalizer2() {} 38 39 UBool 40 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { 41 return FALSE; 42 } 43 44 UChar32 45 Normalizer2::composePair(UChar32, UChar32) const { 46 return U_SENTINEL; 47 } 48 49 uint8_t 50 Normalizer2::getCombiningClass(UChar32 /*c*/) const { 51 return 0; 52 } 53 54 // Normalizer2 implementation for the old UNORM_NONE. 55 class NoopNormalizer2 : public Normalizer2 { 56 virtual ~NoopNormalizer2(); 57 58 virtual UnicodeString & 59 normalize(const UnicodeString &src, 60 UnicodeString &dest, 61 UErrorCode &errorCode) const { 62 if(U_SUCCESS(errorCode)) { 63 if(&dest!=&src) { 64 dest=src; 65 } else { 66 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 67 } 68 } 69 return dest; 70 } 71 virtual UnicodeString & 72 normalizeSecondAndAppend(UnicodeString &first, 73 const UnicodeString &second, 74 UErrorCode &errorCode) const { 75 if(U_SUCCESS(errorCode)) { 76 if(&first!=&second) { 77 first.append(second); 78 } else { 79 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 80 } 81 } 82 return first; 83 } 84 virtual UnicodeString & 85 append(UnicodeString &first, 86 const UnicodeString &second, 87 UErrorCode &errorCode) const { 88 if(U_SUCCESS(errorCode)) { 89 if(&first!=&second) { 90 first.append(second); 91 } else { 92 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 93 } 94 } 95 return first; 96 } 97 virtual UBool 98 getDecomposition(UChar32, UnicodeString &) const { 99 return FALSE; 100 } 101 // No need to override the default getRawDecomposition(). 102 virtual UBool 103 isNormalized(const UnicodeString &, UErrorCode &) const { 104 return TRUE; 105 } 106 virtual UNormalizationCheckResult 107 quickCheck(const UnicodeString &, UErrorCode &) const { 108 return UNORM_YES; 109 } 110 virtual int32_t 111 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 112 return s.length(); 113 } 114 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 115 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 116 virtual UBool isInert(UChar32) const { return TRUE; } 117 }; 118 119 NoopNormalizer2::~NoopNormalizer2() {} 120 121 // Intermediate class: 122 // Has Normalizer2Impl and does boilerplate argument checking and setup. 123 class Normalizer2WithImpl : public Normalizer2 { 124 public: 125 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 126 virtual ~Normalizer2WithImpl(); 127 128 // normalize 129 virtual UnicodeString & 130 normalize(const UnicodeString &src, 131 UnicodeString &dest, 132 UErrorCode &errorCode) const { 133 if(U_FAILURE(errorCode)) { 134 dest.setToBogus(); 135 return dest; 136 } 137 const UChar *sArray=src.getBuffer(); 138 if(&dest==&src || sArray==NULL) { 139 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 140 dest.setToBogus(); 141 return dest; 142 } 143 dest.remove(); 144 ReorderingBuffer buffer(impl, dest); 145 if(buffer.init(src.length(), errorCode)) { 146 normalize(sArray, sArray+src.length(), buffer, errorCode); 147 } 148 return dest; 149 } 150 virtual void 151 normalize(const UChar *src, const UChar *limit, 152 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 153 154 // normalize and append 155 virtual UnicodeString & 156 normalizeSecondAndAppend(UnicodeString &first, 157 const UnicodeString &second, 158 UErrorCode &errorCode) const { 159 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 160 } 161 virtual UnicodeString & 162 append(UnicodeString &first, 163 const UnicodeString &second, 164 UErrorCode &errorCode) const { 165 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 166 } 167 UnicodeString & 168 normalizeSecondAndAppend(UnicodeString &first, 169 const UnicodeString &second, 170 UBool doNormalize, 171 UErrorCode &errorCode) const { 172 uprv_checkCanGetBuffer(first, errorCode); 173 if(U_FAILURE(errorCode)) { 174 return first; 175 } 176 const UChar *secondArray=second.getBuffer(); 177 if(&first==&second || secondArray==NULL) { 178 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 179 return first; 180 } 181 int32_t firstLength=first.length(); 182 UnicodeString safeMiddle; 183 { 184 ReorderingBuffer buffer(impl, first); 185 if(buffer.init(firstLength+second.length(), errorCode)) { 186 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 187 safeMiddle, buffer, errorCode); 188 } 189 } // The ReorderingBuffer destructor finalizes the first string. 190 if(U_FAILURE(errorCode)) { 191 // Restore the modified suffix of the first string. 192 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 193 } 194 return first; 195 } 196 virtual void 197 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 198 UnicodeString &safeMiddle, 199 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 200 virtual UBool 201 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 202 UChar buffer[4]; 203 int32_t length; 204 const UChar *d=impl.getDecomposition(c, buffer, length); 205 if(d==NULL) { 206 return FALSE; 207 } 208 if(d==buffer) { 209 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 210 } else { 211 decomposition.setTo(FALSE, d, length); // read-only alias 212 } 213 return TRUE; 214 } 215 virtual UBool 216 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 217 UChar buffer[30]; 218 int32_t length; 219 const UChar *d=impl.getRawDecomposition(c, buffer, length); 220 if(d==NULL) { 221 return FALSE; 222 } 223 if(d==buffer) { 224 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 225 } else { 226 decomposition.setTo(FALSE, d, length); // read-only alias 227 } 228 return TRUE; 229 } 230 virtual UChar32 231 composePair(UChar32 a, UChar32 b) const { 232 return impl.composePair(a, b); 233 } 234 235 virtual uint8_t 236 getCombiningClass(UChar32 c) const { 237 return impl.getCC(impl.getNorm16(c)); 238 } 239 240 // quick checks 241 virtual UBool 242 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 243 if(U_FAILURE(errorCode)) { 244 return FALSE; 245 } 246 const UChar *sArray=s.getBuffer(); 247 if(sArray==NULL) { 248 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 249 return FALSE; 250 } 251 const UChar *sLimit=sArray+s.length(); 252 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 253 } 254 virtual UNormalizationCheckResult 255 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 256 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 257 } 258 virtual int32_t 259 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 260 if(U_FAILURE(errorCode)) { 261 return 0; 262 } 263 const UChar *sArray=s.getBuffer(); 264 if(sArray==NULL) { 265 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 266 return 0; 267 } 268 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 269 } 270 virtual const UChar * 271 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 272 273 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 274 return UNORM_YES; 275 } 276 277 const Normalizer2Impl &impl; 278 }; 279 280 Normalizer2WithImpl::~Normalizer2WithImpl() {} 281 282 class DecomposeNormalizer2 : public Normalizer2WithImpl { 283 public: 284 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 285 virtual ~DecomposeNormalizer2(); 286 287 private: 288 virtual void 289 normalize(const UChar *src, const UChar *limit, 290 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 291 impl.decompose(src, limit, &buffer, errorCode); 292 } 293 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 294 virtual void 295 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 296 UnicodeString &safeMiddle, 297 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 298 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 299 } 300 virtual const UChar * 301 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 302 return impl.decompose(src, limit, NULL, errorCode); 303 } 304 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 305 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 306 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 307 } 308 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 309 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 310 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 311 }; 312 313 DecomposeNormalizer2::~DecomposeNormalizer2() {} 314 315 class ComposeNormalizer2 : public Normalizer2WithImpl { 316 public: 317 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 318 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 319 virtual ~ComposeNormalizer2(); 320 321 private: 322 virtual void 323 normalize(const UChar *src, const UChar *limit, 324 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 325 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 326 } 327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 328 virtual void 329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 330 UnicodeString &safeMiddle, 331 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 332 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 333 } 334 335 virtual UBool 336 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 337 if(U_FAILURE(errorCode)) { 338 return FALSE; 339 } 340 const UChar *sArray=s.getBuffer(); 341 if(sArray==NULL) { 342 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 343 return FALSE; 344 } 345 UnicodeString temp; 346 ReorderingBuffer buffer(impl, temp); 347 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 348 return FALSE; 349 } 350 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 351 } 352 virtual UNormalizationCheckResult 353 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 354 if(U_FAILURE(errorCode)) { 355 return UNORM_MAYBE; 356 } 357 const UChar *sArray=s.getBuffer(); 358 if(sArray==NULL) { 359 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 360 return UNORM_MAYBE; 361 } 362 UNormalizationCheckResult qcResult=UNORM_YES; 363 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 364 return qcResult; 365 } 366 virtual const UChar * 367 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 368 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 369 } 370 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 371 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 372 return impl.getCompQuickCheck(impl.getNorm16(c)); 373 } 374 virtual UBool hasBoundaryBefore(UChar32 c) const { 375 return impl.hasCompBoundaryBefore(c); 376 } 377 virtual UBool hasBoundaryAfter(UChar32 c) const { 378 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 379 } 380 virtual UBool isInert(UChar32 c) const { 381 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 382 } 383 384 const UBool onlyContiguous; 385 }; 386 387 ComposeNormalizer2::~ComposeNormalizer2() {} 388 389 class FCDNormalizer2 : public Normalizer2WithImpl { 390 public: 391 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 392 virtual ~FCDNormalizer2(); 393 394 private: 395 virtual void 396 normalize(const UChar *src, const UChar *limit, 397 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 398 impl.makeFCD(src, limit, &buffer, errorCode); 399 } 400 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 401 virtual void 402 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 403 UnicodeString &safeMiddle, 404 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 405 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 406 } 407 virtual const UChar * 408 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 409 return impl.makeFCD(src, limit, NULL, errorCode); 410 } 411 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 412 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 413 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 414 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 415 }; 416 417 FCDNormalizer2::~FCDNormalizer2() {} 418 419 // instance cache ---------------------------------------------------------- *** 420 421 struct Norm2AllModes : public UMemory { 422 static Norm2AllModes *createInstance(const char *packageName, 423 const char *name, 424 UErrorCode &errorCode); 425 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 426 427 Normalizer2Impl impl; 428 ComposeNormalizer2 comp; 429 DecomposeNormalizer2 decomp; 430 FCDNormalizer2 fcd; 431 ComposeNormalizer2 fcc; 432 }; 433 434 Norm2AllModes * 435 Norm2AllModes::createInstance(const char *packageName, 436 const char *name, 437 UErrorCode &errorCode) { 438 if(U_FAILURE(errorCode)) { 439 return NULL; 440 } 441 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 442 if(allModes.isNull()) { 443 errorCode=U_MEMORY_ALLOCATION_ERROR; 444 return NULL; 445 } 446 allModes->impl.load(packageName, name, errorCode); 447 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; 448 } 449 450 U_CDECL_BEGIN 451 static UBool U_CALLCONV uprv_normalizer2_cleanup(); 452 U_CDECL_END 453 454 455 static Norm2AllModes *nfcSingleton; 456 static Norm2AllModes *nfkcSingleton; 457 static Norm2AllModes *nfkc_cfSingleton; 458 static Normalizer2 *noopSingleton; 459 static UHashtable *cache=NULL; 460 461 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; 462 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; 463 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; 464 static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; 465 466 // UInitOnce singleton initialization function 467 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { 468 if (uprv_strcmp(what, "nfc") == 0) { 469 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); 470 } else if (uprv_strcmp(what, "nfkc") == 0) { 471 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); 472 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { 473 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); 474 } else if (uprv_strcmp(what, "noop") == 0) { 475 noopSingleton = new NoopNormalizer2; 476 } else { 477 U_ASSERT(FALSE); // Unknown singleton 478 } 479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 480 } 481 482 U_CDECL_BEGIN 483 484 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 485 delete (Norm2AllModes *)allModes; 486 } 487 488 static UBool U_CALLCONV uprv_normalizer2_cleanup() { 489 delete nfcSingleton; 490 nfcSingleton = NULL; 491 delete nfkcSingleton; 492 nfkcSingleton = NULL; 493 delete nfkc_cfSingleton; 494 nfkc_cfSingleton = NULL; 495 delete noopSingleton; 496 noopSingleton = NULL; 497 uhash_close(cache); 498 cache=NULL; 499 nfcInitOnce.reset(); 500 nfkcInitOnce.reset(); 501 nfkc_cfInitOnce.reset(); 502 noopInitOnce.reset(); 503 return TRUE; 504 } 505 506 U_CDECL_END 507 508 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 509 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 510 return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL; 511 } 512 513 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 514 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 515 return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL; 516 } 517 518 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 519 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 520 return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL; 521 } 522 523 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 524 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 525 return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL; 526 } 527 528 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 529 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 530 return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL; 531 } 532 533 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 534 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 535 return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL; 536 } 537 538 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 539 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 540 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL; 541 } 542 543 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 544 umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode); 545 return noopSingleton; 546 } 547 548 const Normalizer2 * 549 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 550 if(U_FAILURE(errorCode)) { 551 return NULL; 552 } 553 switch(mode) { 554 case UNORM_NFD: 555 return getNFDInstance(errorCode); 556 case UNORM_NFKD: 557 return getNFKDInstance(errorCode); 558 case UNORM_NFC: 559 return getNFCInstance(errorCode); 560 case UNORM_NFKC: 561 return getNFKCInstance(errorCode); 562 case UNORM_FCD: 563 return getFCDInstance(errorCode); 564 default: // UNORM_NONE 565 return getNoopInstance(errorCode); 566 } 567 } 568 569 const Normalizer2Impl * 570 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 571 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 572 return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL; 573 } 574 575 const Normalizer2Impl * 576 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 577 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 578 return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL; 579 } 580 581 const Normalizer2Impl * 582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 583 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 584 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL; 585 } 586 587 const Normalizer2Impl * 588 Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 589 return &((Normalizer2WithImpl *)norm2)->impl; 590 } 591 592 const Normalizer2 * 593 Normalizer2::getNFCInstance(UErrorCode &errorCode) { 594 return Normalizer2Factory::getNFCInstance(errorCode); 595 } 596 597 const Normalizer2 * 598 Normalizer2::getNFDInstance(UErrorCode &errorCode) { 599 return Normalizer2Factory::getNFDInstance(errorCode); 600 } 601 602 const Normalizer2 * 603 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 604 return Normalizer2Factory::getNFKCInstance(errorCode); 605 } 606 607 const Normalizer2 * 608 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 609 return Normalizer2Factory::getNFKDInstance(errorCode); 610 } 611 612 const Normalizer2 * 613 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 614 return Normalizer2Factory::getNFKC_CFInstance(errorCode); 615 } 616 617 const Normalizer2 * 618 Normalizer2::getInstance(const char *packageName, 619 const char *name, 620 UNormalization2Mode mode, 621 UErrorCode &errorCode) { 622 if(U_FAILURE(errorCode)) { 623 return NULL; 624 } 625 if(name==NULL || *name==0) { 626 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 627 return NULL; 628 } 629 Norm2AllModes *allModes=NULL; 630 if(packageName==NULL) { 631 if(0==uprv_strcmp(name, "nfc")) { 632 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 633 allModes=nfcSingleton; 634 } else if(0==uprv_strcmp(name, "nfkc")) { 635 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 636 allModes=nfkcSingleton; 637 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 638 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 639 allModes=nfkc_cfSingleton; 640 } 641 } 642 if(allModes==NULL && U_SUCCESS(errorCode)) { 643 { 644 Mutex lock; 645 if(cache!=NULL) { 646 allModes=(Norm2AllModes *)uhash_get(cache, name); 647 } 648 } 649 if(allModes==NULL) { 650 LocalPointer<Norm2AllModes> localAllModes( 651 Norm2AllModes::createInstance(packageName, name, errorCode)); 652 if(U_SUCCESS(errorCode)) { 653 Mutex lock; 654 if(cache==NULL) { 655 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 656 if(U_FAILURE(errorCode)) { 657 return NULL; 658 } 659 uhash_setKeyDeleter(cache, uprv_free); 660 uhash_setValueDeleter(cache, deleteNorm2AllModes); 661 } 662 void *temp=uhash_get(cache, name); 663 if(temp==NULL) { 664 int32_t keyLength=uprv_strlen(name)+1; 665 char *nameCopy=(char *)uprv_malloc(keyLength); 666 if(nameCopy==NULL) { 667 errorCode=U_MEMORY_ALLOCATION_ERROR; 668 return NULL; 669 } 670 uprv_memcpy(nameCopy, name, keyLength); 671 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 672 } else { 673 // race condition 674 allModes=(Norm2AllModes *)temp; 675 } 676 } 677 } 678 } 679 if(allModes!=NULL && U_SUCCESS(errorCode)) { 680 switch(mode) { 681 case UNORM2_COMPOSE: 682 return &allModes->comp; 683 case UNORM2_DECOMPOSE: 684 return &allModes->decomp; 685 case UNORM2_FCD: 686 return &allModes->fcd; 687 case UNORM2_COMPOSE_CONTIGUOUS: 688 return &allModes->fcc; 689 default: 690 break; // do nothing 691 } 692 } 693 return NULL; 694 } 695 696 U_NAMESPACE_END 697 698 // C API ------------------------------------------------------------------- *** 699 700 U_NAMESPACE_USE 701 702 U_CAPI const UNormalizer2 * U_EXPORT2 703 unorm2_getNFCInstance(UErrorCode *pErrorCode) { 704 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); 705 } 706 707 U_CAPI const UNormalizer2 * U_EXPORT2 708 unorm2_getNFDInstance(UErrorCode *pErrorCode) { 709 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); 710 } 711 712 U_CAPI const UNormalizer2 * U_EXPORT2 713 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 714 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 715 } 716 717 U_CAPI const UNormalizer2 * U_EXPORT2 718 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 719 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 720 } 721 722 U_CAPI const UNormalizer2 * U_EXPORT2 723 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 724 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 725 } 726 727 U_CAPI const UNormalizer2 * U_EXPORT2 728 unorm2_getInstance(const char *packageName, 729 const char *name, 730 UNormalization2Mode mode, 731 UErrorCode *pErrorCode) { 732 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 733 } 734 735 U_CAPI void U_EXPORT2 736 unorm2_close(UNormalizer2 *norm2) { 737 delete (Normalizer2 *)norm2; 738 } 739 740 U_CAPI int32_t U_EXPORT2 741 unorm2_normalize(const UNormalizer2 *norm2, 742 const UChar *src, int32_t length, 743 UChar *dest, int32_t capacity, 744 UErrorCode *pErrorCode) { 745 if(U_FAILURE(*pErrorCode)) { 746 return 0; 747 } 748 if( (src==NULL ? length!=0 : length<-1) || 749 (dest==NULL ? capacity!=0 : capacity<0) || 750 (src==dest && src!=NULL) 751 ) { 752 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 753 return 0; 754 } 755 UnicodeString destString(dest, 0, capacity); 756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 757 if(length!=0) { 758 const Normalizer2 *n2=(const Normalizer2 *)norm2; 759 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 760 if(n2wi!=NULL) { 761 // Avoid duplicate argument checking and support NUL-terminated src. 762 ReorderingBuffer buffer(n2wi->impl, destString); 763 if(buffer.init(length, *pErrorCode)) { 764 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); 765 } 766 } else { 767 UnicodeString srcString(length<0, src, length); 768 n2->normalize(srcString, destString, *pErrorCode); 769 } 770 } 771 return destString.extract(dest, capacity, *pErrorCode); 772 } 773 774 static int32_t 775 normalizeSecondAndAppend(const UNormalizer2 *norm2, 776 UChar *first, int32_t firstLength, int32_t firstCapacity, 777 const UChar *second, int32_t secondLength, 778 UBool doNormalize, 779 UErrorCode *pErrorCode) { 780 if(U_FAILURE(*pErrorCode)) { 781 return 0; 782 } 783 if( (second==NULL ? secondLength!=0 : secondLength<-1) || 784 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : 785 (firstCapacity<0 || firstLength<-1)) || 786 (first==second && first!=NULL) 787 ) { 788 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 789 return 0; 790 } 791 UnicodeString firstString(first, firstLength, firstCapacity); 792 firstLength=firstString.length(); // In case it was -1. 793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 794 if(secondLength!=0) { 795 const Normalizer2 *n2=(const Normalizer2 *)norm2; 796 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 797 if(n2wi!=NULL) { 798 // Avoid duplicate argument checking and support NUL-terminated src. 799 UnicodeString safeMiddle; 800 { 801 ReorderingBuffer buffer(n2wi->impl, firstString); 802 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 803 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 804 doNormalize, safeMiddle, buffer, *pErrorCode); 805 } 806 } // The ReorderingBuffer destructor finalizes firstString. 807 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { 808 // Restore the modified suffix of the first string. 809 // This does not restore first[] array contents between firstLength and firstCapacity. 810 // (That might be uninitialized memory, as far as we know.) 811 if(first!=NULL) { /* don't dereference NULL */ 812 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); 813 if(firstLength<firstCapacity) { 814 first[firstLength]=0; // NUL-terminate in case it was originally. 815 } 816 } 817 } 818 } else { 819 UnicodeString secondString(secondLength<0, second, secondLength); 820 if(doNormalize) { 821 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 822 } else { 823 n2->append(firstString, secondString, *pErrorCode); 824 } 825 } 826 } 827 return firstString.extract(first, firstCapacity, *pErrorCode); 828 } 829 830 U_CAPI int32_t U_EXPORT2 831 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 832 UChar *first, int32_t firstLength, int32_t firstCapacity, 833 const UChar *second, int32_t secondLength, 834 UErrorCode *pErrorCode) { 835 return normalizeSecondAndAppend(norm2, 836 first, firstLength, firstCapacity, 837 second, secondLength, 838 TRUE, pErrorCode); 839 } 840 841 U_CAPI int32_t U_EXPORT2 842 unorm2_append(const UNormalizer2 *norm2, 843 UChar *first, int32_t firstLength, int32_t firstCapacity, 844 const UChar *second, int32_t secondLength, 845 UErrorCode *pErrorCode) { 846 return normalizeSecondAndAppend(norm2, 847 first, firstLength, firstCapacity, 848 second, secondLength, 849 FALSE, pErrorCode); 850 } 851 852 U_CAPI int32_t U_EXPORT2 853 unorm2_getDecomposition(const UNormalizer2 *norm2, 854 UChar32 c, UChar *decomposition, int32_t capacity, 855 UErrorCode *pErrorCode) { 856 if(U_FAILURE(*pErrorCode)) { 857 return 0; 858 } 859 if(decomposition==NULL ? capacity!=0 : capacity<0) { 860 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 861 return 0; 862 } 863 UnicodeString destString(decomposition, 0, capacity); 864 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { 865 return destString.extract(decomposition, capacity, *pErrorCode); 866 } else { 867 return -1; 868 } 869 } 870 871 U_CAPI int32_t U_EXPORT2 872 unorm2_getRawDecomposition(const UNormalizer2 *norm2, 873 UChar32 c, UChar *decomposition, int32_t capacity, 874 UErrorCode *pErrorCode) { 875 if(U_FAILURE(*pErrorCode)) { 876 return 0; 877 } 878 if(decomposition==NULL ? capacity!=0 : capacity<0) { 879 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 880 return 0; 881 } 882 UnicodeString destString(decomposition, 0, capacity); 883 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { 884 return destString.extract(decomposition, capacity, *pErrorCode); 885 } else { 886 return -1; 887 } 888 } 889 890 U_CAPI UChar32 U_EXPORT2 891 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { 892 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); 893 } 894 895 U_CAPI uint8_t U_EXPORT2 896 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { 897 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); 898 } 899 900 U_CAPI UBool U_EXPORT2 901 unorm2_isNormalized(const UNormalizer2 *norm2, 902 const UChar *s, int32_t length, 903 UErrorCode *pErrorCode) { 904 if(U_FAILURE(*pErrorCode)) { 905 return 0; 906 } 907 if((s==NULL && length!=0) || length<-1) { 908 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 909 return 0; 910 } 911 UnicodeString sString(length<0, s, length); 912 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 913 } 914 915 U_CAPI UNormalizationCheckResult U_EXPORT2 916 unorm2_quickCheck(const UNormalizer2 *norm2, 917 const UChar *s, int32_t length, 918 UErrorCode *pErrorCode) { 919 if(U_FAILURE(*pErrorCode)) { 920 return UNORM_NO; 921 } 922 if((s==NULL && length!=0) || length<-1) { 923 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 924 return UNORM_NO; 925 } 926 UnicodeString sString(length<0, s, length); 927 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 928 } 929 930 U_CAPI int32_t U_EXPORT2 931 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 932 const UChar *s, int32_t length, 933 UErrorCode *pErrorCode) { 934 if(U_FAILURE(*pErrorCode)) { 935 return 0; 936 } 937 if((s==NULL && length!=0) || length<-1) { 938 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 939 return 0; 940 } 941 UnicodeString sString(length<0, s, length); 942 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 943 } 944 945 U_CAPI UBool U_EXPORT2 946 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 947 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 948 } 949 950 U_CAPI UBool U_EXPORT2 951 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 952 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 953 } 954 955 U_CAPI UBool U_EXPORT2 956 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 957 return ((const Normalizer2 *)norm2)->isInert(c); 958 } 959 960 // Some properties APIs ---------------------------------------------------- *** 961 962 U_CAPI uint8_t U_EXPORT2 963 u_getCombiningClass(UChar32 c) { 964 UErrorCode errorCode=U_ZERO_ERROR; 965 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); 966 if(U_SUCCESS(errorCode)) { 967 return nfd->getCombiningClass(c); 968 } else { 969 return 0; 970 } 971 } 972 973 U_CFUNC UNormalizationCheckResult 974 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 975 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 976 return UNORM_YES; 977 } 978 UErrorCode errorCode=U_ZERO_ERROR; 979 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 980 if(U_SUCCESS(errorCode)) { 981 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 982 } else { 983 return UNORM_MAYBE; 984 } 985 } 986 987 U_CFUNC uint16_t 988 unorm_getFCD16(UChar32 c) { 989 UErrorCode errorCode=U_ZERO_ERROR; 990 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 991 if(U_SUCCESS(errorCode)) { 992 return impl->getFCD16(c); 993 } else { 994 return 0; 995 } 996 } 997 998 #endif // !UCONFIG_NO_NORMALIZATION 999