Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2013, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  normalizer2.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov22
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/localpointer.h"
     22 #include "unicode/normalizer2.h"
     23 #include "unicode/unistr.h"
     24 #include "unicode/unorm.h"
     25 #include "cpputils.h"
     26 #include "cstring.h"
     27 #include "mutex.h"
     28 #include "normalizer2impl.h"
     29 #include "uassert.h"
     30 #include "ucln_cmn.h"
     31 #include "uhash.h"
     32 
     33 U_NAMESPACE_BEGIN
     34 
     35 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
     36 
     37 Normalizer2::~Normalizer2() {}
     38 
     39 UBool
     40 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
     41     return FALSE;
     42 }
     43 
     44 UChar32
     45 Normalizer2::composePair(UChar32, UChar32) const {
     46     return U_SENTINEL;
     47 }
     48 
     49 uint8_t
     50 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
     51     return 0;
     52 }
     53 
     54 // Normalizer2 implementation for the old UNORM_NONE.
     55 class NoopNormalizer2 : public Normalizer2 {
     56     virtual ~NoopNormalizer2();
     57 
     58     virtual UnicodeString &
     59     normalize(const UnicodeString &src,
     60               UnicodeString &dest,
     61               UErrorCode &errorCode) const {
     62         if(U_SUCCESS(errorCode)) {
     63             if(&dest!=&src) {
     64                 dest=src;
     65             } else {
     66                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     67             }
     68         }
     69         return dest;
     70     }
     71     virtual UnicodeString &
     72     normalizeSecondAndAppend(UnicodeString &first,
     73                              const UnicodeString &second,
     74                              UErrorCode &errorCode) const {
     75         if(U_SUCCESS(errorCode)) {
     76             if(&first!=&second) {
     77                 first.append(second);
     78             } else {
     79                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     80             }
     81         }
     82         return first;
     83     }
     84     virtual UnicodeString &
     85     append(UnicodeString &first,
     86            const UnicodeString &second,
     87            UErrorCode &errorCode) const {
     88         if(U_SUCCESS(errorCode)) {
     89             if(&first!=&second) {
     90                 first.append(second);
     91             } else {
     92                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     93             }
     94         }
     95         return first;
     96     }
     97     virtual UBool
     98     getDecomposition(UChar32, UnicodeString &) const {
     99         return FALSE;
    100     }
    101     // No need to override the default getRawDecomposition().
    102     virtual UBool
    103     isNormalized(const UnicodeString &, UErrorCode &) const {
    104         return TRUE;
    105     }
    106     virtual UNormalizationCheckResult
    107     quickCheck(const UnicodeString &, UErrorCode &) const {
    108         return UNORM_YES;
    109     }
    110     virtual int32_t
    111     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
    112         return s.length();
    113     }
    114     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
    115     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
    116     virtual UBool isInert(UChar32) const { return TRUE; }
    117 };
    118 
    119 NoopNormalizer2::~NoopNormalizer2() {}
    120 
    121 // Intermediate class:
    122 // Has Normalizer2Impl and does boilerplate argument checking and setup.
    123 class Normalizer2WithImpl : public Normalizer2 {
    124 public:
    125     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
    126     virtual ~Normalizer2WithImpl();
    127 
    128     // normalize
    129     virtual UnicodeString &
    130     normalize(const UnicodeString &src,
    131               UnicodeString &dest,
    132               UErrorCode &errorCode) const {
    133         if(U_FAILURE(errorCode)) {
    134             dest.setToBogus();
    135             return dest;
    136         }
    137         const UChar *sArray=src.getBuffer();
    138         if(&dest==&src || sArray==NULL) {
    139             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    140             dest.setToBogus();
    141             return dest;
    142         }
    143         dest.remove();
    144         ReorderingBuffer buffer(impl, dest);
    145         if(buffer.init(src.length(), errorCode)) {
    146             normalize(sArray, sArray+src.length(), buffer, errorCode);
    147         }
    148         return dest;
    149     }
    150     virtual void
    151     normalize(const UChar *src, const UChar *limit,
    152               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    153 
    154     // normalize and append
    155     virtual UnicodeString &
    156     normalizeSecondAndAppend(UnicodeString &first,
    157                              const UnicodeString &second,
    158                              UErrorCode &errorCode) const {
    159         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
    160     }
    161     virtual UnicodeString &
    162     append(UnicodeString &first,
    163            const UnicodeString &second,
    164            UErrorCode &errorCode) const {
    165         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
    166     }
    167     UnicodeString &
    168     normalizeSecondAndAppend(UnicodeString &first,
    169                              const UnicodeString &second,
    170                              UBool doNormalize,
    171                              UErrorCode &errorCode) const {
    172         uprv_checkCanGetBuffer(first, errorCode);
    173         if(U_FAILURE(errorCode)) {
    174             return first;
    175         }
    176         const UChar *secondArray=second.getBuffer();
    177         if(&first==&second || secondArray==NULL) {
    178             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    179             return first;
    180         }
    181         int32_t firstLength=first.length();
    182         UnicodeString safeMiddle;
    183         {
    184             ReorderingBuffer buffer(impl, first);
    185             if(buffer.init(firstLength+second.length(), errorCode)) {
    186                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
    187                                    safeMiddle, buffer, errorCode);
    188             }
    189         }  // The ReorderingBuffer destructor finalizes the first string.
    190         if(U_FAILURE(errorCode)) {
    191             // Restore the modified suffix of the first string.
    192             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
    193         }
    194         return first;
    195     }
    196     virtual void
    197     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    198                        UnicodeString &safeMiddle,
    199                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    200     virtual UBool
    201     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    202         UChar buffer[4];
    203         int32_t length;
    204         const UChar *d=impl.getDecomposition(c, buffer, length);
    205         if(d==NULL) {
    206             return FALSE;
    207         }
    208         if(d==buffer) {
    209             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    210         } else {
    211             decomposition.setTo(FALSE, d, length);  // read-only alias
    212         }
    213         return TRUE;
    214     }
    215     virtual UBool
    216     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    217         UChar buffer[30];
    218         int32_t length;
    219         const UChar *d=impl.getRawDecomposition(c, buffer, length);
    220         if(d==NULL) {
    221             return FALSE;
    222         }
    223         if(d==buffer) {
    224             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
    225         } else {
    226             decomposition.setTo(FALSE, d, length);  // read-only alias
    227         }
    228         return TRUE;
    229     }
    230     virtual UChar32
    231     composePair(UChar32 a, UChar32 b) const {
    232         return impl.composePair(a, b);
    233     }
    234 
    235     virtual uint8_t
    236     getCombiningClass(UChar32 c) const {
    237         return impl.getCC(impl.getNorm16(c));
    238     }
    239 
    240     // quick checks
    241     virtual UBool
    242     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    243         if(U_FAILURE(errorCode)) {
    244             return FALSE;
    245         }
    246         const UChar *sArray=s.getBuffer();
    247         if(sArray==NULL) {
    248             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    249             return FALSE;
    250         }
    251         const UChar *sLimit=sArray+s.length();
    252         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    253     }
    254     virtual UNormalizationCheckResult
    255     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    256         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    257     }
    258     virtual int32_t
    259     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    260         if(U_FAILURE(errorCode)) {
    261             return 0;
    262         }
    263         const UChar *sArray=s.getBuffer();
    264         if(sArray==NULL) {
    265             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    266             return 0;
    267         }
    268         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    269     }
    270     virtual const UChar *
    271     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
    272 
    273     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    274         return UNORM_YES;
    275     }
    276 
    277     const Normalizer2Impl &impl;
    278 };
    279 
    280 Normalizer2WithImpl::~Normalizer2WithImpl() {}
    281 
    282 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    283 public:
    284     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    285     virtual ~DecomposeNormalizer2();
    286 
    287 private:
    288     virtual void
    289     normalize(const UChar *src, const UChar *limit,
    290               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    291         impl.decompose(src, limit, &buffer, errorCode);
    292     }
    293     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    294     virtual void
    295     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    296                        UnicodeString &safeMiddle,
    297                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    298         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    299     }
    300     virtual const UChar *
    301     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    302         return impl.decompose(src, limit, NULL, errorCode);
    303     }
    304     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    305     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    306         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    307     }
    308     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
    309     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
    310     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
    311 };
    312 
    313 DecomposeNormalizer2::~DecomposeNormalizer2() {}
    314 
    315 class ComposeNormalizer2 : public Normalizer2WithImpl {
    316 public:
    317     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    318         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    319     virtual ~ComposeNormalizer2();
    320 
    321 private:
    322     virtual void
    323     normalize(const UChar *src, const UChar *limit,
    324               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    325         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    326     }
    327     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    328     virtual void
    329     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    330                        UnicodeString &safeMiddle,
    331                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    332         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    333     }
    334 
    335     virtual UBool
    336     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    337         if(U_FAILURE(errorCode)) {
    338             return FALSE;
    339         }
    340         const UChar *sArray=s.getBuffer();
    341         if(sArray==NULL) {
    342             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    343             return FALSE;
    344         }
    345         UnicodeString temp;
    346         ReorderingBuffer buffer(impl, temp);
    347         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    348             return FALSE;
    349         }
    350         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    351     }
    352     virtual UNormalizationCheckResult
    353     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    354         if(U_FAILURE(errorCode)) {
    355             return UNORM_MAYBE;
    356         }
    357         const UChar *sArray=s.getBuffer();
    358         if(sArray==NULL) {
    359             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    360             return UNORM_MAYBE;
    361         }
    362         UNormalizationCheckResult qcResult=UNORM_YES;
    363         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    364         return qcResult;
    365     }
    366     virtual const UChar *
    367     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
    368         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    369     }
    370     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    371     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    372         return impl.getCompQuickCheck(impl.getNorm16(c));
    373     }
    374     virtual UBool hasBoundaryBefore(UChar32 c) const {
    375         return impl.hasCompBoundaryBefore(c);
    376     }
    377     virtual UBool hasBoundaryAfter(UChar32 c) const {
    378         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    379     }
    380     virtual UBool isInert(UChar32 c) const {
    381         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    382     }
    383 
    384     const UBool onlyContiguous;
    385 };
    386 
    387 ComposeNormalizer2::~ComposeNormalizer2() {}
    388 
    389 class FCDNormalizer2 : public Normalizer2WithImpl {
    390 public:
    391     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    392     virtual ~FCDNormalizer2();
    393 
    394 private:
    395     virtual void
    396     normalize(const UChar *src, const UChar *limit,
    397               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    398         impl.makeFCD(src, limit, &buffer, errorCode);
    399     }
    400     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    401     virtual void
    402     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    403                        UnicodeString &safeMiddle,
    404                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    405         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    406     }
    407     virtual const UChar *
    408     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    409         return impl.makeFCD(src, limit, NULL, errorCode);
    410     }
    411     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    412     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    413     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    414     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
    415 };
    416 
    417 FCDNormalizer2::~FCDNormalizer2() {}
    418 
    419 // instance cache ---------------------------------------------------------- ***
    420 
    421 struct Norm2AllModes : public UMemory {
    422     static Norm2AllModes *createInstance(const char *packageName,
    423                                          const char *name,
    424                                          UErrorCode &errorCode);
    425     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
    426 
    427     Normalizer2Impl impl;
    428     ComposeNormalizer2 comp;
    429     DecomposeNormalizer2 decomp;
    430     FCDNormalizer2 fcd;
    431     ComposeNormalizer2 fcc;
    432 };
    433 
    434 Norm2AllModes *
    435 Norm2AllModes::createInstance(const char *packageName,
    436                               const char *name,
    437                               UErrorCode &errorCode) {
    438     if(U_FAILURE(errorCode)) {
    439         return NULL;
    440     }
    441     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
    442     if(allModes.isNull()) {
    443         errorCode=U_MEMORY_ALLOCATION_ERROR;
    444         return NULL;
    445     }
    446     allModes->impl.load(packageName, name, errorCode);
    447     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
    448 }
    449 
    450 U_CDECL_BEGIN
    451 static UBool U_CALLCONV uprv_normalizer2_cleanup();
    452 U_CDECL_END
    453 
    454 
    455 static Norm2AllModes *nfcSingleton;
    456 static Norm2AllModes *nfkcSingleton;
    457 static Norm2AllModes *nfkc_cfSingleton;
    458 static Normalizer2   *noopSingleton;
    459 static UHashtable    *cache=NULL;
    460 
    461 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
    462 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
    463 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
    464 static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
    465 
    466 // UInitOnce singleton initialization function
    467 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
    468     if (uprv_strcmp(what, "nfc") == 0) {
    469         nfcSingleton     = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
    470     } else if (uprv_strcmp(what, "nfkc") == 0) {
    471         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
    472     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
    473         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
    474     } else if (uprv_strcmp(what, "noop") == 0) {
    475         noopSingleton    = new NoopNormalizer2;
    476     } else {
    477         U_ASSERT(FALSE);   // Unknown singleton
    478     }
    479     ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    480 }
    481 
    482 U_CDECL_BEGIN
    483 
    484 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    485     delete (Norm2AllModes *)allModes;
    486 }
    487 
    488 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
    489     delete nfcSingleton;
    490     nfcSingleton = NULL;
    491     delete nfkcSingleton;
    492     nfkcSingleton = NULL;
    493     delete nfkc_cfSingleton;
    494     nfkc_cfSingleton = NULL;
    495     delete noopSingleton;
    496     noopSingleton = NULL;
    497     uhash_close(cache);
    498     cache=NULL;
    499     nfcInitOnce.reset();
    500     nfkcInitOnce.reset();
    501     nfkc_cfInitOnce.reset();
    502     noopInitOnce.reset();
    503     return TRUE;
    504 }
    505 
    506 U_CDECL_END
    507 
    508 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
    509     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    510     return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
    511 }
    512 
    513 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
    514     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    515     return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
    516 }
    517 
    518 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    519     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    520     return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
    521 }
    522 
    523 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    524     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    525     return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
    526 }
    527 
    528 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
    529     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    530     return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
    531 }
    532 
    533 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
    534     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    535     return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
    536 }
    537 
    538 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
    539     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    540     return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
    541 }
    542 
    543 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    544     umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
    545     return noopSingleton;
    546 }
    547 
    548 const Normalizer2 *
    549 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    550     if(U_FAILURE(errorCode)) {
    551         return NULL;
    552     }
    553     switch(mode) {
    554     case UNORM_NFD:
    555         return getNFDInstance(errorCode);
    556     case UNORM_NFKD:
    557         return getNFKDInstance(errorCode);
    558     case UNORM_NFC:
    559         return getNFCInstance(errorCode);
    560     case UNORM_NFKC:
    561         return getNFKCInstance(errorCode);
    562     case UNORM_FCD:
    563         return getFCDInstance(errorCode);
    564     default:  // UNORM_NONE
    565         return getNoopInstance(errorCode);
    566     }
    567 }
    568 
    569 const Normalizer2Impl *
    570 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    571     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    572     return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
    573 }
    574 
    575 const Normalizer2Impl *
    576 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    577     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    578     return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
    579 }
    580 
    581 const Normalizer2Impl *
    582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    583     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    584     return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
    585 }
    586 
    587 const Normalizer2Impl *
    588 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    589     return &((Normalizer2WithImpl *)norm2)->impl;
    590 }
    591 
    592 const Normalizer2 *
    593 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    594     return Normalizer2Factory::getNFCInstance(errorCode);
    595 }
    596 
    597 const Normalizer2 *
    598 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    599     return Normalizer2Factory::getNFDInstance(errorCode);
    600 }
    601 
    602 const Normalizer2 *
    603 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    604     return Normalizer2Factory::getNFKCInstance(errorCode);
    605 }
    606 
    607 const Normalizer2 *
    608 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    609     return Normalizer2Factory::getNFKDInstance(errorCode);
    610 }
    611 
    612 const Normalizer2 *
    613 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    614     return Normalizer2Factory::getNFKC_CFInstance(errorCode);
    615 }
    616 
    617 const Normalizer2 *
    618 Normalizer2::getInstance(const char *packageName,
    619                          const char *name,
    620                          UNormalization2Mode mode,
    621                          UErrorCode &errorCode) {
    622     if(U_FAILURE(errorCode)) {
    623         return NULL;
    624     }
    625     if(name==NULL || *name==0) {
    626         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    627         return NULL;
    628     }
    629     Norm2AllModes *allModes=NULL;
    630     if(packageName==NULL) {
    631         if(0==uprv_strcmp(name, "nfc")) {
    632             umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    633             allModes=nfcSingleton;
    634         } else if(0==uprv_strcmp(name, "nfkc")) {
    635             umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    636             allModes=nfkcSingleton;
    637         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    638             umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    639             allModes=nfkc_cfSingleton;
    640         }
    641     }
    642     if(allModes==NULL && U_SUCCESS(errorCode)) {
    643         {
    644             Mutex lock;
    645             if(cache!=NULL) {
    646                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    647             }
    648         }
    649         if(allModes==NULL) {
    650             LocalPointer<Norm2AllModes> localAllModes(
    651                 Norm2AllModes::createInstance(packageName, name, errorCode));
    652             if(U_SUCCESS(errorCode)) {
    653                 Mutex lock;
    654                 if(cache==NULL) {
    655                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    656                     if(U_FAILURE(errorCode)) {
    657                         return NULL;
    658                     }
    659                     uhash_setKeyDeleter(cache, uprv_free);
    660                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    661                 }
    662                 void *temp=uhash_get(cache, name);
    663                 if(temp==NULL) {
    664                     int32_t keyLength=uprv_strlen(name)+1;
    665                     char *nameCopy=(char *)uprv_malloc(keyLength);
    666                     if(nameCopy==NULL) {
    667                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    668                         return NULL;
    669                     }
    670                     uprv_memcpy(nameCopy, name, keyLength);
    671                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
    672                 } else {
    673                     // race condition
    674                     allModes=(Norm2AllModes *)temp;
    675                 }
    676             }
    677         }
    678     }
    679     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    680         switch(mode) {
    681         case UNORM2_COMPOSE:
    682             return &allModes->comp;
    683         case UNORM2_DECOMPOSE:
    684             return &allModes->decomp;
    685         case UNORM2_FCD:
    686             return &allModes->fcd;
    687         case UNORM2_COMPOSE_CONTIGUOUS:
    688             return &allModes->fcc;
    689         default:
    690             break;  // do nothing
    691         }
    692     }
    693     return NULL;
    694 }
    695 
    696 U_NAMESPACE_END
    697 
    698 // C API ------------------------------------------------------------------- ***
    699 
    700 U_NAMESPACE_USE
    701 
    702 U_CAPI const UNormalizer2 * U_EXPORT2
    703 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
    704     return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
    705 }
    706 
    707 U_CAPI const UNormalizer2 * U_EXPORT2
    708 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
    709     return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
    710 }
    711 
    712 U_CAPI const UNormalizer2 * U_EXPORT2
    713 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    714     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    715 }
    716 
    717 U_CAPI const UNormalizer2 * U_EXPORT2
    718 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    719     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    720 }
    721 
    722 U_CAPI const UNormalizer2 * U_EXPORT2
    723 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    724     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    725 }
    726 
    727 U_CAPI const UNormalizer2 * U_EXPORT2
    728 unorm2_getInstance(const char *packageName,
    729                    const char *name,
    730                    UNormalization2Mode mode,
    731                    UErrorCode *pErrorCode) {
    732     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    733 }
    734 
    735 U_CAPI void U_EXPORT2
    736 unorm2_close(UNormalizer2 *norm2) {
    737     delete (Normalizer2 *)norm2;
    738 }
    739 
    740 U_CAPI int32_t U_EXPORT2
    741 unorm2_normalize(const UNormalizer2 *norm2,
    742                  const UChar *src, int32_t length,
    743                  UChar *dest, int32_t capacity,
    744                  UErrorCode *pErrorCode) {
    745     if(U_FAILURE(*pErrorCode)) {
    746         return 0;
    747     }
    748     if( (src==NULL ? length!=0 : length<-1) ||
    749         (dest==NULL ? capacity!=0 : capacity<0) ||
    750         (src==dest && src!=NULL)
    751     ) {
    752         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    753         return 0;
    754     }
    755     UnicodeString destString(dest, 0, capacity);
    756     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
    757     if(length!=0) {
    758         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    759         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    760         if(n2wi!=NULL) {
    761             // Avoid duplicate argument checking and support NUL-terminated src.
    762             ReorderingBuffer buffer(n2wi->impl, destString);
    763             if(buffer.init(length, *pErrorCode)) {
    764                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
    765             }
    766         } else {
    767             UnicodeString srcString(length<0, src, length);
    768             n2->normalize(srcString, destString, *pErrorCode);
    769         }
    770     }
    771     return destString.extract(dest, capacity, *pErrorCode);
    772 }
    773 
    774 static int32_t
    775 normalizeSecondAndAppend(const UNormalizer2 *norm2,
    776                          UChar *first, int32_t firstLength, int32_t firstCapacity,
    777                          const UChar *second, int32_t secondLength,
    778                          UBool doNormalize,
    779                          UErrorCode *pErrorCode) {
    780     if(U_FAILURE(*pErrorCode)) {
    781         return 0;
    782     }
    783     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
    784         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
    785                        (firstCapacity<0 || firstLength<-1)) ||
    786         (first==second && first!=NULL)
    787     ) {
    788         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    789         return 0;
    790     }
    791     UnicodeString firstString(first, firstLength, firstCapacity);
    792     firstLength=firstString.length();  // In case it was -1.
    793     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
    794     if(secondLength!=0) {
    795         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    796         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    797         if(n2wi!=NULL) {
    798             // Avoid duplicate argument checking and support NUL-terminated src.
    799             UnicodeString safeMiddle;
    800             {
    801                 ReorderingBuffer buffer(n2wi->impl, firstString);
    802                 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
    803                     n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
    804                                              doNormalize, safeMiddle, buffer, *pErrorCode);
    805                 }
    806             }  // The ReorderingBuffer destructor finalizes firstString.
    807             if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
    808                 // Restore the modified suffix of the first string.
    809                 // This does not restore first[] array contents between firstLength and firstCapacity.
    810                 // (That might be uninitialized memory, as far as we know.)
    811                 if(first!=NULL) { /* don't dereference NULL */
    812                   safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
    813                   if(firstLength<firstCapacity) {
    814                     first[firstLength]=0;  // NUL-terminate in case it was originally.
    815                   }
    816                 }
    817             }
    818         } else {
    819             UnicodeString secondString(secondLength<0, second, secondLength);
    820             if(doNormalize) {
    821                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
    822             } else {
    823                 n2->append(firstString, secondString, *pErrorCode);
    824             }
    825         }
    826     }
    827     return firstString.extract(first, firstCapacity, *pErrorCode);
    828 }
    829 
    830 U_CAPI int32_t U_EXPORT2
    831 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    832                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
    833                                 const UChar *second, int32_t secondLength,
    834                                 UErrorCode *pErrorCode) {
    835     return normalizeSecondAndAppend(norm2,
    836                                     first, firstLength, firstCapacity,
    837                                     second, secondLength,
    838                                     TRUE, pErrorCode);
    839 }
    840 
    841 U_CAPI int32_t U_EXPORT2
    842 unorm2_append(const UNormalizer2 *norm2,
    843               UChar *first, int32_t firstLength, int32_t firstCapacity,
    844               const UChar *second, int32_t secondLength,
    845               UErrorCode *pErrorCode) {
    846     return normalizeSecondAndAppend(norm2,
    847                                     first, firstLength, firstCapacity,
    848                                     second, secondLength,
    849                                     FALSE, pErrorCode);
    850 }
    851 
    852 U_CAPI int32_t U_EXPORT2
    853 unorm2_getDecomposition(const UNormalizer2 *norm2,
    854                         UChar32 c, UChar *decomposition, int32_t capacity,
    855                         UErrorCode *pErrorCode) {
    856     if(U_FAILURE(*pErrorCode)) {
    857         return 0;
    858     }
    859     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    860         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    861         return 0;
    862     }
    863     UnicodeString destString(decomposition, 0, capacity);
    864     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
    865         return destString.extract(decomposition, capacity, *pErrorCode);
    866     } else {
    867         return -1;
    868     }
    869 }
    870 
    871 U_CAPI int32_t U_EXPORT2
    872 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
    873                            UChar32 c, UChar *decomposition, int32_t capacity,
    874                            UErrorCode *pErrorCode) {
    875     if(U_FAILURE(*pErrorCode)) {
    876         return 0;
    877     }
    878     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    879         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    880         return 0;
    881     }
    882     UnicodeString destString(decomposition, 0, capacity);
    883     if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
    884         return destString.extract(decomposition, capacity, *pErrorCode);
    885     } else {
    886         return -1;
    887     }
    888 }
    889 
    890 U_CAPI UChar32 U_EXPORT2
    891 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
    892     return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
    893 }
    894 
    895 U_CAPI uint8_t U_EXPORT2
    896 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
    897     return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
    898 }
    899 
    900 U_CAPI UBool U_EXPORT2
    901 unorm2_isNormalized(const UNormalizer2 *norm2,
    902                     const UChar *s, int32_t length,
    903                     UErrorCode *pErrorCode) {
    904     if(U_FAILURE(*pErrorCode)) {
    905         return 0;
    906     }
    907     if((s==NULL && length!=0) || length<-1) {
    908         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    909         return 0;
    910     }
    911     UnicodeString sString(length<0, s, length);
    912     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
    913 }
    914 
    915 U_CAPI UNormalizationCheckResult U_EXPORT2
    916 unorm2_quickCheck(const UNormalizer2 *norm2,
    917                   const UChar *s, int32_t length,
    918                   UErrorCode *pErrorCode) {
    919     if(U_FAILURE(*pErrorCode)) {
    920         return UNORM_NO;
    921     }
    922     if((s==NULL && length!=0) || length<-1) {
    923         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    924         return UNORM_NO;
    925     }
    926     UnicodeString sString(length<0, s, length);
    927     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
    928 }
    929 
    930 U_CAPI int32_t U_EXPORT2
    931 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    932                          const UChar *s, int32_t length,
    933                          UErrorCode *pErrorCode) {
    934     if(U_FAILURE(*pErrorCode)) {
    935         return 0;
    936     }
    937     if((s==NULL && length!=0) || length<-1) {
    938         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    939         return 0;
    940     }
    941     UnicodeString sString(length<0, s, length);
    942     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
    943 }
    944 
    945 U_CAPI UBool U_EXPORT2
    946 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
    947     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
    948 }
    949 
    950 U_CAPI UBool U_EXPORT2
    951 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
    952     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
    953 }
    954 
    955 U_CAPI UBool U_EXPORT2
    956 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
    957     return ((const Normalizer2 *)norm2)->isInert(c);
    958 }
    959 
    960 // Some properties APIs ---------------------------------------------------- ***
    961 
    962 U_CAPI uint8_t U_EXPORT2
    963 u_getCombiningClass(UChar32 c) {
    964     UErrorCode errorCode=U_ZERO_ERROR;
    965     const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
    966     if(U_SUCCESS(errorCode)) {
    967         return nfd->getCombiningClass(c);
    968     } else {
    969         return 0;
    970     }
    971 }
    972 
    973 U_CFUNC UNormalizationCheckResult
    974 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    975     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    976         return UNORM_YES;
    977     }
    978     UErrorCode errorCode=U_ZERO_ERROR;
    979     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    980     if(U_SUCCESS(errorCode)) {
    981         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    982     } else {
    983         return UNORM_MAYBE;
    984     }
    985 }
    986 
    987 U_CFUNC uint16_t
    988 unorm_getFCD16(UChar32 c) {
    989     UErrorCode errorCode=U_ZERO_ERROR;
    990     const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    991     if(U_SUCCESS(errorCode)) {
    992         return impl->getFCD16(c);
    993     } else {
    994         return 0;
    995     }
    996 }
    997 
    998 #endif  // !UCONFIG_NO_NORMALIZATION
    999