Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  normalizer2.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov22
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/localpointer.h"
     22 #include "unicode/normalizer2.h"
     23 #include "unicode/unistr.h"
     24 #include "unicode/unorm.h"
     25 #include "cpputils.h"
     26 #include "cstring.h"
     27 #include "mutex.h"
     28 #include "normalizer2impl.h"
     29 #include "ucln_cmn.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
     35 
     36 // Normalizer2 implementation for the old UNORM_NONE.
     37 class NoopNormalizer2 : public Normalizer2 {
     38     virtual UnicodeString &
     39     normalize(const UnicodeString &src,
     40               UnicodeString &dest,
     41               UErrorCode &errorCode) const {
     42         if(U_SUCCESS(errorCode)) {
     43             if(&dest!=&src) {
     44                 dest=src;
     45             } else {
     46                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     47             }
     48         }
     49         return dest;
     50     }
     51     virtual UnicodeString &
     52     normalizeSecondAndAppend(UnicodeString &first,
     53                              const UnicodeString &second,
     54                              UErrorCode &errorCode) const {
     55         if(U_SUCCESS(errorCode)) {
     56             if(&first!=&second) {
     57                 first.append(second);
     58             } else {
     59                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     60             }
     61         }
     62         return first;
     63     }
     64     virtual UnicodeString &
     65     append(UnicodeString &first,
     66            const UnicodeString &second,
     67            UErrorCode &errorCode) const {
     68         if(U_SUCCESS(errorCode)) {
     69             if(&first!=&second) {
     70                 first.append(second);
     71             } else {
     72                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     73             }
     74         }
     75         return first;
     76     }
     77     virtual UBool
     78     getDecomposition(UChar32, UnicodeString &) const {
     79         return FALSE;
     80     }
     81     virtual UBool
     82     isNormalized(const UnicodeString &, UErrorCode &) const {
     83         return TRUE;
     84     }
     85     virtual UNormalizationCheckResult
     86     quickCheck(const UnicodeString &, UErrorCode &) const {
     87         return UNORM_YES;
     88     }
     89     virtual int32_t
     90     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
     91         return s.length();
     92     }
     93     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
     94     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
     95     virtual UBool isInert(UChar32) const { return TRUE; }
     96 };
     97 
     98 // Intermediate class:
     99 // Has Normalizer2Impl and does boilerplate argument checking and setup.
    100 class Normalizer2WithImpl : public Normalizer2 {
    101 public:
    102     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
    103 
    104     // normalize
    105     virtual UnicodeString &
    106     normalize(const UnicodeString &src,
    107               UnicodeString &dest,
    108               UErrorCode &errorCode) const {
    109         if(U_FAILURE(errorCode)) {
    110             dest.setToBogus();
    111             return dest;
    112         }
    113         const UChar *sArray=src.getBuffer();
    114         if(&dest==&src || sArray==NULL) {
    115             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    116             dest.setToBogus();
    117             return dest;
    118         }
    119         dest.remove();
    120         ReorderingBuffer buffer(impl, dest);
    121         if(buffer.init(src.length(), errorCode)) {
    122             normalize(sArray, sArray+src.length(), buffer, errorCode);
    123         }
    124         return dest;
    125     }
    126     virtual void
    127     normalize(const UChar *src, const UChar *limit,
    128               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    129 
    130     // normalize and append
    131     virtual UnicodeString &
    132     normalizeSecondAndAppend(UnicodeString &first,
    133                              const UnicodeString &second,
    134                              UErrorCode &errorCode) const {
    135         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
    136     }
    137     virtual UnicodeString &
    138     append(UnicodeString &first,
    139            const UnicodeString &second,
    140            UErrorCode &errorCode) const {
    141         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
    142     }
    143     UnicodeString &
    144     normalizeSecondAndAppend(UnicodeString &first,
    145                              const UnicodeString &second,
    146                              UBool doNormalize,
    147                              UErrorCode &errorCode) const {
    148         uprv_checkCanGetBuffer(first, errorCode);
    149         if(U_FAILURE(errorCode)) {
    150             return first;
    151         }
    152         const UChar *secondArray=second.getBuffer();
    153         if(&first==&second || secondArray==NULL) {
    154             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    155             return first;
    156         }
    157         int32_t firstLength=first.length();
    158         UnicodeString safeMiddle;
    159         {
    160             ReorderingBuffer buffer(impl, first);
    161             if(buffer.init(firstLength+second.length(), errorCode)) {
    162                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
    163                                    safeMiddle, buffer, errorCode);
    164             }
    165         }  // The ReorderingBuffer destructor finalizes the first string.
    166         if(U_FAILURE(errorCode)) {
    167             // Restore the modified suffix of the first string.
    168             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
    169         }
    170         return first;
    171     }
    172     virtual void
    173     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    174                        UnicodeString &safeMiddle,
    175                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    176     virtual UBool
    177     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    178         UChar buffer[4];
    179         int32_t length;
    180         const UChar *d=impl.getDecomposition(c, buffer, length);
    181         if(d==NULL) {
    182             return FALSE;
    183         }
    184         if(d==buffer) {
    185             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    186         } else {
    187             decomposition.setTo(FALSE, d, length);  // read-only alias
    188         }
    189         return TRUE;
    190     }
    191 
    192     // quick checks
    193     virtual UBool
    194     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    195         if(U_FAILURE(errorCode)) {
    196             return FALSE;
    197         }
    198         const UChar *sArray=s.getBuffer();
    199         if(sArray==NULL) {
    200             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    201             return FALSE;
    202         }
    203         const UChar *sLimit=sArray+s.length();
    204         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    205     }
    206     virtual UNormalizationCheckResult
    207     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    208         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    209     }
    210     virtual int32_t
    211     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    212         if(U_FAILURE(errorCode)) {
    213             return 0;
    214         }
    215         const UChar *sArray=s.getBuffer();
    216         if(sArray==NULL) {
    217             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    218             return 0;
    219         }
    220         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    221     }
    222     virtual const UChar *
    223     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
    224 
    225     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    226         return UNORM_YES;
    227     }
    228 
    229     const Normalizer2Impl &impl;
    230 };
    231 
    232 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    233 public:
    234     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    235 
    236 private:
    237     virtual void
    238     normalize(const UChar *src, const UChar *limit,
    239               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    240         impl.decompose(src, limit, &buffer, errorCode);
    241     }
    242     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    243     virtual void
    244     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    245                        UnicodeString &safeMiddle,
    246                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    247         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    248     }
    249     virtual const UChar *
    250     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    251         return impl.decompose(src, limit, NULL, errorCode);
    252     }
    253     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    254     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    255         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    256     }
    257     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
    258     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
    259     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
    260 };
    261 
    262 class ComposeNormalizer2 : public Normalizer2WithImpl {
    263 public:
    264     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    265         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    266 
    267 private:
    268     virtual void
    269     normalize(const UChar *src, const UChar *limit,
    270               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    271         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    272     }
    273     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    274     virtual void
    275     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    276                        UnicodeString &safeMiddle,
    277                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    278         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    279     }
    280 
    281     virtual UBool
    282     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    283         if(U_FAILURE(errorCode)) {
    284             return FALSE;
    285         }
    286         const UChar *sArray=s.getBuffer();
    287         if(sArray==NULL) {
    288             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    289             return FALSE;
    290         }
    291         UnicodeString temp;
    292         ReorderingBuffer buffer(impl, temp);
    293         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    294             return FALSE;
    295         }
    296         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    297     }
    298     virtual UNormalizationCheckResult
    299     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    300         if(U_FAILURE(errorCode)) {
    301             return UNORM_MAYBE;
    302         }
    303         const UChar *sArray=s.getBuffer();
    304         if(sArray==NULL) {
    305             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    306             return UNORM_MAYBE;
    307         }
    308         UNormalizationCheckResult qcResult=UNORM_YES;
    309         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    310         return qcResult;
    311     }
    312     virtual const UChar *
    313     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
    314         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    315     }
    316     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    317     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    318         return impl.getCompQuickCheck(impl.getNorm16(c));
    319     }
    320     virtual UBool hasBoundaryBefore(UChar32 c) const {
    321         return impl.hasCompBoundaryBefore(c);
    322     }
    323     virtual UBool hasBoundaryAfter(UChar32 c) const {
    324         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    325     }
    326     virtual UBool isInert(UChar32 c) const {
    327         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    328     }
    329 
    330     const UBool onlyContiguous;
    331 };
    332 
    333 class FCDNormalizer2 : public Normalizer2WithImpl {
    334 public:
    335     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    336 
    337 private:
    338     virtual void
    339     normalize(const UChar *src, const UChar *limit,
    340               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    341         impl.makeFCD(src, limit, &buffer, errorCode);
    342     }
    343     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    344     virtual void
    345     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    346                        UnicodeString &safeMiddle,
    347                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    348         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    349     }
    350     virtual const UChar *
    351     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    352         return impl.makeFCD(src, limit, NULL, errorCode);
    353     }
    354     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    355     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    356     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    357     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
    358 };
    359 
    360 // instance cache ---------------------------------------------------------- ***
    361 
    362 struct Norm2AllModes : public UMemory {
    363     static Norm2AllModes *createInstance(const char *packageName,
    364                                          const char *name,
    365                                          UErrorCode &errorCode);
    366     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
    367 
    368     Normalizer2Impl impl;
    369     ComposeNormalizer2 comp;
    370     DecomposeNormalizer2 decomp;
    371     FCDNormalizer2 fcd;
    372     ComposeNormalizer2 fcc;
    373 };
    374 
    375 Norm2AllModes *
    376 Norm2AllModes::createInstance(const char *packageName,
    377                               const char *name,
    378                               UErrorCode &errorCode) {
    379     if(U_FAILURE(errorCode)) {
    380         return NULL;
    381     }
    382     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
    383     if(allModes.isNull()) {
    384         errorCode=U_MEMORY_ALLOCATION_ERROR;
    385         return NULL;
    386     }
    387     allModes->impl.load(packageName, name, errorCode);
    388     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
    389 }
    390 
U_CDECL_BEGIN
// Forward declaration: the singleton createInstance() functions below register
// this cleanup function before its definition appears further down in this file.
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
    394 
    395 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
    396 public:
    397     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
    398         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
    399     Norm2AllModes *getInstance(UErrorCode &errorCode) {
    400         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
    401     }
    402 private:
    403     static void *createInstance(const void *context, UErrorCode &errorCode) {
    404         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    405         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
    406     }
    407 
    408     const char *name;
    409 };
    410 
// Singleton storage for the "nfc", "nfkc" and "nfkc_cf" data;
// accessed through Norm2AllModesSingleton and released in uprv_normalizer2_cleanup().
STATIC_TRI_STATE_SINGLETON(nfcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
    414 
    415 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
    416 public:
    417     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
    418     Normalizer2 *getInstance(UErrorCode &errorCode) {
    419         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
    420     }
    421 private:
    422     static void *createInstance(const void *, UErrorCode &errorCode) {
    423         Normalizer2 *noop=new NoopNormalizer2;
    424         if(noop==NULL) {
    425             errorCode=U_MEMORY_ALLOCATION_ERROR;
    426         }
    427         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    428         return noop;
    429     }
    430 };
    431 
// Singleton storage for the no-op normalizer; accessed through Norm2Singleton.
STATIC_SIMPLE_SINGLETON(noopSingleton);

// Maps data file names to Norm2AllModes objects.
// Opened on demand and filled in Normalizer2::getInstance(),
// closed in uprv_normalizer2_cleanup().
static UHashtable *cache=NULL;
    435 
    436 U_CDECL_BEGIN
    437 
    438 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    439     delete (Norm2AllModes *)allModes;
    440 }
    441 
    442 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
    443     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
    444     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
    445     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
    446     Norm2Singleton(noopSingleton).deleteInstance();
    447     uhash_close(cache);
    448     cache=NULL;
    449     return TRUE;
    450 }
    451 
    452 U_CDECL_END
    453 
    454 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
    455     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    456     return allModes!=NULL ? &allModes->comp : NULL;
    457 }
    458 
    459 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
    460     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    461     return allModes!=NULL ? &allModes->decomp : NULL;
    462 }
    463 
    464 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    465     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    466     if(allModes!=NULL) {
    467         allModes->impl.getFCDTrie(errorCode);
    468         return &allModes->fcd;
    469     } else {
    470         return NULL;
    471     }
    472 }
    473 
    474 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    475     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    476     return allModes!=NULL ? &allModes->fcc : NULL;
    477 }
    478 
    479 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
    480     Norm2AllModes *allModes=
    481         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    482     return allModes!=NULL ? &allModes->comp : NULL;
    483 }
    484 
    485 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
    486     Norm2AllModes *allModes=
    487         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    488     return allModes!=NULL ? &allModes->decomp : NULL;
    489 }
    490 
    491 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
    492     Norm2AllModes *allModes=
    493         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    494     return allModes!=NULL ? &allModes->comp : NULL;
    495 }
    496 
    497 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    498     return Norm2Singleton(noopSingleton).getInstance(errorCode);
    499 }
    500 
    501 const Normalizer2 *
    502 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    503     if(U_FAILURE(errorCode)) {
    504         return NULL;
    505     }
    506     switch(mode) {
    507     case UNORM_NFD:
    508         return getNFDInstance(errorCode);
    509     case UNORM_NFKD:
    510         return getNFKDInstance(errorCode);
    511     case UNORM_NFC:
    512         return getNFCInstance(errorCode);
    513     case UNORM_NFKC:
    514         return getNFKCInstance(errorCode);
    515     case UNORM_FCD:
    516         return getFCDInstance(errorCode);
    517     default:  // UNORM_NONE
    518         return getNoopInstance(errorCode);
    519     }
    520 }
    521 
    522 const Normalizer2Impl *
    523 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    524     Norm2AllModes *allModes=
    525         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    526     return allModes!=NULL ? &allModes->impl : NULL;
    527 }
    528 
    529 const Normalizer2Impl *
    530 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    531     Norm2AllModes *allModes=
    532         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    533     return allModes!=NULL ? &allModes->impl : NULL;
    534 }
    535 
    536 const Normalizer2Impl *
    537 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    538     Norm2AllModes *allModes=
    539         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    540     return allModes!=NULL ? &allModes->impl : NULL;
    541 }
    542 
    543 const Normalizer2Impl *
    544 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    545     return &((Normalizer2WithImpl *)norm2)->impl;
    546 }
    547 
    548 const UTrie2 *
    549 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
    550     Norm2AllModes *allModes=
    551         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    552     if(allModes!=NULL) {
    553         return allModes->impl.getFCDTrie(errorCode);
    554     } else {
    555         return NULL;
    556     }
    557 }
    558 
    559 const Normalizer2 *
    560 Normalizer2::getInstance(const char *packageName,
    561                          const char *name,
    562                          UNormalization2Mode mode,
    563                          UErrorCode &errorCode) {
    564     if(U_FAILURE(errorCode)) {
    565         return NULL;
    566     }
    567     if(name==NULL || *name==0) {
    568         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    569     }
    570     Norm2AllModes *allModes=NULL;
    571     if(packageName==NULL) {
    572         if(0==uprv_strcmp(name, "nfc")) {
    573             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    574         } else if(0==uprv_strcmp(name, "nfkc")) {
    575             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    576         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    577             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    578         }
    579     }
    580     if(allModes==NULL && U_SUCCESS(errorCode)) {
    581         {
    582             Mutex lock;
    583             if(cache!=NULL) {
    584                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    585             }
    586         }
    587         if(allModes==NULL) {
    588             LocalPointer<Norm2AllModes> localAllModes(
    589                 Norm2AllModes::createInstance(packageName, name, errorCode));
    590             if(U_SUCCESS(errorCode)) {
    591                 Mutex lock;
    592                 if(cache==NULL) {
    593                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    594                     if(U_FAILURE(errorCode)) {
    595                         return NULL;
    596                     }
    597                     uhash_setKeyDeleter(cache, uprv_free);
    598                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    599                 }
    600                 void *temp=uhash_get(cache, name);
    601                 if(temp==NULL) {
    602                     int32_t keyLength=uprv_strlen(name)+1;
    603                     char *nameCopy=(char *)uprv_malloc(keyLength);
    604                     if(nameCopy==NULL) {
    605                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    606                         return NULL;
    607                     }
    608                     uprv_memcpy(nameCopy, name, keyLength);
    609                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
    610                 } else {
    611                     // race condition
    612                     allModes=(Norm2AllModes *)temp;
    613                 }
    614             }
    615         }
    616     }
    617     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    618         switch(mode) {
    619         case UNORM2_COMPOSE:
    620             return &allModes->comp;
    621         case UNORM2_DECOMPOSE:
    622             return &allModes->decomp;
    623         case UNORM2_FCD:
    624             allModes->impl.getFCDTrie(errorCode);
    625             return &allModes->fcd;
    626         case UNORM2_COMPOSE_CONTIGUOUS:
    627             return &allModes->fcc;
    628         default:
    629             break;  // do nothing
    630         }
    631     }
    632     return NULL;
    633 }
    634 
    635 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
    636 
    637 U_NAMESPACE_END
    638 
    639 // C API ------------------------------------------------------------------- ***
    640 
    641 U_NAMESPACE_USE
    642 
    643 U_DRAFT const UNormalizer2 * U_EXPORT2
    644 unorm2_getInstance(const char *packageName,
    645                    const char *name,
    646                    UNormalization2Mode mode,
    647                    UErrorCode *pErrorCode) {
    648     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    649 }
    650 
    651 U_DRAFT void U_EXPORT2
    652 unorm2_close(UNormalizer2 *norm2) {
    653     delete (Normalizer2 *)norm2;
    654 }
    655 
    656 U_DRAFT int32_t U_EXPORT2
    657 unorm2_normalize(const UNormalizer2 *norm2,
    658                  const UChar *src, int32_t length,
    659                  UChar *dest, int32_t capacity,
    660                  UErrorCode *pErrorCode) {
    661     if(U_FAILURE(*pErrorCode)) {
    662         return 0;
    663     }
    664     if( (src==NULL ? length!=0 : length<-1) ||
    665         (dest==NULL ? capacity!=0 : capacity<0) ||
    666         (src==dest && src!=NULL)
    667     ) {
    668         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    669         return 0;
    670     }
    671     UnicodeString destString(dest, 0, capacity);
    672     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
    673     if(length!=0) {
    674         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    675         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    676         if(n2wi!=NULL) {
    677             // Avoid duplicate argument checking and support NUL-terminated src.
    678             ReorderingBuffer buffer(n2wi->impl, destString);
    679             if(buffer.init(length, *pErrorCode)) {
    680                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
    681             }
    682         } else {
    683             UnicodeString srcString(length<0, src, length);
    684             n2->normalize(srcString, destString, *pErrorCode);
    685         }
    686     }
    687     return destString.extract(dest, capacity, *pErrorCode);
    688 }
    689 
    690 static int32_t
    691 normalizeSecondAndAppend(const UNormalizer2 *norm2,
    692                          UChar *first, int32_t firstLength, int32_t firstCapacity,
    693                          const UChar *second, int32_t secondLength,
    694                          UBool doNormalize,
    695                          UErrorCode *pErrorCode) {
    696     if(U_FAILURE(*pErrorCode)) {
    697         return 0;
    698     }
    699     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
    700         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
    701                        (firstCapacity<0 || firstLength<-1)) ||
    702         (first==second && first!=NULL)
    703     ) {
    704         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    705         return 0;
    706     }
    707     UnicodeString firstString(first, firstLength, firstCapacity);
    708     firstLength=firstString.length();  // In case it was -1.
    709     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
    710     if(secondLength!=0) {
    711         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    712         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    713         if(n2wi!=NULL) {
    714             // Avoid duplicate argument checking and support NUL-terminated src.
    715             UnicodeString safeMiddle;
    716             {
    717                 ReorderingBuffer buffer(n2wi->impl, firstString);
    718                 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
    719                     n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
    720                                              doNormalize, safeMiddle, buffer, *pErrorCode);
    721                 }
    722             }  // The ReorderingBuffer destructor finalizes firstString.
    723             if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
    724                 // Restore the modified suffix of the first string.
    725                 // This does not restore first[] array contents between firstLength and firstCapacity.
    726                 // (That might be uninitialized memory, as far as we know.)
    727                 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
    728                 if(firstLength<firstCapacity) {
    729                     first[firstLength]=0;  // NUL-terminate in case it was originally.
    730                 }
    731             }
    732         } else {
    733             UnicodeString secondString(secondLength<0, second, secondLength);
    734             if(doNormalize) {
    735                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
    736             } else {
    737                 n2->append(firstString, secondString, *pErrorCode);
    738             }
    739         }
    740     }
    741     return firstString.extract(first, firstCapacity, *pErrorCode);
    742 }
    743 
    744 U_DRAFT int32_t U_EXPORT2
    745 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    746                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
    747                                 const UChar *second, int32_t secondLength,
    748                                 UErrorCode *pErrorCode) {
    749     return normalizeSecondAndAppend(norm2,
    750                                     first, firstLength, firstCapacity,
    751                                     second, secondLength,
    752                                     TRUE, pErrorCode);
    753 }
    754 
    755 U_DRAFT int32_t U_EXPORT2
    756 unorm2_append(const UNormalizer2 *norm2,
    757               UChar *first, int32_t firstLength, int32_t firstCapacity,
    758               const UChar *second, int32_t secondLength,
    759               UErrorCode *pErrorCode) {
    760     return normalizeSecondAndAppend(norm2,
    761                                     first, firstLength, firstCapacity,
    762                                     second, secondLength,
    763                                     FALSE, pErrorCode);
    764 }
    765 
    766 U_DRAFT int32_t U_EXPORT2
    767 unorm2_getDecomposition(const UNormalizer2 *norm2,
    768                         UChar32 c, UChar *decomposition, int32_t capacity,
    769                         UErrorCode *pErrorCode) {
    770     if(U_FAILURE(*pErrorCode)) {
    771         return 0;
    772     }
    773     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    774         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    775         return 0;
    776     }
    777     UnicodeString destString(decomposition, 0, capacity);
    778     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
    779         return destString.extract(decomposition, capacity, *pErrorCode);
    780     } else {
    781         return -1;
    782     }
    783 }
    784 
    785 U_DRAFT UBool U_EXPORT2
    786 unorm2_isNormalized(const UNormalizer2 *norm2,
    787                     const UChar *s, int32_t length,
    788                     UErrorCode *pErrorCode) {
    789     if(U_FAILURE(*pErrorCode)) {
    790         return 0;
    791     }
    792     if((s==NULL && length!=0) || length<-1) {
    793         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    794         return 0;
    795     }
    796     UnicodeString sString(length<0, s, length);
    797     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
    798 }
    799 
    800 U_DRAFT UNormalizationCheckResult U_EXPORT2
    801 unorm2_quickCheck(const UNormalizer2 *norm2,
    802                   const UChar *s, int32_t length,
    803                   UErrorCode *pErrorCode) {
    804     if(U_FAILURE(*pErrorCode)) {
    805         return UNORM_NO;
    806     }
    807     if((s==NULL && length!=0) || length<-1) {
    808         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    809         return UNORM_NO;
    810     }
    811     UnicodeString sString(length<0, s, length);
    812     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
    813 }
    814 
    815 U_DRAFT int32_t U_EXPORT2
    816 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    817                          const UChar *s, int32_t length,
    818                          UErrorCode *pErrorCode) {
    819     if(U_FAILURE(*pErrorCode)) {
    820         return 0;
    821     }
    822     if((s==NULL && length!=0) || length<-1) {
    823         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    824         return 0;
    825     }
    826     UnicodeString sString(length<0, s, length);
    827     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
    828 }
    829 
    830 U_DRAFT UBool U_EXPORT2
    831 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
    832     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
    833 }
    834 
    835 U_DRAFT UBool U_EXPORT2
    836 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
    837     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
    838 }
    839 
    840 U_DRAFT UBool U_EXPORT2
    841 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
    842     return ((const Normalizer2 *)norm2)->isInert(c);
    843 }
    844 
    845 // Some properties APIs ---------------------------------------------------- ***
    846 
    847 U_CFUNC UNormalizationCheckResult U_EXPORT2
    848 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    849     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    850         return UNORM_YES;
    851     }
    852     UErrorCode errorCode=U_ZERO_ERROR;
    853     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    854     if(U_SUCCESS(errorCode)) {
    855         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    856     } else {
    857         return UNORM_MAYBE;
    858     }
    859 }
    860 
    861 U_CAPI const uint16_t * U_EXPORT2
    862 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
    863     const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
    864     if(U_SUCCESS(*pErrorCode)) {
    865         fcdHighStart=trie->highStart;
    866         return trie->index;
    867     } else {
    868         return NULL;
    869     }
    870 }
    871 
    872 #endif  // !UCONFIG_NO_NORMALIZATION
    873