Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  normalizer2.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov22
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/localpointer.h"
     22 #include "unicode/normalizer2.h"
     23 #include "unicode/unistr.h"
     24 #include "unicode/unorm.h"
     25 #include "cpputils.h"
     26 #include "cstring.h"
     27 #include "mutex.h"
     28 #include "normalizer2impl.h"
     29 #include "ucln_cmn.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
     35 
     36 // Normalizer2 implementation for the old UNORM_NONE.
     37 class NoopNormalizer2 : public Normalizer2 {
     38     virtual UnicodeString &
     39     normalize(const UnicodeString &src,
     40               UnicodeString &dest,
     41               UErrorCode &errorCode) const {
     42         if(U_SUCCESS(errorCode)) {
     43             if(&dest!=&src) {
     44                 dest=src;
     45             } else {
     46                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     47             }
     48         }
     49         return dest;
     50     }
     51     virtual UnicodeString &
     52     normalizeSecondAndAppend(UnicodeString &first,
     53                              const UnicodeString &second,
     54                              UErrorCode &errorCode) const {
     55         if(U_SUCCESS(errorCode)) {
     56             if(&first!=&second) {
     57                 first.append(second);
     58             } else {
     59                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     60             }
     61         }
     62         return first;
     63     }
     64     virtual UnicodeString &
     65     append(UnicodeString &first,
     66            const UnicodeString &second,
     67            UErrorCode &errorCode) const {
     68         if(U_SUCCESS(errorCode)) {
     69             if(&first!=&second) {
     70                 first.append(second);
     71             } else {
     72                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     73             }
     74         }
     75         return first;
     76     }
     77     virtual UBool
     78     getDecomposition(UChar32, UnicodeString &) const {
     79         return FALSE;
     80     }
     81     virtual UBool
     82     isNormalized(const UnicodeString &, UErrorCode &) const {
     83         return TRUE;
     84     }
     85     virtual UNormalizationCheckResult
     86     quickCheck(const UnicodeString &, UErrorCode &) const {
     87         return UNORM_YES;
     88     }
     89     virtual int32_t
     90     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
     91         return s.length();
     92     }
     93     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
     94     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
     95     virtual UBool isInert(UChar32) const { return TRUE; }
     96 };
     97 
     98 // Intermediate class:
     99 // Has Normalizer2Impl and does boilerplate argument checking and setup.
    100 class Normalizer2WithImpl : public Normalizer2 {
    101 public:
    102     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
    103 
    104     // normalize
    105     virtual UnicodeString &
    106     normalize(const UnicodeString &src,
    107               UnicodeString &dest,
    108               UErrorCode &errorCode) const {
    109         if(U_FAILURE(errorCode)) {
    110             dest.setToBogus();
    111             return dest;
    112         }
    113         const UChar *sArray=src.getBuffer();
    114         if(&dest==&src || sArray==NULL) {
    115             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    116             dest.setToBogus();
    117             return dest;
    118         }
    119         dest.remove();
    120         ReorderingBuffer buffer(impl, dest);
    121         if(buffer.init(src.length(), errorCode)) {
    122             normalize(sArray, sArray+src.length(), buffer, errorCode);
    123         }
    124         return dest;
    125     }
    126     virtual void
    127     normalize(const UChar *src, const UChar *limit,
    128               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    129 
    130     // normalize and append
    131     virtual UnicodeString &
    132     normalizeSecondAndAppend(UnicodeString &first,
    133                              const UnicodeString &second,
    134                              UErrorCode &errorCode) const {
    135         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
    136     }
    137     virtual UnicodeString &
    138     append(UnicodeString &first,
    139            const UnicodeString &second,
    140            UErrorCode &errorCode) const {
    141         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
    142     }
    143     UnicodeString &
    144     normalizeSecondAndAppend(UnicodeString &first,
    145                              const UnicodeString &second,
    146                              UBool doNormalize,
    147                              UErrorCode &errorCode) const {
    148         uprv_checkCanGetBuffer(first, errorCode);
    149         if(U_FAILURE(errorCode)) {
    150             return first;
    151         }
    152         const UChar *secondArray=second.getBuffer();
    153         if(&first==&second || secondArray==NULL) {
    154             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    155             return first;
    156         }
    157         ReorderingBuffer buffer(impl, first);
    158         if(buffer.init(first.length()+second.length(), errorCode)) {
    159             normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
    160                                buffer, errorCode);
    161         }
    162         return first;
    163     }
    164     virtual void
    165     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    166                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    167     virtual UBool
    168     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    169         UChar buffer[4];
    170         int32_t length;
    171         const UChar *d=impl.getDecomposition(c, buffer, length);
    172         if(d==NULL) {
    173             return FALSE;
    174         }
    175         if(d==buffer) {
    176             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    177         } else {
    178             decomposition.setTo(FALSE, d, length);  // read-only alias
    179         }
    180         return TRUE;
    181     }
    182 
    183     // quick checks
    184     virtual UBool
    185     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    186         if(U_FAILURE(errorCode)) {
    187             return FALSE;
    188         }
    189         const UChar *sArray=s.getBuffer();
    190         if(sArray==NULL) {
    191             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    192             return FALSE;
    193         }
    194         const UChar *sLimit=sArray+s.length();
    195         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    196     }
    197     virtual UNormalizationCheckResult
    198     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    199         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    200     }
    201     virtual int32_t
    202     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    203         if(U_FAILURE(errorCode)) {
    204             return 0;
    205         }
    206         const UChar *sArray=s.getBuffer();
    207         if(sArray==NULL) {
    208             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    209             return 0;
    210         }
    211         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    212     }
    213     virtual const UChar *
    214     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
    215 
    216     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    217         return UNORM_YES;
    218     }
    219 
    220     const Normalizer2Impl &impl;
    221 };
    222 
    223 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    224 public:
    225     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    226 
    227 private:
    228     virtual void
    229     normalize(const UChar *src, const UChar *limit,
    230               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    231         impl.decompose(src, limit, &buffer, errorCode);
    232     }
    233     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    234     virtual void
    235     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    236                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    237         impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
    238     }
    239     virtual const UChar *
    240     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    241         return impl.decompose(src, limit, NULL, errorCode);
    242     }
    243     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    244     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    245         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    246     }
    247     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
    248     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
    249     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
    250 };
    251 
    252 class ComposeNormalizer2 : public Normalizer2WithImpl {
    253 public:
    254     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    255         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    256 
    257 private:
    258     virtual void
    259     normalize(const UChar *src, const UChar *limit,
    260               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    261         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    262     }
    263     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    264     virtual void
    265     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    266                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    267         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
    268     }
    269 
    270     virtual UBool
    271     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    272         if(U_FAILURE(errorCode)) {
    273             return FALSE;
    274         }
    275         const UChar *sArray=s.getBuffer();
    276         if(sArray==NULL) {
    277             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    278             return FALSE;
    279         }
    280         UnicodeString temp;
    281         ReorderingBuffer buffer(impl, temp);
    282         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    283             return FALSE;
    284         }
    285         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    286     }
    287     virtual UNormalizationCheckResult
    288     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    289         if(U_FAILURE(errorCode)) {
    290             return UNORM_MAYBE;
    291         }
    292         const UChar *sArray=s.getBuffer();
    293         if(sArray==NULL) {
    294             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    295             return UNORM_MAYBE;
    296         }
    297         UNormalizationCheckResult qcResult=UNORM_YES;
    298         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    299         return qcResult;
    300     }
    301     virtual const UChar *
    302     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
    303         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    304     }
    305     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    306     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    307         return impl.getCompQuickCheck(impl.getNorm16(c));
    308     }
    309     virtual UBool hasBoundaryBefore(UChar32 c) const {
    310         return impl.hasCompBoundaryBefore(c);
    311     }
    312     virtual UBool hasBoundaryAfter(UChar32 c) const {
    313         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    314     }
    315     virtual UBool isInert(UChar32 c) const {
    316         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    317     }
    318 
    319     const UBool onlyContiguous;
    320 };
    321 
    322 class FCDNormalizer2 : public Normalizer2WithImpl {
    323 public:
    324     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    325 
    326 private:
    327     virtual void
    328     normalize(const UChar *src, const UChar *limit,
    329               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    330         impl.makeFCD(src, limit, &buffer, errorCode);
    331     }
    332     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    333     virtual void
    334     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    335                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    336         impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
    337     }
    338     virtual const UChar *
    339     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    340         return impl.makeFCD(src, limit, NULL, errorCode);
    341     }
    342     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    343     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    344     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    345     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
    346 };
    347 
    348 // instance cache ---------------------------------------------------------- ***
    349 
    350 struct Norm2AllModes : public UMemory {
    351     static Norm2AllModes *createInstance(const char *packageName,
    352                                          const char *name,
    353                                          UErrorCode &errorCode);
    354     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
    355 
    356     Normalizer2Impl impl;
    357     ComposeNormalizer2 comp;
    358     DecomposeNormalizer2 decomp;
    359     FCDNormalizer2 fcd;
    360     ComposeNormalizer2 fcc;
    361 };
    362 
    363 Norm2AllModes *
    364 Norm2AllModes::createInstance(const char *packageName,
    365                               const char *name,
    366                               UErrorCode &errorCode) {
    367     if(U_FAILURE(errorCode)) {
    368         return NULL;
    369     }
    370     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
    371     if(allModes.isNull()) {
    372         errorCode=U_MEMORY_ALLOCATION_ERROR;
    373         return NULL;
    374     }
    375     allModes->impl.load(packageName, name, errorCode);
    376     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
    377 }
    378 
    379 U_CDECL_BEGIN
    380 static UBool U_CALLCONV uprv_normalizer2_cleanup();
    381 U_CDECL_END
    382 
    383 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
    384 public:
    385     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
    386         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
    387     Norm2AllModes *getInstance(UErrorCode &errorCode) {
    388         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
    389     }
    390 private:
    391     static void *createInstance(const void *context, UErrorCode &errorCode) {
    392         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    393         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
    394     }
    395 
    396     const char *name;
    397 };
    398 
    399 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
    400 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
    401 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
    402 
    403 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
    404 public:
    405     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
    406     Normalizer2 *getInstance(UErrorCode &errorCode) {
    407         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
    408     }
    409 private:
    410     static void *createInstance(const void *, UErrorCode &errorCode) {
    411         Normalizer2 *noop=new NoopNormalizer2;
    412         if(noop==NULL) {
    413             errorCode=U_MEMORY_ALLOCATION_ERROR;
    414         }
    415         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    416         return noop;
    417     }
    418 };
    419 
    420 STATIC_SIMPLE_SINGLETON(noopSingleton);
    421 
    422 static UHashtable *cache=NULL;
    423 
    424 U_CDECL_BEGIN
    425 
    426 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    427     delete (Norm2AllModes *)allModes;
    428 }
    429 
    430 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
    431     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
    432     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
    433     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
    434     Norm2Singleton(noopSingleton).deleteInstance();
    435     uhash_close(cache);
    436     cache=NULL;
    437     return TRUE;
    438 }
    439 
    440 U_CDECL_END
    441 
    442 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
    443     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    444     return allModes!=NULL ? &allModes->comp : NULL;
    445 }
    446 
    447 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
    448     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    449     return allModes!=NULL ? &allModes->decomp : NULL;
    450 }
    451 
    452 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    453     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    454     if(allModes!=NULL) {
    455         allModes->impl.getFCDTrie(errorCode);
    456         return &allModes->fcd;
    457     } else {
    458         return NULL;
    459     }
    460 }
    461 
    462 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    463     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    464     return allModes!=NULL ? &allModes->fcc : NULL;
    465 }
    466 
    467 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
    468     Norm2AllModes *allModes=
    469         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    470     return allModes!=NULL ? &allModes->comp : NULL;
    471 }
    472 
    473 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
    474     Norm2AllModes *allModes=
    475         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    476     return allModes!=NULL ? &allModes->decomp : NULL;
    477 }
    478 
    479 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
    480     Norm2AllModes *allModes=
    481         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    482     return allModes!=NULL ? &allModes->comp : NULL;
    483 }
    484 
    485 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    486     return Norm2Singleton(noopSingleton).getInstance(errorCode);
    487 }
    488 
    489 const Normalizer2 *
    490 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    491     if(U_FAILURE(errorCode)) {
    492         return NULL;
    493     }
    494     switch(mode) {
    495     case UNORM_NFD:
    496         return getNFDInstance(errorCode);
    497     case UNORM_NFKD:
    498         return getNFKDInstance(errorCode);
    499     case UNORM_NFC:
    500         return getNFCInstance(errorCode);
    501     case UNORM_NFKC:
    502         return getNFKCInstance(errorCode);
    503     case UNORM_FCD:
    504         return getFCDInstance(errorCode);
    505     default:  // UNORM_NONE
    506         return getNoopInstance(errorCode);
    507     }
    508 }
    509 
    510 const Normalizer2Impl *
    511 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    512     Norm2AllModes *allModes=
    513         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    514     return allModes!=NULL ? &allModes->impl : NULL;
    515 }
    516 
    517 const Normalizer2Impl *
    518 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    519     Norm2AllModes *allModes=
    520         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    521     return allModes!=NULL ? &allModes->impl : NULL;
    522 }
    523 
    524 const Normalizer2Impl *
    525 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    526     Norm2AllModes *allModes=
    527         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    528     return allModes!=NULL ? &allModes->impl : NULL;
    529 }
    530 
    531 const Normalizer2Impl *
    532 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    533     return &((Normalizer2WithImpl *)norm2)->impl;
    534 }
    535 
    536 const UTrie2 *
    537 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
    538     Norm2AllModes *allModes=
    539         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    540     if(allModes!=NULL) {
    541         return allModes->impl.getFCDTrie(errorCode);
    542     } else {
    543         return NULL;
    544     }
    545 }
    546 
    547 const Normalizer2 *
    548 Normalizer2::getInstance(const char *packageName,
    549                          const char *name,
    550                          UNormalization2Mode mode,
    551                          UErrorCode &errorCode) {
    552     if(U_FAILURE(errorCode)) {
    553         return NULL;
    554     }
    555     if(name==NULL || *name==0) {
    556         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    557     }
    558     Norm2AllModes *allModes=NULL;
    559     if(packageName==NULL) {
    560         if(0==uprv_strcmp(name, "nfc")) {
    561             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    562         } else if(0==uprv_strcmp(name, "nfkc")) {
    563             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    564         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    565             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    566         }
    567     }
    568     if(allModes==NULL && U_SUCCESS(errorCode)) {
    569         {
    570             Mutex lock;
    571             if(cache!=NULL) {
    572                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    573             }
    574         }
    575         if(allModes==NULL) {
    576             LocalPointer<Norm2AllModes> localAllModes(
    577                 Norm2AllModes::createInstance(packageName, name, errorCode));
    578             if(U_SUCCESS(errorCode)) {
    579                 Mutex lock;
    580                 if(cache==NULL) {
    581                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    582                     if(U_FAILURE(errorCode)) {
    583                         return NULL;
    584                     }
    585                     uhash_setKeyDeleter(cache, uprv_free);
    586                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    587                 }
    588                 void *temp=uhash_get(cache, name);
    589                 if(temp==NULL) {
    590                     int32_t keyLength=uprv_strlen(name)+1;
    591                     char *nameCopy=(char *)uprv_malloc(keyLength);
    592                     if(nameCopy==NULL) {
    593                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    594                         return NULL;
    595                     }
    596                     uprv_memcpy(nameCopy, name, keyLength);
    597                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
    598                 } else {
    599                     // race condition
    600                     allModes=(Norm2AllModes *)temp;
    601                 }
    602             }
    603         }
    604     }
    605     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    606         switch(mode) {
    607         case UNORM2_COMPOSE:
    608             return &allModes->comp;
    609         case UNORM2_DECOMPOSE:
    610             return &allModes->decomp;
    611         case UNORM2_FCD:
    612             allModes->impl.getFCDTrie(errorCode);
    613             return &allModes->fcd;
    614         case UNORM2_COMPOSE_CONTIGUOUS:
    615             return &allModes->fcc;
    616         default:
    617             break;  // do nothing
    618         }
    619     }
    620     return NULL;
    621 }
    622 
    623 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
    624 
    625 U_NAMESPACE_END
    626 
    627 // C API ------------------------------------------------------------------- ***
    628 
    629 U_NAMESPACE_USE
    630 
    631 U_DRAFT const UNormalizer2 * U_EXPORT2
    632 unorm2_getInstance(const char *packageName,
    633                    const char *name,
    634                    UNormalization2Mode mode,
    635                    UErrorCode *pErrorCode) {
    636     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    637 }
    638 
    639 U_DRAFT void U_EXPORT2
    640 unorm2_close(UNormalizer2 *norm2) {
    641     delete (Normalizer2 *)norm2;
    642 }
    643 
    644 U_DRAFT int32_t U_EXPORT2
    645 unorm2_normalize(const UNormalizer2 *norm2,
    646                  const UChar *src, int32_t length,
    647                  UChar *dest, int32_t capacity,
    648                  UErrorCode *pErrorCode) {
    649     if(U_FAILURE(*pErrorCode)) {
    650         return 0;
    651     }
    652     if( (src==NULL ? length!=0 : length<-1) ||
    653         (dest==NULL ? capacity!=0 : capacity<0) ||
    654         (src==dest && src!=NULL)
    655     ) {
    656         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    657         return 0;
    658     }
    659     UnicodeString destString(dest, 0, capacity);
    660     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
    661     if(length!=0) {
    662         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    663         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    664         if(n2wi!=NULL) {
    665             // Avoid duplicate argument checking and support NUL-terminated src.
    666             ReorderingBuffer buffer(n2wi->impl, destString);
    667             if(buffer.init(length, *pErrorCode)) {
    668                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
    669             }
    670         } else {
    671             UnicodeString srcString(length<0, src, length);
    672             n2->normalize(srcString, destString, *pErrorCode);
    673         }
    674     }
    675     return destString.extract(dest, capacity, *pErrorCode);
    676 }
    677 
    678 static int32_t
    679 normalizeSecondAndAppend(const UNormalizer2 *norm2,
    680                          UChar *first, int32_t firstLength, int32_t firstCapacity,
    681                          const UChar *second, int32_t secondLength,
    682                          UBool doNormalize,
    683                          UErrorCode *pErrorCode) {
    684     if(U_FAILURE(*pErrorCode)) {
    685         return 0;
    686     }
    687     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
    688         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
    689                        (firstCapacity<0 || firstLength<-1)) ||
    690         (first==second && first!=NULL)
    691     ) {
    692         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    693         return 0;
    694     }
    695     UnicodeString firstString(first, firstLength, firstCapacity);
    696     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
    697     if(secondLength!=0) {
    698         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    699         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    700         if(n2wi!=NULL) {
    701             // Avoid duplicate argument checking and support NUL-terminated src.
    702             ReorderingBuffer buffer(n2wi->impl, firstString);
    703             if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
    704                 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
    705                                         doNormalize, buffer, *pErrorCode);
    706             }
    707         } else {
    708             UnicodeString secondString(secondLength<0, second, secondLength);
    709             if(doNormalize) {
    710                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
    711             } else {
    712                 n2->append(firstString, secondString, *pErrorCode);
    713             }
    714         }
    715     }
    716     return firstString.extract(first, firstCapacity, *pErrorCode);
    717 }
    718 
    719 U_DRAFT int32_t U_EXPORT2
    720 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    721                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
    722                                 const UChar *second, int32_t secondLength,
    723                                 UErrorCode *pErrorCode) {
    724     return normalizeSecondAndAppend(norm2,
    725                                     first, firstLength, firstCapacity,
    726                                     second, secondLength,
    727                                     TRUE, pErrorCode);
    728 }
    729 
    730 U_DRAFT int32_t U_EXPORT2
    731 unorm2_append(const UNormalizer2 *norm2,
    732               UChar *first, int32_t firstLength, int32_t firstCapacity,
    733               const UChar *second, int32_t secondLength,
    734               UErrorCode *pErrorCode) {
    735     return normalizeSecondAndAppend(norm2,
    736                                     first, firstLength, firstCapacity,
    737                                     second, secondLength,
    738                                     FALSE, pErrorCode);
    739 }
    740 
    741 U_DRAFT int32_t U_EXPORT2
    742 unorm2_getDecomposition(const UNormalizer2 *norm2,
    743                         UChar32 c, UChar *decomposition, int32_t capacity,
    744                         UErrorCode *pErrorCode) {
    745     if(U_FAILURE(*pErrorCode)) {
    746         return 0;
    747     }
    748     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    749         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    750         return 0;
    751     }
    752     UnicodeString destString(decomposition, 0, capacity);
    753     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
    754         return destString.extract(decomposition, capacity, *pErrorCode);
    755     } else {
    756         return -1;
    757     }
    758 }
    759 
    760 U_DRAFT UBool U_EXPORT2
    761 unorm2_isNormalized(const UNormalizer2 *norm2,
    762                     const UChar *s, int32_t length,
    763                     UErrorCode *pErrorCode) {
    764     if(U_FAILURE(*pErrorCode)) {
    765         return 0;
    766     }
    767     if((s==NULL && length!=0) || length<-1) {
    768         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    769         return 0;
    770     }
    771     UnicodeString sString(length<0, s, length);
    772     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
    773 }
    774 
    775 U_DRAFT UNormalizationCheckResult U_EXPORT2
    776 unorm2_quickCheck(const UNormalizer2 *norm2,
    777                   const UChar *s, int32_t length,
    778                   UErrorCode *pErrorCode) {
    779     if(U_FAILURE(*pErrorCode)) {
    780         return UNORM_NO;
    781     }
    782     if((s==NULL && length!=0) || length<-1) {
    783         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    784         return UNORM_NO;
    785     }
    786     UnicodeString sString(length<0, s, length);
    787     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
    788 }
    789 
    790 U_DRAFT int32_t U_EXPORT2
    791 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    792                          const UChar *s, int32_t length,
    793                          UErrorCode *pErrorCode) {
    794     if(U_FAILURE(*pErrorCode)) {
    795         return 0;
    796     }
    797     if((s==NULL && length!=0) || length<-1) {
    798         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    799         return 0;
    800     }
    801     UnicodeString sString(length<0, s, length);
    802     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
    803 }
    804 
    805 U_DRAFT UBool U_EXPORT2
    806 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
    807     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
    808 }
    809 
    810 U_DRAFT UBool U_EXPORT2
    811 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
    812     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
    813 }
    814 
    815 U_DRAFT UBool U_EXPORT2
    816 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
    817     return ((const Normalizer2 *)norm2)->isInert(c);
    818 }
    819 
    820 // Some properties APIs ---------------------------------------------------- ***
    821 
    822 U_CFUNC UNormalizationCheckResult U_EXPORT2
    823 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    824     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    825         return UNORM_YES;
    826     }
    827     UErrorCode errorCode=U_ZERO_ERROR;
    828     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    829     if(U_SUCCESS(errorCode)) {
    830         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    831     } else {
    832         return UNORM_MAYBE;
    833     }
    834 }
    835 
    836 U_CAPI const uint16_t * U_EXPORT2
    837 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
    838     const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
    839     if(U_SUCCESS(*pErrorCode)) {
    840         fcdHighStart=trie->highStart;
    841         return trie->index;
    842     } else {
    843         return NULL;
    844     }
    845 }
    846 
    847 #endif  // !UCONFIG_NO_NORMALIZATION
    848