Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2012, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  normalizer2.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov22
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/localpointer.h"
     22 #include "unicode/normalizer2.h"
     23 #include "unicode/unistr.h"
     24 #include "unicode/unorm.h"
     25 #include "cpputils.h"
     26 #include "cstring.h"
     27 #include "mutex.h"
     28 #include "normalizer2impl.h"
     29 #include "ucln_cmn.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
     35 
     36 Normalizer2::~Normalizer2() {}
     37 
     38 UBool
     39 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
     40     return FALSE;
     41 }
     42 
     43 UChar32
     44 Normalizer2::composePair(UChar32, UChar32) const {
     45     return U_SENTINEL;
     46 }
     47 
     48 uint8_t
     49 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
     50     return 0;
     51 }
     52 
     53 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
     54 
     55 // Normalizer2 implementation for the old UNORM_NONE.
     56 class NoopNormalizer2 : public Normalizer2 {
     57     virtual ~NoopNormalizer2();
     58 
     59     virtual UnicodeString &
     60     normalize(const UnicodeString &src,
     61               UnicodeString &dest,
     62               UErrorCode &errorCode) const {
     63         if(U_SUCCESS(errorCode)) {
     64             if(&dest!=&src) {
     65                 dest=src;
     66             } else {
     67                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     68             }
     69         }
     70         return dest;
     71     }
     72     virtual UnicodeString &
     73     normalizeSecondAndAppend(UnicodeString &first,
     74                              const UnicodeString &second,
     75                              UErrorCode &errorCode) const {
     76         if(U_SUCCESS(errorCode)) {
     77             if(&first!=&second) {
     78                 first.append(second);
     79             } else {
     80                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     81             }
     82         }
     83         return first;
     84     }
     85     virtual UnicodeString &
     86     append(UnicodeString &first,
     87            const UnicodeString &second,
     88            UErrorCode &errorCode) const {
     89         if(U_SUCCESS(errorCode)) {
     90             if(&first!=&second) {
     91                 first.append(second);
     92             } else {
     93                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     94             }
     95         }
     96         return first;
     97     }
     98     virtual UBool
     99     getDecomposition(UChar32, UnicodeString &) const {
    100         return FALSE;
    101     }
    102     // No need to override the default getRawDecomposition().
    103     virtual UBool
    104     isNormalized(const UnicodeString &, UErrorCode &) const {
    105         return TRUE;
    106     }
    107     virtual UNormalizationCheckResult
    108     quickCheck(const UnicodeString &, UErrorCode &) const {
    109         return UNORM_YES;
    110     }
    111     virtual int32_t
    112     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
    113         return s.length();
    114     }
    115     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
    116     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
    117     virtual UBool isInert(UChar32) const { return TRUE; }
    118 };
    119 
    120 NoopNormalizer2::~NoopNormalizer2() {}
    121 
    122 // Intermediate class:
    123 // Has Normalizer2Impl and does boilerplate argument checking and setup.
    124 class Normalizer2WithImpl : public Normalizer2 {
    125 public:
    126     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
    127     virtual ~Normalizer2WithImpl();
    128 
    129     // normalize
    130     virtual UnicodeString &
    131     normalize(const UnicodeString &src,
    132               UnicodeString &dest,
    133               UErrorCode &errorCode) const {
    134         if(U_FAILURE(errorCode)) {
    135             dest.setToBogus();
    136             return dest;
    137         }
    138         const UChar *sArray=src.getBuffer();
    139         if(&dest==&src || sArray==NULL) {
    140             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    141             dest.setToBogus();
    142             return dest;
    143         }
    144         dest.remove();
    145         ReorderingBuffer buffer(impl, dest);
    146         if(buffer.init(src.length(), errorCode)) {
    147             normalize(sArray, sArray+src.length(), buffer, errorCode);
    148         }
    149         return dest;
    150     }
    151     virtual void
    152     normalize(const UChar *src, const UChar *limit,
    153               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    154 
    155     // normalize and append
    156     virtual UnicodeString &
    157     normalizeSecondAndAppend(UnicodeString &first,
    158                              const UnicodeString &second,
    159                              UErrorCode &errorCode) const {
    160         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
    161     }
    162     virtual UnicodeString &
    163     append(UnicodeString &first,
    164            const UnicodeString &second,
    165            UErrorCode &errorCode) const {
    166         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
    167     }
    168     UnicodeString &
    169     normalizeSecondAndAppend(UnicodeString &first,
    170                              const UnicodeString &second,
    171                              UBool doNormalize,
    172                              UErrorCode &errorCode) const {
    173         uprv_checkCanGetBuffer(first, errorCode);
    174         if(U_FAILURE(errorCode)) {
    175             return first;
    176         }
    177         const UChar *secondArray=second.getBuffer();
    178         if(&first==&second || secondArray==NULL) {
    179             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    180             return first;
    181         }
    182         int32_t firstLength=first.length();
    183         UnicodeString safeMiddle;
    184         {
    185             ReorderingBuffer buffer(impl, first);
    186             if(buffer.init(firstLength+second.length(), errorCode)) {
    187                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
    188                                    safeMiddle, buffer, errorCode);
    189             }
    190         }  // The ReorderingBuffer destructor finalizes the first string.
    191         if(U_FAILURE(errorCode)) {
    192             // Restore the modified suffix of the first string.
    193             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
    194         }
    195         return first;
    196     }
    197     virtual void
    198     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    199                        UnicodeString &safeMiddle,
    200                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    201     virtual UBool
    202     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    203         UChar buffer[4];
    204         int32_t length;
    205         const UChar *d=impl.getDecomposition(c, buffer, length);
    206         if(d==NULL) {
    207             return FALSE;
    208         }
    209         if(d==buffer) {
    210             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    211         } else {
    212             decomposition.setTo(FALSE, d, length);  // read-only alias
    213         }
    214         return TRUE;
    215     }
    216     virtual UBool
    217     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    218         UChar buffer[30];
    219         int32_t length;
    220         const UChar *d=impl.getRawDecomposition(c, buffer, length);
    221         if(d==NULL) {
    222             return FALSE;
    223         }
    224         if(d==buffer) {
    225             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
    226         } else {
    227             decomposition.setTo(FALSE, d, length);  // read-only alias
    228         }
    229         return TRUE;
    230     }
    231     virtual UChar32
    232     composePair(UChar32 a, UChar32 b) const {
    233         return impl.composePair(a, b);
    234     }
    235 
    236     virtual uint8_t
    237     getCombiningClass(UChar32 c) const {
    238         return impl.getCC(impl.getNorm16(c));
    239     }
    240 
    241     // quick checks
    242     virtual UBool
    243     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    244         if(U_FAILURE(errorCode)) {
    245             return FALSE;
    246         }
    247         const UChar *sArray=s.getBuffer();
    248         if(sArray==NULL) {
    249             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    250             return FALSE;
    251         }
    252         const UChar *sLimit=sArray+s.length();
    253         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    254     }
    255     virtual UNormalizationCheckResult
    256     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    257         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    258     }
    259     virtual int32_t
    260     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    261         if(U_FAILURE(errorCode)) {
    262             return 0;
    263         }
    264         const UChar *sArray=s.getBuffer();
    265         if(sArray==NULL) {
    266             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    267             return 0;
    268         }
    269         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    270     }
    271     virtual const UChar *
    272     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
    273 
    274     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    275         return UNORM_YES;
    276     }
    277 
    278     const Normalizer2Impl &impl;
    279 };
    280 
    281 Normalizer2WithImpl::~Normalizer2WithImpl() {}
    282 
    283 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    284 public:
    285     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    286     virtual ~DecomposeNormalizer2();
    287 
    288 private:
    289     virtual void
    290     normalize(const UChar *src, const UChar *limit,
    291               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    292         impl.decompose(src, limit, &buffer, errorCode);
    293     }
    294     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    295     virtual void
    296     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    297                        UnicodeString &safeMiddle,
    298                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    299         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    300     }
    301     virtual const UChar *
    302     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    303         return impl.decompose(src, limit, NULL, errorCode);
    304     }
    305     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    306     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    307         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    308     }
    309     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
    310     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
    311     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
    312 };
    313 
    314 DecomposeNormalizer2::~DecomposeNormalizer2() {}
    315 
    316 class ComposeNormalizer2 : public Normalizer2WithImpl {
    317 public:
    318     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    319         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    320     virtual ~ComposeNormalizer2();
    321 
    322 private:
    323     virtual void
    324     normalize(const UChar *src, const UChar *limit,
    325               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    326         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    327     }
    328     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    329     virtual void
    330     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    331                        UnicodeString &safeMiddle,
    332                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    333         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    334     }
    335 
    336     virtual UBool
    337     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    338         if(U_FAILURE(errorCode)) {
    339             return FALSE;
    340         }
    341         const UChar *sArray=s.getBuffer();
    342         if(sArray==NULL) {
    343             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    344             return FALSE;
    345         }
    346         UnicodeString temp;
    347         ReorderingBuffer buffer(impl, temp);
    348         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    349             return FALSE;
    350         }
    351         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    352     }
    353     virtual UNormalizationCheckResult
    354     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    355         if(U_FAILURE(errorCode)) {
    356             return UNORM_MAYBE;
    357         }
    358         const UChar *sArray=s.getBuffer();
    359         if(sArray==NULL) {
    360             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    361             return UNORM_MAYBE;
    362         }
    363         UNormalizationCheckResult qcResult=UNORM_YES;
    364         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    365         return qcResult;
    366     }
    367     virtual const UChar *
    368     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
    369         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    370     }
    371     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    372     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    373         return impl.getCompQuickCheck(impl.getNorm16(c));
    374     }
    375     virtual UBool hasBoundaryBefore(UChar32 c) const {
    376         return impl.hasCompBoundaryBefore(c);
    377     }
    378     virtual UBool hasBoundaryAfter(UChar32 c) const {
    379         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    380     }
    381     virtual UBool isInert(UChar32 c) const {
    382         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    383     }
    384 
    385     const UBool onlyContiguous;
    386 };
    387 
    388 ComposeNormalizer2::~ComposeNormalizer2() {}
    389 
    390 class FCDNormalizer2 : public Normalizer2WithImpl {
    391 public:
    392     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    393     virtual ~FCDNormalizer2();
    394 
    395 private:
    396     virtual void
    397     normalize(const UChar *src, const UChar *limit,
    398               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    399         impl.makeFCD(src, limit, &buffer, errorCode);
    400     }
    401     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    402     virtual void
    403     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    404                        UnicodeString &safeMiddle,
    405                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    406         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    407     }
    408     virtual const UChar *
    409     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    410         return impl.makeFCD(src, limit, NULL, errorCode);
    411     }
    412     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    413     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    414     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    415     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
    416 };
    417 
    418 FCDNormalizer2::~FCDNormalizer2() {}
    419 
    420 // instance cache ---------------------------------------------------------- ***
    421 
    422 struct Norm2AllModes : public UMemory {
    423     static Norm2AllModes *createInstance(const char *packageName,
    424                                          const char *name,
    425                                          UErrorCode &errorCode);
    426     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
    427 
    428     Normalizer2Impl impl;
    429     ComposeNormalizer2 comp;
    430     DecomposeNormalizer2 decomp;
    431     FCDNormalizer2 fcd;
    432     ComposeNormalizer2 fcc;
    433 };
    434 
    435 Norm2AllModes *
    436 Norm2AllModes::createInstance(const char *packageName,
    437                               const char *name,
    438                               UErrorCode &errorCode) {
    439     if(U_FAILURE(errorCode)) {
    440         return NULL;
    441     }
    442     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
    443     if(allModes.isNull()) {
    444         errorCode=U_MEMORY_ALLOCATION_ERROR;
    445         return NULL;
    446     }
    447     allModes->impl.load(packageName, name, errorCode);
    448     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
    449 }
    450 
    451 U_CDECL_BEGIN
    452 static UBool U_CALLCONV uprv_normalizer2_cleanup();
    453 U_CDECL_END
    454 
    455 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
    456 public:
    457     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
    458         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
    459     Norm2AllModes *getInstance(UErrorCode &errorCode) {
    460         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
    461     }
    462 private:
    463     static void *createInstance(const void *context, UErrorCode &errorCode) {
    464         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    465         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
    466     }
    467 
    468     const char *name;
    469 };
    470 
    471 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
    472 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
    473 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
    474 
    475 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
    476 public:
    477     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
    478     Normalizer2 *getInstance(UErrorCode &errorCode) {
    479         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
    480     }
    481 private:
    482     static void *createInstance(const void *, UErrorCode &errorCode) {
    483         Normalizer2 *noop=new NoopNormalizer2;
    484         if(noop==NULL) {
    485             errorCode=U_MEMORY_ALLOCATION_ERROR;
    486         }
    487         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    488         return noop;
    489     }
    490 };
    491 
    492 STATIC_SIMPLE_SINGLETON(noopSingleton);
    493 
    494 static UHashtable *cache=NULL;
    495 
    496 U_CDECL_BEGIN
    497 
    498 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    499     delete (Norm2AllModes *)allModes;
    500 }
    501 
    502 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
    503     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
    504     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
    505     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
    506     Norm2Singleton(noopSingleton).deleteInstance();
    507     uhash_close(cache);
    508     cache=NULL;
    509     return TRUE;
    510 }
    511 
    512 U_CDECL_END
    513 
    514 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
    515     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    516     return allModes!=NULL ? &allModes->comp : NULL;
    517 }
    518 
    519 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
    520     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    521     return allModes!=NULL ? &allModes->decomp : NULL;
    522 }
    523 
    524 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    525     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    526     return allModes!=NULL ? &allModes->fcd : NULL;
    527 }
    528 
    529 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    530     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    531     return allModes!=NULL ? &allModes->fcc : NULL;
    532 }
    533 
    534 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
    535     Norm2AllModes *allModes=
    536         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    537     return allModes!=NULL ? &allModes->comp : NULL;
    538 }
    539 
    540 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
    541     Norm2AllModes *allModes=
    542         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    543     return allModes!=NULL ? &allModes->decomp : NULL;
    544 }
    545 
    546 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
    547     Norm2AllModes *allModes=
    548         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    549     return allModes!=NULL ? &allModes->comp : NULL;
    550 }
    551 
    552 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    553     return Norm2Singleton(noopSingleton).getInstance(errorCode);
    554 }
    555 
    556 const Normalizer2 *
    557 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    558     if(U_FAILURE(errorCode)) {
    559         return NULL;
    560     }
    561     switch(mode) {
    562     case UNORM_NFD:
    563         return getNFDInstance(errorCode);
    564     case UNORM_NFKD:
    565         return getNFKDInstance(errorCode);
    566     case UNORM_NFC:
    567         return getNFCInstance(errorCode);
    568     case UNORM_NFKC:
    569         return getNFKCInstance(errorCode);
    570     case UNORM_FCD:
    571         return getFCDInstance(errorCode);
    572     default:  // UNORM_NONE
    573         return getNoopInstance(errorCode);
    574     }
    575 }
    576 
    577 const Normalizer2Impl *
    578 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    579     Norm2AllModes *allModes=
    580         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    581     return allModes!=NULL ? &allModes->impl : NULL;
    582 }
    583 
    584 const Normalizer2Impl *
    585 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    586     Norm2AllModes *allModes=
    587         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    588     return allModes!=NULL ? &allModes->impl : NULL;
    589 }
    590 
    591 const Normalizer2Impl *
    592 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    593     Norm2AllModes *allModes=
    594         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    595     return allModes!=NULL ? &allModes->impl : NULL;
    596 }
    597 
    598 const Normalizer2Impl *
    599 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    600     return &((Normalizer2WithImpl *)norm2)->impl;
    601 }
    602 
    603 const Normalizer2 *
    604 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    605     return Normalizer2Factory::getNFCInstance(errorCode);
    606 }
    607 
    608 const Normalizer2 *
    609 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    610     return Normalizer2Factory::getNFDInstance(errorCode);
    611 }
    612 
    613 const Normalizer2 *
    614 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    615     return Normalizer2Factory::getNFKCInstance(errorCode);
    616 }
    617 
    618 const Normalizer2 *
    619 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    620     return Normalizer2Factory::getNFKDInstance(errorCode);
    621 }
    622 
    623 const Normalizer2 *
    624 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    625     return Normalizer2Factory::getNFKC_CFInstance(errorCode);
    626 }
    627 
    628 const Normalizer2 *
    629 Normalizer2::getInstance(const char *packageName,
    630                          const char *name,
    631                          UNormalization2Mode mode,
    632                          UErrorCode &errorCode) {
    633     if(U_FAILURE(errorCode)) {
    634         return NULL;
    635     }
    636     if(name==NULL || *name==0) {
    637         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    638         return NULL;
    639     }
    640     Norm2AllModes *allModes=NULL;
    641     if(packageName==NULL) {
    642         if(0==uprv_strcmp(name, "nfc")) {
    643             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    644         } else if(0==uprv_strcmp(name, "nfkc")) {
    645             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    646         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    647             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    648         }
    649     }
    650     if(allModes==NULL && U_SUCCESS(errorCode)) {
    651         {
    652             Mutex lock;
    653             if(cache!=NULL) {
    654                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    655             }
    656         }
    657         if(allModes==NULL) {
    658             LocalPointer<Norm2AllModes> localAllModes(
    659                 Norm2AllModes::createInstance(packageName, name, errorCode));
    660             if(U_SUCCESS(errorCode)) {
    661                 Mutex lock;
    662                 if(cache==NULL) {
    663                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    664                     if(U_FAILURE(errorCode)) {
    665                         return NULL;
    666                     }
    667                     uhash_setKeyDeleter(cache, uprv_free);
    668                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    669                 }
    670                 void *temp=uhash_get(cache, name);
    671                 if(temp==NULL) {
    672                     int32_t keyLength=uprv_strlen(name)+1;
    673                     char *nameCopy=(char *)uprv_malloc(keyLength);
    674                     if(nameCopy==NULL) {
    675                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    676                         return NULL;
    677                     }
    678                     uprv_memcpy(nameCopy, name, keyLength);
    679                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
    680                 } else {
    681                     // race condition
    682                     allModes=(Norm2AllModes *)temp;
    683                 }
    684             }
    685         }
    686     }
    687     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    688         switch(mode) {
    689         case UNORM2_COMPOSE:
    690             return &allModes->comp;
    691         case UNORM2_DECOMPOSE:
    692             return &allModes->decomp;
    693         case UNORM2_FCD:
    694             return &allModes->fcd;
    695         case UNORM2_COMPOSE_CONTIGUOUS:
    696             return &allModes->fcc;
    697         default:
    698             break;  // do nothing
    699         }
    700     }
    701     return NULL;
    702 }
    703 
    704 U_NAMESPACE_END
    705 
    706 // C API ------------------------------------------------------------------- ***
    707 
    708 U_NAMESPACE_USE
    709 
    710 U_CAPI const UNormalizer2 * U_EXPORT2
    711 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
    712     return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
    713 }
    714 
    715 U_CAPI const UNormalizer2 * U_EXPORT2
    716 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
    717     return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
    718 }
    719 
    720 U_CAPI const UNormalizer2 * U_EXPORT2
    721 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    722     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    723 }
    724 
    725 U_CAPI const UNormalizer2 * U_EXPORT2
    726 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    727     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    728 }
    729 
    730 U_CAPI const UNormalizer2 * U_EXPORT2
    731 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    732     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    733 }
    734 
    735 U_CAPI const UNormalizer2 * U_EXPORT2
    736 unorm2_getInstance(const char *packageName,
    737                    const char *name,
    738                    UNormalization2Mode mode,
    739                    UErrorCode *pErrorCode) {
    740     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    741 }
    742 
    743 U_CAPI void U_EXPORT2
    744 unorm2_close(UNormalizer2 *norm2) {
    745     delete (Normalizer2 *)norm2;
    746 }
    747 
    748 U_CAPI int32_t U_EXPORT2
    749 unorm2_normalize(const UNormalizer2 *norm2,
    750                  const UChar *src, int32_t length,
    751                  UChar *dest, int32_t capacity,
    752                  UErrorCode *pErrorCode) {
    753     if(U_FAILURE(*pErrorCode)) {
    754         return 0;
    755     }
    756     if( (src==NULL ? length!=0 : length<-1) ||
    757         (dest==NULL ? capacity!=0 : capacity<0) ||
    758         (src==dest && src!=NULL)
    759     ) {
    760         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    761         return 0;
    762     }
    763     UnicodeString destString(dest, 0, capacity);
    764     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
    765     if(length!=0) {
    766         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    767         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    768         if(n2wi!=NULL) {
    769             // Avoid duplicate argument checking and support NUL-terminated src.
    770             ReorderingBuffer buffer(n2wi->impl, destString);
    771             if(buffer.init(length, *pErrorCode)) {
    772                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
    773             }
    774         } else {
    775             UnicodeString srcString(length<0, src, length);
    776             n2->normalize(srcString, destString, *pErrorCode);
    777         }
    778     }
    779     return destString.extract(dest, capacity, *pErrorCode);
    780 }
    781 
    782 static int32_t
    783 normalizeSecondAndAppend(const UNormalizer2 *norm2,
    784                          UChar *first, int32_t firstLength, int32_t firstCapacity,
    785                          const UChar *second, int32_t secondLength,
    786                          UBool doNormalize,
    787                          UErrorCode *pErrorCode) {
    788     if(U_FAILURE(*pErrorCode)) {
    789         return 0;
    790     }
    791     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
    792         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
    793                        (firstCapacity<0 || firstLength<-1)) ||
    794         (first==second && first!=NULL)
    795     ) {
    796         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    797         return 0;
    798     }
    799     UnicodeString firstString(first, firstLength, firstCapacity);
    800     firstLength=firstString.length();  // In case it was -1.
    801     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
    802     if(secondLength!=0) {
    803         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    804         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    805         if(n2wi!=NULL) {
    806             // Avoid duplicate argument checking and support NUL-terminated src.
    807             UnicodeString safeMiddle;
    808             {
    809                 ReorderingBuffer buffer(n2wi->impl, firstString);
    810                 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
    811                     n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
    812                                              doNormalize, safeMiddle, buffer, *pErrorCode);
    813                 }
    814             }  // The ReorderingBuffer destructor finalizes firstString.
    815             if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
    816                 // Restore the modified suffix of the first string.
    817                 // This does not restore first[] array contents between firstLength and firstCapacity.
    818                 // (That might be uninitialized memory, as far as we know.)
    819                 if(first!=NULL) { /* don't dereference NULL */
    820                   safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
    821                   if(firstLength<firstCapacity) {
    822                     first[firstLength]=0;  // NUL-terminate in case it was originally.
    823                   }
    824                 }
    825             }
    826         } else {
    827             UnicodeString secondString(secondLength<0, second, secondLength);
    828             if(doNormalize) {
    829                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
    830             } else {
    831                 n2->append(firstString, secondString, *pErrorCode);
    832             }
    833         }
    834     }
    835     return firstString.extract(first, firstCapacity, *pErrorCode);
    836 }
    837 
    838 U_CAPI int32_t U_EXPORT2
    839 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    840                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
    841                                 const UChar *second, int32_t secondLength,
    842                                 UErrorCode *pErrorCode) {
    843     return normalizeSecondAndAppend(norm2,
    844                                     first, firstLength, firstCapacity,
    845                                     second, secondLength,
    846                                     TRUE, pErrorCode);
    847 }
    848 
    849 U_CAPI int32_t U_EXPORT2
    850 unorm2_append(const UNormalizer2 *norm2,
    851               UChar *first, int32_t firstLength, int32_t firstCapacity,
    852               const UChar *second, int32_t secondLength,
    853               UErrorCode *pErrorCode) {
    854     return normalizeSecondAndAppend(norm2,
    855                                     first, firstLength, firstCapacity,
    856                                     second, secondLength,
    857                                     FALSE, pErrorCode);
    858 }
    859 
    860 U_CAPI int32_t U_EXPORT2
    861 unorm2_getDecomposition(const UNormalizer2 *norm2,
    862                         UChar32 c, UChar *decomposition, int32_t capacity,
    863                         UErrorCode *pErrorCode) {
    864     if(U_FAILURE(*pErrorCode)) {
    865         return 0;
    866     }
    867     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    868         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    869         return 0;
    870     }
    871     UnicodeString destString(decomposition, 0, capacity);
    872     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
    873         return destString.extract(decomposition, capacity, *pErrorCode);
    874     } else {
    875         return -1;
    876     }
    877 }
    878 
    879 U_CAPI int32_t U_EXPORT2
    880 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
    881                            UChar32 c, UChar *decomposition, int32_t capacity,
    882                            UErrorCode *pErrorCode) {
    883     if(U_FAILURE(*pErrorCode)) {
    884         return 0;
    885     }
    886     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    887         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    888         return 0;
    889     }
    890     UnicodeString destString(decomposition, 0, capacity);
    891     if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
    892         return destString.extract(decomposition, capacity, *pErrorCode);
    893     } else {
    894         return -1;
    895     }
    896 }
    897 
    898 U_CAPI UChar32 U_EXPORT2
    899 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
    900     return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
    901 }
    902 
    903 U_CAPI uint8_t U_EXPORT2
    904 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
    905     return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
    906 }
    907 
    908 U_CAPI UBool U_EXPORT2
    909 unorm2_isNormalized(const UNormalizer2 *norm2,
    910                     const UChar *s, int32_t length,
    911                     UErrorCode *pErrorCode) {
    912     if(U_FAILURE(*pErrorCode)) {
    913         return 0;
    914     }
    915     if((s==NULL && length!=0) || length<-1) {
    916         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    917         return 0;
    918     }
    919     UnicodeString sString(length<0, s, length);
    920     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
    921 }
    922 
    923 U_CAPI UNormalizationCheckResult U_EXPORT2
    924 unorm2_quickCheck(const UNormalizer2 *norm2,
    925                   const UChar *s, int32_t length,
    926                   UErrorCode *pErrorCode) {
    927     if(U_FAILURE(*pErrorCode)) {
    928         return UNORM_NO;
    929     }
    930     if((s==NULL && length!=0) || length<-1) {
    931         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    932         return UNORM_NO;
    933     }
    934     UnicodeString sString(length<0, s, length);
    935     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
    936 }
    937 
    938 U_CAPI int32_t U_EXPORT2
    939 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    940                          const UChar *s, int32_t length,
    941                          UErrorCode *pErrorCode) {
    942     if(U_FAILURE(*pErrorCode)) {
    943         return 0;
    944     }
    945     if((s==NULL && length!=0) || length<-1) {
    946         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    947         return 0;
    948     }
    949     UnicodeString sString(length<0, s, length);
    950     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
    951 }
    952 
    953 U_CAPI UBool U_EXPORT2
    954 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
    955     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
    956 }
    957 
    958 U_CAPI UBool U_EXPORT2
    959 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
    960     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
    961 }
    962 
    963 U_CAPI UBool U_EXPORT2
    964 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
    965     return ((const Normalizer2 *)norm2)->isInert(c);
    966 }
    967 
    968 // Some properties APIs ---------------------------------------------------- ***
    969 
    970 U_CAPI uint8_t U_EXPORT2
    971 u_getCombiningClass(UChar32 c) {
    972     UErrorCode errorCode=U_ZERO_ERROR;
    973     const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
    974     if(U_SUCCESS(errorCode)) {
    975         return nfd->getCombiningClass(c);
    976     } else {
    977         return 0;
    978     }
    979 }
    980 
    981 U_CFUNC UNormalizationCheckResult
    982 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    983     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    984         return UNORM_YES;
    985     }
    986     UErrorCode errorCode=U_ZERO_ERROR;
    987     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    988     if(U_SUCCESS(errorCode)) {
    989         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    990     } else {
    991         return UNORM_MAYBE;
    992     }
    993 }
    994 
    995 U_CFUNC uint16_t
    996 unorm_getFCD16(UChar32 c) {
    997     UErrorCode errorCode=U_ZERO_ERROR;
    998     const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    999     if(U_SUCCESS(errorCode)) {
   1000         return impl->getFCD16(c);
   1001     } else {
   1002         return 0;
   1003     }
   1004 }
   1005 
   1006 #endif  // !UCONFIG_NO_NORMALIZATION
   1007