Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2012, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  normalizer2.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov22
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/localpointer.h"
     22 #include "unicode/normalizer2.h"
     23 #include "unicode/unistr.h"
     24 #include "unicode/unorm.h"
     25 #include "cpputils.h"
     26 #include "cstring.h"
     27 #include "mutex.h"
     28 #include "normalizer2impl.h"
     29 #include "ucln_cmn.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
     35 
     36 Normalizer2::~Normalizer2() {}
     37 
     38 UBool
     39 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
     40     return FALSE;
     41 }
     42 
     43 UChar32
     44 Normalizer2::composePair(UChar32, UChar32) const {
     45     return U_SENTINEL;
     46 }
     47 
     48 uint8_t
     49 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
     50     return 0;
     51 }
     52 
     53 // Normalizer2 implementation for the old UNORM_NONE.
     54 class NoopNormalizer2 : public Normalizer2 {
     55     virtual ~NoopNormalizer2();
     56 
     57     virtual UnicodeString &
     58     normalize(const UnicodeString &src,
     59               UnicodeString &dest,
     60               UErrorCode &errorCode) const {
     61         if(U_SUCCESS(errorCode)) {
     62             if(&dest!=&src) {
     63                 dest=src;
     64             } else {
     65                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     66             }
     67         }
     68         return dest;
     69     }
     70     virtual UnicodeString &
     71     normalizeSecondAndAppend(UnicodeString &first,
     72                              const UnicodeString &second,
     73                              UErrorCode &errorCode) const {
     74         if(U_SUCCESS(errorCode)) {
     75             if(&first!=&second) {
     76                 first.append(second);
     77             } else {
     78                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     79             }
     80         }
     81         return first;
     82     }
     83     virtual UnicodeString &
     84     append(UnicodeString &first,
     85            const UnicodeString &second,
     86            UErrorCode &errorCode) const {
     87         if(U_SUCCESS(errorCode)) {
     88             if(&first!=&second) {
     89                 first.append(second);
     90             } else {
     91                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     92             }
     93         }
     94         return first;
     95     }
     96     virtual UBool
     97     getDecomposition(UChar32, UnicodeString &) const {
     98         return FALSE;
     99     }
    100     // No need to override the default getRawDecomposition().
    101     virtual UBool
    102     isNormalized(const UnicodeString &, UErrorCode &) const {
    103         return TRUE;
    104     }
    105     virtual UNormalizationCheckResult
    106     quickCheck(const UnicodeString &, UErrorCode &) const {
    107         return UNORM_YES;
    108     }
    109     virtual int32_t
    110     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
    111         return s.length();
    112     }
    113     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
    114     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
    115     virtual UBool isInert(UChar32) const { return TRUE; }
    116 };
    117 
    118 NoopNormalizer2::~NoopNormalizer2() {}
    119 
    120 // Intermediate class:
    121 // Has Normalizer2Impl and does boilerplate argument checking and setup.
    122 class Normalizer2WithImpl : public Normalizer2 {
    123 public:
    124     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
    125     virtual ~Normalizer2WithImpl();
    126 
    127     // normalize
    128     virtual UnicodeString &
    129     normalize(const UnicodeString &src,
    130               UnicodeString &dest,
    131               UErrorCode &errorCode) const {
    132         if(U_FAILURE(errorCode)) {
    133             dest.setToBogus();
    134             return dest;
    135         }
    136         const UChar *sArray=src.getBuffer();
    137         if(&dest==&src || sArray==NULL) {
    138             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    139             dest.setToBogus();
    140             return dest;
    141         }
    142         dest.remove();
    143         ReorderingBuffer buffer(impl, dest);
    144         if(buffer.init(src.length(), errorCode)) {
    145             normalize(sArray, sArray+src.length(), buffer, errorCode);
    146         }
    147         return dest;
    148     }
    149     virtual void
    150     normalize(const UChar *src, const UChar *limit,
    151               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    152 
    153     // normalize and append
    154     virtual UnicodeString &
    155     normalizeSecondAndAppend(UnicodeString &first,
    156                              const UnicodeString &second,
    157                              UErrorCode &errorCode) const {
    158         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
    159     }
    160     virtual UnicodeString &
    161     append(UnicodeString &first,
    162            const UnicodeString &second,
    163            UErrorCode &errorCode) const {
    164         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
    165     }
    166     UnicodeString &
    167     normalizeSecondAndAppend(UnicodeString &first,
    168                              const UnicodeString &second,
    169                              UBool doNormalize,
    170                              UErrorCode &errorCode) const {
    171         uprv_checkCanGetBuffer(first, errorCode);
    172         if(U_FAILURE(errorCode)) {
    173             return first;
    174         }
    175         const UChar *secondArray=second.getBuffer();
    176         if(&first==&second || secondArray==NULL) {
    177             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    178             return first;
    179         }
    180         int32_t firstLength=first.length();
    181         UnicodeString safeMiddle;
    182         {
    183             ReorderingBuffer buffer(impl, first);
    184             if(buffer.init(firstLength+second.length(), errorCode)) {
    185                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
    186                                    safeMiddle, buffer, errorCode);
    187             }
    188         }  // The ReorderingBuffer destructor finalizes the first string.
    189         if(U_FAILURE(errorCode)) {
    190             // Restore the modified suffix of the first string.
    191             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
    192         }
    193         return first;
    194     }
    195     virtual void
    196     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    197                        UnicodeString &safeMiddle,
    198                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    199     virtual UBool
    200     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    201         UChar buffer[4];
    202         int32_t length;
    203         const UChar *d=impl.getDecomposition(c, buffer, length);
    204         if(d==NULL) {
    205             return FALSE;
    206         }
    207         if(d==buffer) {
    208             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    209         } else {
    210             decomposition.setTo(FALSE, d, length);  // read-only alias
    211         }
    212         return TRUE;
    213     }
    214     virtual UBool
    215     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    216         UChar buffer[30];
    217         int32_t length;
    218         const UChar *d=impl.getRawDecomposition(c, buffer, length);
    219         if(d==NULL) {
    220             return FALSE;
    221         }
    222         if(d==buffer) {
    223             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
    224         } else {
    225             decomposition.setTo(FALSE, d, length);  // read-only alias
    226         }
    227         return TRUE;
    228     }
    229     virtual UChar32
    230     composePair(UChar32 a, UChar32 b) const {
    231         return impl.composePair(a, b);
    232     }
    233 
    234     virtual uint8_t
    235     getCombiningClass(UChar32 c) const {
    236         return impl.getCC(impl.getNorm16(c));
    237     }
    238 
    239     // quick checks
    240     virtual UBool
    241     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    242         if(U_FAILURE(errorCode)) {
    243             return FALSE;
    244         }
    245         const UChar *sArray=s.getBuffer();
    246         if(sArray==NULL) {
    247             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    248             return FALSE;
    249         }
    250         const UChar *sLimit=sArray+s.length();
    251         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    252     }
    253     virtual UNormalizationCheckResult
    254     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    255         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    256     }
    257     virtual int32_t
    258     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    259         if(U_FAILURE(errorCode)) {
    260             return 0;
    261         }
    262         const UChar *sArray=s.getBuffer();
    263         if(sArray==NULL) {
    264             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    265             return 0;
    266         }
    267         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    268     }
    269     virtual const UChar *
    270     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
    271 
    272     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    273         return UNORM_YES;
    274     }
    275 
    276     const Normalizer2Impl &impl;
    277 };
    278 
    279 Normalizer2WithImpl::~Normalizer2WithImpl() {}
    280 
    281 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    282 public:
    283     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    284     virtual ~DecomposeNormalizer2();
    285 
    286 private:
    287     virtual void
    288     normalize(const UChar *src, const UChar *limit,
    289               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    290         impl.decompose(src, limit, &buffer, errorCode);
    291     }
    292     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    293     virtual void
    294     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    295                        UnicodeString &safeMiddle,
    296                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    297         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    298     }
    299     virtual const UChar *
    300     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    301         return impl.decompose(src, limit, NULL, errorCode);
    302     }
    303     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    304     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    305         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    306     }
    307     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
    308     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
    309     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
    310 };
    311 
    312 DecomposeNormalizer2::~DecomposeNormalizer2() {}
    313 
    314 class ComposeNormalizer2 : public Normalizer2WithImpl {
    315 public:
    316     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    317         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    318     virtual ~ComposeNormalizer2();
    319 
    320 private:
    321     virtual void
    322     normalize(const UChar *src, const UChar *limit,
    323               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    324         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    325     }
    326     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    327     virtual void
    328     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    329                        UnicodeString &safeMiddle,
    330                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    331         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    332     }
    333 
    334     virtual UBool
    335     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    336         if(U_FAILURE(errorCode)) {
    337             return FALSE;
    338         }
    339         const UChar *sArray=s.getBuffer();
    340         if(sArray==NULL) {
    341             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    342             return FALSE;
    343         }
    344         UnicodeString temp;
    345         ReorderingBuffer buffer(impl, temp);
    346         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    347             return FALSE;
    348         }
    349         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    350     }
    351     virtual UNormalizationCheckResult
    352     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    353         if(U_FAILURE(errorCode)) {
    354             return UNORM_MAYBE;
    355         }
    356         const UChar *sArray=s.getBuffer();
    357         if(sArray==NULL) {
    358             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    359             return UNORM_MAYBE;
    360         }
    361         UNormalizationCheckResult qcResult=UNORM_YES;
    362         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    363         return qcResult;
    364     }
    365     virtual const UChar *
    366     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
    367         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    368     }
    369     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    370     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    371         return impl.getCompQuickCheck(impl.getNorm16(c));
    372     }
    373     virtual UBool hasBoundaryBefore(UChar32 c) const {
    374         return impl.hasCompBoundaryBefore(c);
    375     }
    376     virtual UBool hasBoundaryAfter(UChar32 c) const {
    377         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    378     }
    379     virtual UBool isInert(UChar32 c) const {
    380         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    381     }
    382 
    383     const UBool onlyContiguous;
    384 };
    385 
    386 ComposeNormalizer2::~ComposeNormalizer2() {}
    387 
    388 class FCDNormalizer2 : public Normalizer2WithImpl {
    389 public:
    390     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    391     virtual ~FCDNormalizer2();
    392 
    393 private:
    394     virtual void
    395     normalize(const UChar *src, const UChar *limit,
    396               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    397         impl.makeFCD(src, limit, &buffer, errorCode);
    398     }
    399     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    400     virtual void
    401     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    402                        UnicodeString &safeMiddle,
    403                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    404         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    405     }
    406     virtual const UChar *
    407     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    408         return impl.makeFCD(src, limit, NULL, errorCode);
    409     }
    410     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    411     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    412     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    413     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
    414 };
    415 
    416 FCDNormalizer2::~FCDNormalizer2() {}
    417 
    418 // instance cache ---------------------------------------------------------- ***
    419 
    420 struct Norm2AllModes : public UMemory {
    421     static Norm2AllModes *createInstance(const char *packageName,
    422                                          const char *name,
    423                                          UErrorCode &errorCode);
    424     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
    425 
    426     Normalizer2Impl impl;
    427     ComposeNormalizer2 comp;
    428     DecomposeNormalizer2 decomp;
    429     FCDNormalizer2 fcd;
    430     ComposeNormalizer2 fcc;
    431 };
    432 
    433 Norm2AllModes *
    434 Norm2AllModes::createInstance(const char *packageName,
    435                               const char *name,
    436                               UErrorCode &errorCode) {
    437     if(U_FAILURE(errorCode)) {
    438         return NULL;
    439     }
    440     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
    441     if(allModes.isNull()) {
    442         errorCode=U_MEMORY_ALLOCATION_ERROR;
    443         return NULL;
    444     }
    445     allModes->impl.load(packageName, name, errorCode);
    446     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
    447 }
    448 
    449 U_CDECL_BEGIN
    450 static UBool U_CALLCONV uprv_normalizer2_cleanup();
    451 U_CDECL_END
    452 
    453 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
    454 public:
    455     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
    456         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
    457     Norm2AllModes *getInstance(UErrorCode &errorCode) {
    458         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
    459     }
    460 private:
    461     static void *createInstance(const void *context, UErrorCode &errorCode) {
    462         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    463         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
    464     }
    465 
    466     const char *name;
    467 };
    468 
    469 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
    470 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
    471 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
    472 
    473 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
    474 public:
    475     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
    476     Normalizer2 *getInstance(UErrorCode &errorCode) {
    477         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
    478     }
    479 private:
    480     static void *createInstance(const void *, UErrorCode &errorCode) {
    481         Normalizer2 *noop=new NoopNormalizer2;
    482         if(noop==NULL) {
    483             errorCode=U_MEMORY_ALLOCATION_ERROR;
    484         }
    485         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
    486         return noop;
    487     }
    488 };
    489 
    490 STATIC_SIMPLE_SINGLETON(noopSingleton);
    491 
    492 static UHashtable *cache=NULL;
    493 
    494 U_CDECL_BEGIN
    495 
    496 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    497     delete (Norm2AllModes *)allModes;
    498 }
    499 
    500 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
    501     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
    502     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
    503     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
    504     Norm2Singleton(noopSingleton).deleteInstance();
    505     uhash_close(cache);
    506     cache=NULL;
    507     return TRUE;
    508 }
    509 
    510 U_CDECL_END
    511 
    512 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
    513     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    514     return allModes!=NULL ? &allModes->comp : NULL;
    515 }
    516 
    517 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
    518     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    519     return allModes!=NULL ? &allModes->decomp : NULL;
    520 }
    521 
    522 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    523     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    524     return allModes!=NULL ? &allModes->fcd : NULL;
    525 }
    526 
    527 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    528     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    529     return allModes!=NULL ? &allModes->fcc : NULL;
    530 }
    531 
    532 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
    533     Norm2AllModes *allModes=
    534         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    535     return allModes!=NULL ? &allModes->comp : NULL;
    536 }
    537 
    538 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
    539     Norm2AllModes *allModes=
    540         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    541     return allModes!=NULL ? &allModes->decomp : NULL;
    542 }
    543 
    544 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
    545     Norm2AllModes *allModes=
    546         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    547     return allModes!=NULL ? &allModes->comp : NULL;
    548 }
    549 
    550 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    551     return Norm2Singleton(noopSingleton).getInstance(errorCode);
    552 }
    553 
    554 const Normalizer2 *
    555 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    556     if(U_FAILURE(errorCode)) {
    557         return NULL;
    558     }
    559     switch(mode) {
    560     case UNORM_NFD:
    561         return getNFDInstance(errorCode);
    562     case UNORM_NFKD:
    563         return getNFKDInstance(errorCode);
    564     case UNORM_NFC:
    565         return getNFCInstance(errorCode);
    566     case UNORM_NFKC:
    567         return getNFKCInstance(errorCode);
    568     case UNORM_FCD:
    569         return getFCDInstance(errorCode);
    570     default:  // UNORM_NONE
    571         return getNoopInstance(errorCode);
    572     }
    573 }
    574 
    575 const Normalizer2Impl *
    576 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    577     Norm2AllModes *allModes=
    578         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    579     return allModes!=NULL ? &allModes->impl : NULL;
    580 }
    581 
    582 const Normalizer2Impl *
    583 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    584     Norm2AllModes *allModes=
    585         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    586     return allModes!=NULL ? &allModes->impl : NULL;
    587 }
    588 
    589 const Normalizer2Impl *
    590 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    591     Norm2AllModes *allModes=
    592         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    593     return allModes!=NULL ? &allModes->impl : NULL;
    594 }
    595 
    596 const Normalizer2Impl *
    597 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    598     return &((Normalizer2WithImpl *)norm2)->impl;
    599 }
    600 
    601 const Normalizer2 *
    602 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    603     return Normalizer2Factory::getNFCInstance(errorCode);
    604 }
    605 
    606 const Normalizer2 *
    607 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    608     return Normalizer2Factory::getNFDInstance(errorCode);
    609 }
    610 
    611 const Normalizer2 *
    612 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    613     return Normalizer2Factory::getNFKCInstance(errorCode);
    614 }
    615 
    616 const Normalizer2 *
    617 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    618     return Normalizer2Factory::getNFKDInstance(errorCode);
    619 }
    620 
    621 const Normalizer2 *
    622 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    623     return Normalizer2Factory::getNFKC_CFInstance(errorCode);
    624 }
    625 
    626 const Normalizer2 *
    627 Normalizer2::getInstance(const char *packageName,
    628                          const char *name,
    629                          UNormalization2Mode mode,
    630                          UErrorCode &errorCode) {
    631     if(U_FAILURE(errorCode)) {
    632         return NULL;
    633     }
    634     if(name==NULL || *name==0) {
    635         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    636         return NULL;
    637     }
    638     Norm2AllModes *allModes=NULL;
    639     if(packageName==NULL) {
    640         if(0==uprv_strcmp(name, "nfc")) {
    641             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    642         } else if(0==uprv_strcmp(name, "nfkc")) {
    643             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
    644         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    645             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
    646         }
    647     }
    648     if(allModes==NULL && U_SUCCESS(errorCode)) {
    649         {
    650             Mutex lock;
    651             if(cache!=NULL) {
    652                 allModes=(Norm2AllModes *)uhash_get(cache, name);
    653             }
    654         }
    655         if(allModes==NULL) {
    656             LocalPointer<Norm2AllModes> localAllModes(
    657                 Norm2AllModes::createInstance(packageName, name, errorCode));
    658             if(U_SUCCESS(errorCode)) {
    659                 Mutex lock;
    660                 if(cache==NULL) {
    661                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
    662                     if(U_FAILURE(errorCode)) {
    663                         return NULL;
    664                     }
    665                     uhash_setKeyDeleter(cache, uprv_free);
    666                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
    667                 }
    668                 void *temp=uhash_get(cache, name);
    669                 if(temp==NULL) {
    670                     int32_t keyLength=uprv_strlen(name)+1;
    671                     char *nameCopy=(char *)uprv_malloc(keyLength);
    672                     if(nameCopy==NULL) {
    673                         errorCode=U_MEMORY_ALLOCATION_ERROR;
    674                         return NULL;
    675                     }
    676                     uprv_memcpy(nameCopy, name, keyLength);
    677                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
    678                 } else {
    679                     // race condition
    680                     allModes=(Norm2AllModes *)temp;
    681                 }
    682             }
    683         }
    684     }
    685     if(allModes!=NULL && U_SUCCESS(errorCode)) {
    686         switch(mode) {
    687         case UNORM2_COMPOSE:
    688             return &allModes->comp;
    689         case UNORM2_DECOMPOSE:
    690             return &allModes->decomp;
    691         case UNORM2_FCD:
    692             return &allModes->fcd;
    693         case UNORM2_COMPOSE_CONTIGUOUS:
    694             return &allModes->fcc;
    695         default:
    696             break;  // do nothing
    697         }
    698     }
    699     return NULL;
    700 }
    701 
    702 U_NAMESPACE_END
    703 
    704 // C API ------------------------------------------------------------------- ***
    705 
    706 U_NAMESPACE_USE
    707 
    708 U_CAPI const UNormalizer2 * U_EXPORT2
    709 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
    710     return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
    711 }
    712 
    713 U_CAPI const UNormalizer2 * U_EXPORT2
    714 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
    715     return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
    716 }
    717 
    718 U_CAPI const UNormalizer2 * U_EXPORT2
    719 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    720     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    721 }
    722 
    723 U_CAPI const UNormalizer2 * U_EXPORT2
    724 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    725     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    726 }
    727 
    728 U_CAPI const UNormalizer2 * U_EXPORT2
    729 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    730     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    731 }
    732 
    733 U_CAPI const UNormalizer2 * U_EXPORT2
    734 unorm2_getInstance(const char *packageName,
    735                    const char *name,
    736                    UNormalization2Mode mode,
    737                    UErrorCode *pErrorCode) {
    738     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    739 }
    740 
    741 U_CAPI void U_EXPORT2
    742 unorm2_close(UNormalizer2 *norm2) {
    743     delete (Normalizer2 *)norm2;
    744 }
    745 
    746 U_CAPI int32_t U_EXPORT2
    747 unorm2_normalize(const UNormalizer2 *norm2,
    748                  const UChar *src, int32_t length,
    749                  UChar *dest, int32_t capacity,
    750                  UErrorCode *pErrorCode) {
    751     if(U_FAILURE(*pErrorCode)) {
    752         return 0;
    753     }
    754     if( (src==NULL ? length!=0 : length<-1) ||
    755         (dest==NULL ? capacity!=0 : capacity<0) ||
    756         (src==dest && src!=NULL)
    757     ) {
    758         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    759         return 0;
    760     }
    761     UnicodeString destString(dest, 0, capacity);
    762     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
    763     if(length!=0) {
    764         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    765         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    766         if(n2wi!=NULL) {
    767             // Avoid duplicate argument checking and support NUL-terminated src.
    768             ReorderingBuffer buffer(n2wi->impl, destString);
    769             if(buffer.init(length, *pErrorCode)) {
    770                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
    771             }
    772         } else {
    773             UnicodeString srcString(length<0, src, length);
    774             n2->normalize(srcString, destString, *pErrorCode);
    775         }
    776     }
    777     return destString.extract(dest, capacity, *pErrorCode);
    778 }
    779 
    780 static int32_t
    781 normalizeSecondAndAppend(const UNormalizer2 *norm2,
    782                          UChar *first, int32_t firstLength, int32_t firstCapacity,
    783                          const UChar *second, int32_t secondLength,
    784                          UBool doNormalize,
    785                          UErrorCode *pErrorCode) {
    786     if(U_FAILURE(*pErrorCode)) {
    787         return 0;
    788     }
    789     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
    790         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
    791                        (firstCapacity<0 || firstLength<-1)) ||
    792         (first==second && first!=NULL)
    793     ) {
    794         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    795         return 0;
    796     }
    797     UnicodeString firstString(first, firstLength, firstCapacity);
    798     firstLength=firstString.length();  // In case it was -1.
    799     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
    800     if(secondLength!=0) {
    801         const Normalizer2 *n2=(const Normalizer2 *)norm2;
    802         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
    803         if(n2wi!=NULL) {
    804             // Avoid duplicate argument checking and support NUL-terminated src.
    805             UnicodeString safeMiddle;
    806             {
    807                 ReorderingBuffer buffer(n2wi->impl, firstString);
    808                 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
    809                     n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
    810                                              doNormalize, safeMiddle, buffer, *pErrorCode);
    811                 }
    812             }  // The ReorderingBuffer destructor finalizes firstString.
    813             if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
    814                 // Restore the modified suffix of the first string.
    815                 // This does not restore first[] array contents between firstLength and firstCapacity.
    816                 // (That might be uninitialized memory, as far as we know.)
    817                 if(first!=NULL) { /* don't dereference NULL */
    818                   safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
    819                   if(firstLength<firstCapacity) {
    820                     first[firstLength]=0;  // NUL-terminate in case it was originally.
    821                   }
    822                 }
    823             }
    824         } else {
    825             UnicodeString secondString(secondLength<0, second, secondLength);
    826             if(doNormalize) {
    827                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
    828             } else {
    829                 n2->append(firstString, secondString, *pErrorCode);
    830             }
    831         }
    832     }
    833     return firstString.extract(first, firstCapacity, *pErrorCode);
    834 }
    835 
    836 U_CAPI int32_t U_EXPORT2
    837 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    838                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
    839                                 const UChar *second, int32_t secondLength,
    840                                 UErrorCode *pErrorCode) {
    841     return normalizeSecondAndAppend(norm2,
    842                                     first, firstLength, firstCapacity,
    843                                     second, secondLength,
    844                                     TRUE, pErrorCode);
    845 }
    846 
    847 U_CAPI int32_t U_EXPORT2
    848 unorm2_append(const UNormalizer2 *norm2,
    849               UChar *first, int32_t firstLength, int32_t firstCapacity,
    850               const UChar *second, int32_t secondLength,
    851               UErrorCode *pErrorCode) {
    852     return normalizeSecondAndAppend(norm2,
    853                                     first, firstLength, firstCapacity,
    854                                     second, secondLength,
    855                                     FALSE, pErrorCode);
    856 }
    857 
    858 U_CAPI int32_t U_EXPORT2
    859 unorm2_getDecomposition(const UNormalizer2 *norm2,
    860                         UChar32 c, UChar *decomposition, int32_t capacity,
    861                         UErrorCode *pErrorCode) {
    862     if(U_FAILURE(*pErrorCode)) {
    863         return 0;
    864     }
    865     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    866         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    867         return 0;
    868     }
    869     UnicodeString destString(decomposition, 0, capacity);
    870     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
    871         return destString.extract(decomposition, capacity, *pErrorCode);
    872     } else {
    873         return -1;
    874     }
    875 }
    876 
    877 U_CAPI int32_t U_EXPORT2
    878 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
    879                            UChar32 c, UChar *decomposition, int32_t capacity,
    880                            UErrorCode *pErrorCode) {
    881     if(U_FAILURE(*pErrorCode)) {
    882         return 0;
    883     }
    884     if(decomposition==NULL ? capacity!=0 : capacity<0) {
    885         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    886         return 0;
    887     }
    888     UnicodeString destString(decomposition, 0, capacity);
    889     if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
    890         return destString.extract(decomposition, capacity, *pErrorCode);
    891     } else {
    892         return -1;
    893     }
    894 }
    895 
    896 U_CAPI UChar32 U_EXPORT2
    897 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
    898     return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
    899 }
    900 
    901 U_CAPI uint8_t U_EXPORT2
    902 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
    903     return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
    904 }
    905 
    906 U_CAPI UBool U_EXPORT2
    907 unorm2_isNormalized(const UNormalizer2 *norm2,
    908                     const UChar *s, int32_t length,
    909                     UErrorCode *pErrorCode) {
    910     if(U_FAILURE(*pErrorCode)) {
    911         return 0;
    912     }
    913     if((s==NULL && length!=0) || length<-1) {
    914         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    915         return 0;
    916     }
    917     UnicodeString sString(length<0, s, length);
    918     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
    919 }
    920 
    921 U_CAPI UNormalizationCheckResult U_EXPORT2
    922 unorm2_quickCheck(const UNormalizer2 *norm2,
    923                   const UChar *s, int32_t length,
    924                   UErrorCode *pErrorCode) {
    925     if(U_FAILURE(*pErrorCode)) {
    926         return UNORM_NO;
    927     }
    928     if((s==NULL && length!=0) || length<-1) {
    929         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    930         return UNORM_NO;
    931     }
    932     UnicodeString sString(length<0, s, length);
    933     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
    934 }
    935 
    936 U_CAPI int32_t U_EXPORT2
    937 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    938                          const UChar *s, int32_t length,
    939                          UErrorCode *pErrorCode) {
    940     if(U_FAILURE(*pErrorCode)) {
    941         return 0;
    942     }
    943     if((s==NULL && length!=0) || length<-1) {
    944         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    945         return 0;
    946     }
    947     UnicodeString sString(length<0, s, length);
    948     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
    949 }
    950 
    951 U_CAPI UBool U_EXPORT2
    952 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
    953     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
    954 }
    955 
    956 U_CAPI UBool U_EXPORT2
    957 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
    958     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
    959 }
    960 
    961 U_CAPI UBool U_EXPORT2
    962 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
    963     return ((const Normalizer2 *)norm2)->isInert(c);
    964 }
    965 
    966 // Some properties APIs ---------------------------------------------------- ***
    967 
    968 U_CAPI uint8_t U_EXPORT2
    969 u_getCombiningClass(UChar32 c) {
    970     UErrorCode errorCode=U_ZERO_ERROR;
    971     const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
    972     if(U_SUCCESS(errorCode)) {
    973         return nfd->getCombiningClass(c);
    974     } else {
    975         return 0;
    976     }
    977 }
    978 
    979 U_CFUNC UNormalizationCheckResult
    980 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    981     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    982         return UNORM_YES;
    983     }
    984     UErrorCode errorCode=U_ZERO_ERROR;
    985     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    986     if(U_SUCCESS(errorCode)) {
    987         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    988     } else {
    989         return UNORM_MAYBE;
    990     }
    991 }
    992 
    993 U_CFUNC uint16_t
    994 unorm_getFCD16(UChar32 c) {
    995     UErrorCode errorCode=U_ZERO_ERROR;
    996     const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    997     if(U_SUCCESS(errorCode)) {
    998         return impl->getFCD16(c);
    999     } else {
   1000         return 0;
   1001     }
   1002 }
   1003 
   1004 #endif  // !UCONFIG_NO_NORMALIZATION
   1005