Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * loadednormalizer2impl.h
      7 *
      8 * created on: 2014sep07
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #ifndef __NORM2ALLMODES_H__
     13 #define __NORM2ALLMODES_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_NORMALIZATION
     18 
     19 #include "unicode/normalizer2.h"
     20 #include "unicode/unistr.h"
     21 #include "cpputils.h"
     22 #include "normalizer2impl.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 // Intermediate class:
     27 // Has Normalizer2Impl and does boilerplate argument checking and setup.
     28 class Normalizer2WithImpl : public Normalizer2 {
     29 public:
     30     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
     31     virtual ~Normalizer2WithImpl();
     32 
     33     // normalize
     34     virtual UnicodeString &
     35     normalize(const UnicodeString &src,
     36               UnicodeString &dest,
     37               UErrorCode &errorCode) const {
     38         if(U_FAILURE(errorCode)) {
     39             dest.setToBogus();
     40             return dest;
     41         }
     42         const UChar *sArray=src.getBuffer();
     43         if(&dest==&src || sArray==NULL) {
     44             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     45             dest.setToBogus();
     46             return dest;
     47         }
     48         dest.remove();
     49         ReorderingBuffer buffer(impl, dest);
     50         if(buffer.init(src.length(), errorCode)) {
     51             normalize(sArray, sArray+src.length(), buffer, errorCode);
     52         }
     53         return dest;
     54     }
     55     virtual void
     56     normalize(const UChar *src, const UChar *limit,
     57               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
     58 
     59     // normalize and append
     60     virtual UnicodeString &
     61     normalizeSecondAndAppend(UnicodeString &first,
     62                              const UnicodeString &second,
     63                              UErrorCode &errorCode) const {
     64         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
     65     }
     66     virtual UnicodeString &
     67     append(UnicodeString &first,
     68            const UnicodeString &second,
     69            UErrorCode &errorCode) const {
     70         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
     71     }
     72     UnicodeString &
     73     normalizeSecondAndAppend(UnicodeString &first,
     74                              const UnicodeString &second,
     75                              UBool doNormalize,
     76                              UErrorCode &errorCode) const {
     77         uprv_checkCanGetBuffer(first, errorCode);
     78         if(U_FAILURE(errorCode)) {
     79             return first;
     80         }
     81         const UChar *secondArray=second.getBuffer();
     82         if(&first==&second || secondArray==NULL) {
     83             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     84             return first;
     85         }
     86         int32_t firstLength=first.length();
     87         UnicodeString safeMiddle;
     88         {
     89             ReorderingBuffer buffer(impl, first);
     90             if(buffer.init(firstLength+second.length(), errorCode)) {
     91                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
     92                                    safeMiddle, buffer, errorCode);
     93             }
     94         }  // The ReorderingBuffer destructor finalizes the first string.
     95         if(U_FAILURE(errorCode)) {
     96             // Restore the modified suffix of the first string.
     97             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
     98         }
     99         return first;
    100     }
    101     virtual void
    102     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    103                        UnicodeString &safeMiddle,
    104                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    105     virtual UBool
    106     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    107         UChar buffer[4];
    108         int32_t length;
    109         const UChar *d=impl.getDecomposition(c, buffer, length);
    110         if(d==NULL) {
    111             return FALSE;
    112         }
    113         if(d==buffer) {
    114             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    115         } else {
    116             decomposition.setTo(FALSE, d, length);  // read-only alias
    117         }
    118         return TRUE;
    119     }
    120     virtual UBool
    121     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    122         UChar buffer[30];
    123         int32_t length;
    124         const UChar *d=impl.getRawDecomposition(c, buffer, length);
    125         if(d==NULL) {
    126             return FALSE;
    127         }
    128         if(d==buffer) {
    129             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
    130         } else {
    131             decomposition.setTo(FALSE, d, length);  // read-only alias
    132         }
    133         return TRUE;
    134     }
    135     virtual UChar32
    136     composePair(UChar32 a, UChar32 b) const {
    137         return impl.composePair(a, b);
    138     }
    139 
    140     virtual uint8_t
    141     getCombiningClass(UChar32 c) const {
    142         return impl.getCC(impl.getNorm16(c));
    143     }
    144 
    145     // quick checks
    146     virtual UBool
    147     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    148         if(U_FAILURE(errorCode)) {
    149             return FALSE;
    150         }
    151         const UChar *sArray=s.getBuffer();
    152         if(sArray==NULL) {
    153             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    154             return FALSE;
    155         }
    156         const UChar *sLimit=sArray+s.length();
    157         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    158     }
    159     virtual UNormalizationCheckResult
    160     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    161         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    162     }
    163     virtual int32_t
    164     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    165         if(U_FAILURE(errorCode)) {
    166             return 0;
    167         }
    168         const UChar *sArray=s.getBuffer();
    169         if(sArray==NULL) {
    170             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    171             return 0;
    172         }
    173         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    174     }
    175     virtual const UChar *
    176     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
    177 
    178     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    179         return UNORM_YES;
    180     }
    181 
    182     const Normalizer2Impl &impl;
    183 };
    184 
    185 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    186 public:
    187     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    188     virtual ~DecomposeNormalizer2();
    189 
    190 private:
    191     virtual void
    192     normalize(const UChar *src, const UChar *limit,
    193               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    194         impl.decompose(src, limit, &buffer, errorCode);
    195     }
    196     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    197     virtual void
    198     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    199                        UnicodeString &safeMiddle,
    200                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    201         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    202     }
    203     virtual const UChar *
    204     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    205         return impl.decompose(src, limit, NULL, errorCode);
    206     }
    207     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    208     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    209         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    210     }
    211     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
    212     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
    213     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
    214 };
    215 
    216 class ComposeNormalizer2 : public Normalizer2WithImpl {
    217 public:
    218     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    219         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    220     virtual ~ComposeNormalizer2();
    221 
    222 private:
    223     virtual void
    224     normalize(const UChar *src, const UChar *limit,
    225               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    226         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    227     }
    228     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    229     virtual void
    230     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    231                        UnicodeString &safeMiddle,
    232                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    233         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    234     }
    235 
    236     virtual UBool
    237     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    238         if(U_FAILURE(errorCode)) {
    239             return FALSE;
    240         }
    241         const UChar *sArray=s.getBuffer();
    242         if(sArray==NULL) {
    243             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    244             return FALSE;
    245         }
    246         UnicodeString temp;
    247         ReorderingBuffer buffer(impl, temp);
    248         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    249             return FALSE;
    250         }
    251         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    252     }
    253     virtual UNormalizationCheckResult
    254     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    255         if(U_FAILURE(errorCode)) {
    256             return UNORM_MAYBE;
    257         }
    258         const UChar *sArray=s.getBuffer();
    259         if(sArray==NULL) {
    260             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    261             return UNORM_MAYBE;
    262         }
    263         UNormalizationCheckResult qcResult=UNORM_YES;
    264         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    265         return qcResult;
    266     }
    267     virtual const UChar *
    268     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
    269         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    270     }
    271     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    272     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
    273         return impl.getCompQuickCheck(impl.getNorm16(c));
    274     }
    275     virtual UBool hasBoundaryBefore(UChar32 c) const {
    276         return impl.hasCompBoundaryBefore(c);
    277     }
    278     virtual UBool hasBoundaryAfter(UChar32 c) const {
    279         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    280     }
    281     virtual UBool isInert(UChar32 c) const {
    282         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    283     }
    284 
    285     const UBool onlyContiguous;
    286 };
    287 
    288 class FCDNormalizer2 : public Normalizer2WithImpl {
    289 public:
    290     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    291     virtual ~FCDNormalizer2();
    292 
    293 private:
    294     virtual void
    295     normalize(const UChar *src, const UChar *limit,
    296               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    297         impl.makeFCD(src, limit, &buffer, errorCode);
    298     }
    299     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    300     virtual void
    301     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
    302                        UnicodeString &safeMiddle,
    303                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
    304         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    305     }
    306     virtual const UChar *
    307     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
    308         return impl.makeFCD(src, limit, NULL, errorCode);
    309     }
    310     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    311     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    312     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    313     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
    314 };
    315 
    316 struct Norm2AllModes : public UMemory {
    317     Norm2AllModes(Normalizer2Impl *i)
    318             : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
    319     ~Norm2AllModes();
    320 
    321     static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
    322     static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
    323     static Norm2AllModes *createInstance(const char *packageName,
    324                                          const char *name,
    325                                          UErrorCode &errorCode);
    326 
    327     static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
    328     static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
    329     static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
    330 
    331     Normalizer2Impl *impl;
    332     ComposeNormalizer2 comp;
    333     DecomposeNormalizer2 decomp;
    334     FCDNormalizer2 fcd;
    335     ComposeNormalizer2 fcc;
    336 };
    337 
    338 U_NAMESPACE_END
    339 
    340 #endif  // !UCONFIG_NO_NORMALIZATION
    341 #endif  // __NORM2ALLMODES_H__
    342