Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2005-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  ucasemap.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2005may06
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Case mapping service object and functions using it.
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/brkiter.h"
     23 #include "unicode/bytestream.h"
     24 #include "unicode/casemap.h"
     25 #include "unicode/edits.h"
     26 #include "unicode/stringoptions.h"
     27 #include "unicode/stringpiece.h"
     28 #include "unicode/ubrk.h"
     29 #include "unicode/uloc.h"
     30 #include "unicode/ustring.h"
     31 #include "unicode/ucasemap.h"
     32 #if !UCONFIG_NO_BREAK_ITERATION
     33 #include "unicode/utext.h"
     34 #endif
     35 #include "unicode/utf.h"
     36 #include "unicode/utf8.h"
     37 #include "unicode/utf16.h"
     38 #include "bytesinkutil.h"
     39 #include "cmemory.h"
     40 #include "cstring.h"
     41 #include "uassert.h"
     42 #include "ucase.h"
     43 #include "ucasemap_imp.h"
     44 #include "ustr_imp.h"
     45 
     46 U_NAMESPACE_USE
     47 
     48 /* UCaseMap service object -------------------------------------------------- */
     49 
     50 UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
     51 #if !UCONFIG_NO_BREAK_ITERATION
     52         iter(NULL),
     53 #endif
     54         caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
     55     ucasemap_setLocale(this, localeID, pErrorCode);
     56 }
     57 
     58 UCaseMap::~UCaseMap() {
     59 #if !UCONFIG_NO_BREAK_ITERATION
     60     delete iter;
     61 #endif
     62 }
     63 
     64 U_CAPI UCaseMap * U_EXPORT2
     65 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
     66     if(U_FAILURE(*pErrorCode)) {
     67         return NULL;
     68     }
     69     UCaseMap *csm = new UCaseMap(locale, options, pErrorCode);
     70     if(csm==NULL) {
     71         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
     72         return NULL;
     73     } else if (U_FAILURE(*pErrorCode)) {
     74         delete csm;
     75         return NULL;
     76     }
     77     return csm;
     78 }
     79 
     80 U_CAPI void U_EXPORT2
     81 ucasemap_close(UCaseMap *csm) {
     82     delete csm;
     83 }
     84 
     85 U_CAPI const char * U_EXPORT2
     86 ucasemap_getLocale(const UCaseMap *csm) {
     87     return csm->locale;
     88 }
     89 
     90 U_CAPI uint32_t U_EXPORT2
     91 ucasemap_getOptions(const UCaseMap *csm) {
     92     return csm->options;
     93 }
     94 
     95 U_CAPI void U_EXPORT2
     96 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
     97     if(U_FAILURE(*pErrorCode)) {
     98         return;
     99     }
    100     if (locale != NULL && *locale == 0) {
    101         csm->locale[0] = 0;
    102         csm->caseLocale = UCASE_LOC_ROOT;
    103         return;
    104     }
    105 
    106     int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
    107     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
    108         *pErrorCode=U_ZERO_ERROR;
    109         /* we only really need the language code for case mappings */
    110         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
    111     }
    112     if(length==sizeof(csm->locale)) {
    113         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    114     }
    115     if(U_SUCCESS(*pErrorCode)) {
    116         csm->caseLocale=UCASE_LOC_UNKNOWN;
    117         csm->caseLocale = ucase_getCaseLocale(csm->locale);
    118     } else {
    119         csm->locale[0]=0;
    120         csm->caseLocale = UCASE_LOC_ROOT;
    121     }
    122 }
    123 
    124 U_CAPI void U_EXPORT2
    125 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
    126     if(U_FAILURE(*pErrorCode)) {
    127         return;
    128     }
    129     csm->options=options;
    130 }
    131 
    132 /* UTF-8 string case mappings ----------------------------------------------- */
    133 
    134 /* TODO(markus): Move to a new, separate utf8case.cpp file. */
    135 
    136 namespace {
    137 
    138 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
    139 inline UBool
    140 appendResult(int32_t cpLength, int32_t result, const UChar *s,
    141              ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
    142     U_ASSERT(U_SUCCESS(errorCode));
    143 
    144     /* decode the result */
    145     if(result<0) {
    146         /* (not) original code point */
    147         if(edits!=NULL) {
    148             edits->addUnchanged(cpLength);
    149         }
    150         if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
    151             ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
    152         }
    153     } else {
    154         if(result<=UCASE_MAX_STRING_LENGTH) {
    155             // string: "result" is the UTF-16 length
    156             return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
    157         } else {
    158             ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
    159         }
    160     }
    161     return TRUE;
    162 }
    163 
    164 // See unicode/utf8.h U8_APPEND_UNSAFE().
    165 inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
    166 inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
    167 
    168 }  // namespace
    169 
    170 static UChar32 U_CALLCONV
    171 utf8_caseContextIterator(void *context, int8_t dir) {
    172     UCaseContext *csc=(UCaseContext *)context;
    173     UChar32 c;
    174 
    175     if(dir<0) {
    176         /* reset for backward iteration */
    177         csc->index=csc->cpStart;
    178         csc->dir=dir;
    179     } else if(dir>0) {
    180         /* reset for forward iteration */
    181         csc->index=csc->cpLimit;
    182         csc->dir=dir;
    183     } else {
    184         /* continue current iteration direction */
    185         dir=csc->dir;
    186     }
    187 
    188     if(dir<0) {
    189         if(csc->start<csc->index) {
    190             U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
    191             return c;
    192         }
    193     } else {
    194         if(csc->index<csc->limit) {
    195             U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
    196             return c;
    197         }
    198     }
    199     return U_SENTINEL;
    200 }
    201 
    202 /*
    203  * Case-maps [srcStart..srcLimit[ but takes
    204  * context [0..srcLength[ into account.
    205  */
    206 static void
    207 _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
    208          const uint8_t *src, UCaseContext *csc,
    209          int32_t srcStart, int32_t srcLimit,
    210          icu::ByteSink &sink, icu::Edits *edits,
    211          UErrorCode &errorCode) {
    212     /* case mapping loop */
    213     int32_t srcIndex=srcStart;
    214     while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
    215         int32_t cpStart;
    216         csc->cpStart=cpStart=srcIndex;
    217         UChar32 c;
    218         U8_NEXT(src, srcIndex, srcLimit, c);
    219         csc->cpLimit=srcIndex;
    220         if(c<0) {
    221             // Malformed UTF-8.
    222             ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
    223                                           sink, options, edits, errorCode);
    224         } else {
    225             const UChar *s;
    226             c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
    227             appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
    228         }
    229     }
    230 }
    231 
    232 #if !UCONFIG_NO_BREAK_ITERATION
    233 
    234 U_CFUNC void U_CALLCONV
    235 ucasemap_internalUTF8ToTitle(
    236         int32_t caseLocale, uint32_t options, BreakIterator *iter,
    237         const uint8_t *src, int32_t srcLength,
    238         ByteSink &sink, icu::Edits *edits,
    239         UErrorCode &errorCode) {
    240     if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
    241         return;
    242     }
    243 
    244     /* set up local variables */
    245     UCaseContext csc=UCASECONTEXT_INITIALIZER;
    246     csc.p=(void *)src;
    247     csc.limit=srcLength;
    248     int32_t prev=0;
    249     UBool isFirstIndex=TRUE;
    250 
    251     /* titlecasing loop */
    252     while(prev<srcLength) {
    253         /* find next index where to titlecase */
    254         int32_t index;
    255         if(isFirstIndex) {
    256             isFirstIndex=FALSE;
    257             index=iter->first();
    258         } else {
    259             index=iter->next();
    260         }
    261         if(index==UBRK_DONE || index>srcLength) {
    262             index=srcLength;
    263         }
    264 
    265         /*
    266          * Segment [prev..index[ into 3 parts:
    267          * a) skipped characters (copy as-is) [prev..titleStart[
    268          * b) first letter (titlecase)              [titleStart..titleLimit[
    269          * c) subsequent characters (lowercase)                 [titleLimit..index[
    270          */
    271         if(prev<index) {
    272             /* find and copy skipped characters [prev..titleStart[ */
    273             int32_t titleStart=prev;
    274             int32_t titleLimit=prev;
    275             UChar32 c;
    276             U8_NEXT(src, titleLimit, index, c);
    277             if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
    278                 // Adjust the titlecasing index to the next cased character,
    279                 // or to the next letter/number/symbol/private use.
    280                 // Stop with titleStart<titleLimit<=index
    281                 // if there is a character to be titlecased,
    282                 // or else stop with titleStart==titleLimit==index.
    283                 UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
    284                 while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
    285                     titleStart=titleLimit;
    286                     if(titleLimit==index) {
    287                         break;
    288                     }
    289                     U8_NEXT(src, titleLimit, index, c);
    290                 }
    291                 if (prev < titleStart) {
    292                     if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
    293                                                        sink, options, edits, errorCode)) {
    294                         return;
    295                     }
    296                 }
    297             }
    298 
    299             if(titleStart<titleLimit) {
    300                 /* titlecase c which is from [titleStart..titleLimit[ */
    301                 if(c>=0) {
    302                     csc.cpStart=titleStart;
    303                     csc.cpLimit=titleLimit;
    304                     const UChar *s;
    305                     c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
    306                     if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
    307                         return;
    308                     }
    309                 } else {
    310                     // Malformed UTF-8.
    311                     if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
    312                                                        sink, options, edits, errorCode)) {
    313                         return;
    314                     }
    315                 }
    316 
    317                 /* Special case Dutch IJ titlecasing */
    318                 if (titleStart+1 < index &&
    319                         caseLocale == UCASE_LOC_DUTCH &&
    320                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
    321                     if (src[titleStart+1] == 0x006A) {
    322                         ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
    323                         titleLimit++;
    324                     } else if (src[titleStart+1] == 0x004A) {
    325                         // Keep the capital J from getting lowercased.
    326                         if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
    327                                                            sink, options, edits, errorCode)) {
    328                             return;
    329                         }
    330                         titleLimit++;
    331                     }
    332                 }
    333 
    334                 /* lowercase [titleLimit..index[ */
    335                 if(titleLimit<index) {
    336                     if((options&U_TITLECASE_NO_LOWERCASE)==0) {
    337                         /* Normal operation: Lowercase the rest of the word. */
    338                         _caseMap(caseLocale, options, ucase_toFullLower,
    339                                  src, &csc,
    340                                  titleLimit, index,
    341                                  sink, edits, errorCode);
    342                         if(U_FAILURE(errorCode)) {
    343                             return;
    344                         }
    345                     } else {
    346                         /* Optionally just copy the rest of the word unchanged. */
    347                         if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
    348                                                            sink, options, edits, errorCode)) {
    349                             return;
    350                         }
    351                     }
    352                 }
    353             }
    354         }
    355 
    356         prev=index;
    357     }
    358 }
    359 
    360 #endif
    361 
    362 U_NAMESPACE_BEGIN
    363 namespace GreekUpper {
    364 
    365 UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
    366     while (i < length) {
    367         UChar32 c;
    368         U8_NEXT(s, i, length, c);
    369         int32_t type = ucase_getTypeOrIgnorable(c);
    370         if ((type & UCASE_IGNORABLE) != 0) {
    371             // Case-ignorable, continue with the loop.
    372         } else if (type != UCASE_NONE) {
    373             return TRUE;  // Followed by cased letter.
    374         } else {
    375             return FALSE;  // Uncased and not case-ignorable.
    376         }
    377     }
    378     return FALSE;  // Not followed by cased letter.
    379 }
    380 
    381 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
    382 void toUpper(uint32_t options,
    383              const uint8_t *src, int32_t srcLength,
    384              ByteSink &sink, Edits *edits,
    385              UErrorCode &errorCode) {
    386     uint32_t state = 0;
    387     for (int32_t i = 0; i < srcLength;) {
    388         int32_t nextIndex = i;
    389         UChar32 c;
    390         U8_NEXT(src, nextIndex, srcLength, c);
    391         uint32_t nextState = 0;
    392         int32_t type = ucase_getTypeOrIgnorable(c);
    393         if ((type & UCASE_IGNORABLE) != 0) {
    394             // c is case-ignorable
    395             nextState |= (state & AFTER_CASED);
    396         } else if (type != UCASE_NONE) {
    397             // c is cased
    398             nextState |= AFTER_CASED;
    399         }
    400         uint32_t data = getLetterData(c);
    401         if (data > 0) {
    402             uint32_t upper = data & UPPER_MASK;
    403             // Add a dialytika to this iota or ypsilon vowel
    404             // if we removed a tonos from the previous vowel,
    405             // and that previous vowel did not also have (or gain) a dialytika.
    406             // Adding one only to the final vowel in a longer sequence
    407             // (which does not occur in normal writing) would require lookahead.
    408             // Set the same flag as for preserving an existing dialytika.
    409             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
    410                     (upper == 0x399 || upper == 0x3A5)) {
    411                 data |= HAS_DIALYTIKA;
    412             }
    413             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
    414             if ((data & HAS_YPOGEGRAMMENI) != 0) {
    415                 numYpogegrammeni = 1;
    416             }
    417             // Skip combining diacritics after this Greek letter.
    418             int32_t nextNextIndex = nextIndex;
    419             while (nextIndex < srcLength) {
    420                 UChar32 c2;
    421                 U8_NEXT(src, nextNextIndex, srcLength, c2);
    422                 uint32_t diacriticData = getDiacriticData(c2);
    423                 if (diacriticData != 0) {
    424                     data |= diacriticData;
    425                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
    426                         ++numYpogegrammeni;
    427                     }
    428                     nextIndex = nextNextIndex;
    429                 } else {
    430                     break;  // not a Greek diacritic
    431                 }
    432             }
    433             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
    434                 nextState |= AFTER_VOWEL_WITH_ACCENT;
    435             }
    436             // Map according to Greek rules.
    437             UBool addTonos = FALSE;
    438             if (upper == 0x397 &&
    439                     (data & HAS_ACCENT) != 0 &&
    440                     numYpogegrammeni == 0 &&
    441                     (state & AFTER_CASED) == 0 &&
    442                     !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
    443                 // Keep disjunctive "or" with (only) a tonos.
    444                 // We use the same "word boundary" conditions as for the Final_Sigma test.
    445                 if (i == nextIndex) {
    446                     upper = 0x389;  // Preserve the precomposed form.
    447                 } else {
    448                     addTonos = TRUE;
    449                 }
    450             } else if ((data & HAS_DIALYTIKA) != 0) {
    451                 // Preserve a vowel with dialytika in precomposed form if it exists.
    452                 if (upper == 0x399) {
    453                     upper = 0x3AA;
    454                     data &= ~HAS_EITHER_DIALYTIKA;
    455                 } else if (upper == 0x3A5) {
    456                     upper = 0x3AB;
    457                     data &= ~HAS_EITHER_DIALYTIKA;
    458                 }
    459             }
    460 
    461             UBool change;
    462             if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
    463                 change = TRUE;  // common, simple usage
    464             } else {
    465                 // Find out first whether we are changing the text.
    466                 U_ASSERT(0x370 <= upper && upper <= 0x3ff);  // 2-byte UTF-8, main Greek block
    467                 change = (i + 2) > nextIndex ||
    468                         src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
    469                         numYpogegrammeni > 0;
    470                 int32_t i2 = i + 2;
    471                 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
    472                     change |= (i2 + 2) > nextIndex ||
    473                             src[i2] != (uint8_t)u8"\u0308"[0] ||
    474                             src[i2 + 1] != (uint8_t)u8"\u0308"[1];
    475                     i2 += 2;
    476                 }
    477                 if (addTonos) {
    478                     change |= (i2 + 2) > nextIndex ||
    479                             src[i2] != (uint8_t)u8"\u0301"[0] ||
    480                             src[i2 + 1] != (uint8_t)u8"\u0301"[1];
    481                     i2 += 2;
    482                 }
    483                 int32_t oldLength = nextIndex - i;
    484                 int32_t newLength = (i2 - i) + numYpogegrammeni * 2;  // 2 bytes per U+0399
    485                 change |= oldLength != newLength;
    486                 if (change) {
    487                     if (edits != NULL) {
    488                         edits->addReplace(oldLength, newLength);
    489                     }
    490                 } else {
    491                     if (edits != NULL) {
    492                         edits->addUnchanged(oldLength);
    493                     }
    494                     // Write unchanged text?
    495                     change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
    496                 }
    497             }
    498 
    499             if (change) {
    500                 ByteSinkUtil::appendTwoBytes(upper, sink);
    501                 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
    502                     sink.Append(u8"\u0308", 2);  // restore or add a dialytika
    503                 }
    504                 if (addTonos) {
    505                     sink.Append(u8"\u0301", 2);
    506                 }
    507                 while (numYpogegrammeni > 0) {
    508                     sink.Append(u8"\u0399", 2);
    509                     --numYpogegrammeni;
    510                 }
    511             }
    512         } else if(c>=0) {
    513             const UChar *s;
    514             c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
    515             if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
    516                 return;
    517             }
    518         } else {
    519             // Malformed UTF-8.
    520             if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
    521                                                sink, options, edits, errorCode)) {
    522                 return;
    523             }
    524         }
    525         i = nextIndex;
    526         state = nextState;
    527     }
    528 }
    529 
    530 }  // namespace GreekUpper
    531 U_NAMESPACE_END
    532 
    533 static void U_CALLCONV
    534 ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
    535                              const uint8_t *src, int32_t srcLength,
    536                              icu::ByteSink &sink, icu::Edits *edits,
    537                              UErrorCode &errorCode) {
    538     UCaseContext csc=UCASECONTEXT_INITIALIZER;
    539     csc.p=(void *)src;
    540     csc.limit=srcLength;
    541     _caseMap(
    542         caseLocale, options, ucase_toFullLower,
    543         src, &csc, 0, srcLength,
    544         sink, edits, errorCode);
    545 }
    546 
    547 static void U_CALLCONV
    548 ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
    549                              const uint8_t *src, int32_t srcLength,
    550                              icu::ByteSink &sink, icu::Edits *edits,
    551                              UErrorCode &errorCode) {
    552     if (caseLocale == UCASE_LOC_GREEK) {
    553         GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
    554     } else {
    555         UCaseContext csc=UCASECONTEXT_INITIALIZER;
    556         csc.p=(void *)src;
    557         csc.limit=srcLength;
    558         _caseMap(
    559             caseLocale, options, ucase_toFullUpper,
    560             src, &csc, 0, srcLength,
    561             sink, edits, errorCode);
    562     }
    563 }
    564 
    565 static void U_CALLCONV
    566 ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
    567                           const uint8_t *src, int32_t srcLength,
    568                           icu::ByteSink &sink, icu::Edits *edits,
    569                           UErrorCode &errorCode) {
    570     /* case mapping loop */
    571     int32_t srcIndex = 0;
    572     while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
    573         int32_t cpStart = srcIndex;
    574         UChar32 c;
    575         U8_NEXT(src, srcIndex, srcLength, c);
    576         if(c<0) {
    577             // Malformed UTF-8.
    578             ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
    579                                           sink, options, edits, errorCode);
    580         } else {
    581             const UChar *s;
    582             c = ucase_toFullFolding(c, &s, options);
    583             appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
    584         }
    585     }
    586 }
    587 
    588 void
    589 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
    590                  const char *src, int32_t srcLength,
    591                  UTF8CaseMapper *stringCaseMapper,
    592                  icu::ByteSink &sink, icu::Edits *edits,
    593                  UErrorCode &errorCode) {
    594     /* check argument values */
    595     if (U_FAILURE(errorCode)) {
    596         return;
    597     }
    598     if ((src == nullptr && srcLength != 0) || srcLength < -1) {
    599         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    600         return;
    601     }
    602 
    603     // Get the string length.
    604     if (srcLength == -1) {
    605         srcLength = (int32_t)uprv_strlen((const char *)src);
    606     }
    607 
    608     if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
    609         edits->reset();
    610     }
    611     stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
    612                      (const uint8_t *)src, srcLength, sink, edits, errorCode);
    613     sink.Flush();
    614     if (U_SUCCESS(errorCode)) {
    615         if (edits != nullptr) {
    616             edits->copyErrorTo(errorCode);
    617         }
    618     }
    619 }
    620 
    621 int32_t
    622 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
    623                  char *dest, int32_t destCapacity,
    624                  const char *src, int32_t srcLength,
    625                  UTF8CaseMapper *stringCaseMapper,
    626                  icu::Edits *edits,
    627                  UErrorCode &errorCode) {
    628     /* check argument values */
    629     if(U_FAILURE(errorCode)) {
    630         return 0;
    631     }
    632     if( destCapacity<0 ||
    633         (dest==NULL && destCapacity>0) ||
    634         (src==NULL && srcLength!=0) || srcLength<-1
    635     ) {
    636         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    637         return 0;
    638     }
    639 
    640     /* get the string length */
    641     if(srcLength==-1) {
    642         srcLength=(int32_t)uprv_strlen((const char *)src);
    643     }
    644 
    645     /* check for overlapping source and destination */
    646     if( dest!=NULL &&
    647         ((src>=dest && src<(dest+destCapacity)) ||
    648          (dest>=src && dest<(src+srcLength)))
    649     ) {
    650         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    651         return 0;
    652     }
    653 
    654     CheckedArrayByteSink sink(dest, destCapacity);
    655     if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
    656         edits->reset();
    657     }
    658     stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
    659                      (const uint8_t *)src, srcLength, sink, edits, errorCode);
    660     sink.Flush();
    661     if (U_SUCCESS(errorCode)) {
    662         if (sink.Overflowed()) {
    663             errorCode = U_BUFFER_OVERFLOW_ERROR;
    664         } else if (edits != nullptr) {
    665             edits->copyErrorTo(errorCode);
    666         }
    667     }
    668     return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
    669 }
    670 
    671 /* public API functions */
    672 
    673 U_CAPI int32_t U_EXPORT2
    674 ucasemap_utf8ToLower(const UCaseMap *csm,
    675                      char *dest, int32_t destCapacity,
    676                      const char *src, int32_t srcLength,
    677                      UErrorCode *pErrorCode) {
    678     return ucasemap_mapUTF8(
    679         csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
    680         dest, destCapacity,
    681         src, srcLength,
    682         ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
    683 }
    684 
    685 U_CAPI int32_t U_EXPORT2
    686 ucasemap_utf8ToUpper(const UCaseMap *csm,
    687                      char *dest, int32_t destCapacity,
    688                      const char *src, int32_t srcLength,
    689                      UErrorCode *pErrorCode) {
    690     return ucasemap_mapUTF8(
    691         csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
    692         dest, destCapacity,
    693         src, srcLength,
    694         ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
    695 }
    696 
    697 U_CAPI int32_t U_EXPORT2
    698 ucasemap_utf8FoldCase(const UCaseMap *csm,
    699                       char *dest, int32_t destCapacity,
    700                       const char *src, int32_t srcLength,
    701                       UErrorCode *pErrorCode) {
    702     return ucasemap_mapUTF8(
    703         UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
    704         dest, destCapacity,
    705         src, srcLength,
    706         ucasemap_internalUTF8Fold, NULL, *pErrorCode);
    707 }
    708 
    709 U_NAMESPACE_BEGIN
    710 
    711 void CaseMap::utf8ToLower(
    712         const char *locale, uint32_t options,
    713         StringPiece src, ByteSink &sink, Edits *edits,
    714         UErrorCode &errorCode) {
    715     ucasemap_mapUTF8(
    716         ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
    717         src.data(), src.length(),
    718         ucasemap_internalUTF8ToLower, sink, edits, errorCode);
    719 }
    720 
    721 void CaseMap::utf8ToUpper(
    722         const char *locale, uint32_t options,
    723         StringPiece src, ByteSink &sink, Edits *edits,
    724         UErrorCode &errorCode) {
    725     ucasemap_mapUTF8(
    726         ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
    727         src.data(), src.length(),
    728         ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
    729 }
    730 
    731 void CaseMap::utf8Fold(
    732         uint32_t options,
    733         StringPiece src, ByteSink &sink, Edits *edits,
    734         UErrorCode &errorCode) {
    735     ucasemap_mapUTF8(
    736         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
    737         src.data(), src.length(),
    738         ucasemap_internalUTF8Fold, sink, edits, errorCode);
    739 }
    740 
    741 int32_t CaseMap::utf8ToLower(
    742         const char *locale, uint32_t options,
    743         const char *src, int32_t srcLength,
    744         char *dest, int32_t destCapacity, Edits *edits,
    745         UErrorCode &errorCode) {
    746     return ucasemap_mapUTF8(
    747         ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
    748         dest, destCapacity,
    749         src, srcLength,
    750         ucasemap_internalUTF8ToLower, edits, errorCode);
    751 }
    752 
    753 int32_t CaseMap::utf8ToUpper(
    754         const char *locale, uint32_t options,
    755         const char *src, int32_t srcLength,
    756         char *dest, int32_t destCapacity, Edits *edits,
    757         UErrorCode &errorCode) {
    758     return ucasemap_mapUTF8(
    759         ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
    760         dest, destCapacity,
    761         src, srcLength,
    762         ucasemap_internalUTF8ToUpper, edits, errorCode);
    763 }
    764 
    765 int32_t CaseMap::utf8Fold(
    766         uint32_t options,
    767         const char *src, int32_t srcLength,
    768         char *dest, int32_t destCapacity, Edits *edits,
    769         UErrorCode &errorCode) {
    770     return ucasemap_mapUTF8(
    771         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
    772         dest, destCapacity,
    773         src, srcLength,
    774         ucasemap_internalUTF8Fold, edits, errorCode);
    775 }
    776 
    777 U_NAMESPACE_END
    778