Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2005-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  ucasemap.cpp
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2005may06
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Case mapping service object and functions using it.
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/brkiter.h"
     23 #include "unicode/ubrk.h"
     24 #include "unicode/uloc.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/ucasemap.h"
     27 #if !UCONFIG_NO_BREAK_ITERATION
     28 #include "unicode/utext.h"
     29 #endif
     30 #include "unicode/utf.h"
     31 #include "unicode/utf8.h"
     32 #include "unicode/utf16.h"
     33 #include "cmemory.h"
     34 #include "cstring.h"
     35 #include "ucase.h"
     36 #include "ustr_imp.h"
     37 
     38 U_NAMESPACE_USE
     39 
     40 /* UCaseMap service object -------------------------------------------------- */
     41 
     42 U_CAPI UCaseMap * U_EXPORT2
     43 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
     44     UCaseMap *csm;
     45 
     46     if(U_FAILURE(*pErrorCode)) {
     47         return NULL;
     48     }
     49 
     50     csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap));
     51     if(csm==NULL) {
     52         return NULL;
     53     }
     54     uprv_memset(csm, 0, sizeof(UCaseMap));
     55 
     56     csm->csp=ucase_getSingleton();
     57     ucasemap_setLocale(csm, locale, pErrorCode);
     58     if(U_FAILURE(*pErrorCode)) {
     59         uprv_free(csm);
     60         return NULL;
     61     }
     62 
     63     csm->options=options;
     64     return csm;
     65 }
     66 
     67 U_CAPI void U_EXPORT2
     68 ucasemap_close(UCaseMap *csm) {
     69     if(csm!=NULL) {
     70 #if !UCONFIG_NO_BREAK_ITERATION
     71         // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
     72         delete reinterpret_cast<BreakIterator *>(csm->iter);
     73 #endif
     74         uprv_free(csm);
     75     }
     76 }
     77 
     78 U_CAPI const char * U_EXPORT2
     79 ucasemap_getLocale(const UCaseMap *csm) {
     80     return csm->locale;
     81 }
     82 
     83 U_CAPI uint32_t U_EXPORT2
     84 ucasemap_getOptions(const UCaseMap *csm) {
     85     return csm->options;
     86 }
     87 
     88 U_CAPI void U_EXPORT2
     89 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
     90     int32_t length;
     91 
     92     if(U_FAILURE(*pErrorCode)) {
     93         return;
     94     }
     95 
     96     length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
     97     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
     98         *pErrorCode=U_ZERO_ERROR;
     99         /* we only really need the language code for case mappings */
    100         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
    101     }
    102     if(length==sizeof(csm->locale)) {
    103         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    104     }
    105     csm->locCache=0;
    106     if(U_SUCCESS(*pErrorCode)) {
    107         ucase_getCaseLocale(csm->locale, &csm->locCache);
    108     } else {
    109         csm->locale[0]=0;
    110     }
    111 }
    112 
    113 U_CAPI void U_EXPORT2
    114 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) {
    115     csm->options=options;
    116 }
    117 
    118 /* UTF-8 string case mappings ----------------------------------------------- */
    119 
    120 /* TODO(markus): Move to a new, separate utf8case.c file. */
    121 
    122 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
    123 static inline int32_t
    124 appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
    125              int32_t result, const UChar *s) {
    126     UChar32 c;
    127     int32_t length;
    128     UErrorCode errorCode;
    129 
    130     /* decode the result */
    131     if(result<0) {
    132         /* (not) original code point */
    133         c=~result;
    134         length=U8_LENGTH(c);
    135     } else if(result<=UCASE_MAX_STRING_LENGTH) {
    136         c=U_SENTINEL;
    137         length=result;
    138     } else {
    139         c=result;
    140         length=U8_LENGTH(c);
    141     }
    142     if(length>(INT32_MAX-destIndex)) {
    143         return -1;  // integer overflow
    144     }
    145 
    146     if(destIndex<destCapacity) {
    147         /* append the result */
    148         if(c>=0) {
    149             /* code point */
    150             UBool isError=FALSE;
    151             U8_APPEND(dest, destIndex, destCapacity, c, isError);
    152             if(isError) {
    153                 /* overflow, nothing written */
    154                 destIndex+=length;
    155             }
    156         } else {
    157             /* string */
    158             int32_t destLength;
    159             errorCode=U_ZERO_ERROR;
    160             u_strToUTF8(
    161                 (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
    162                 s, length,
    163                 &errorCode);
    164             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
    165                 return -1;
    166             }
    167             if(destLength>(INT32_MAX-destIndex)) {
    168                 return -1;  // integer overflow
    169             }
    170             destIndex+=destLength;
    171             /* we might have an overflow, but we know the actual length */
    172         }
    173     } else {
    174         /* preflight */
    175         if(c>=0) {
    176             destIndex+=length;
    177         } else {
    178             int32_t destLength;
    179             errorCode=U_ZERO_ERROR;
    180             u_strToUTF8(
    181                 NULL, 0, &destLength,
    182                 s, length,
    183                 &errorCode);
    184             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
    185                 return -1;
    186             }
    187             if(destLength>(INT32_MAX-destIndex)) {
    188                 return -1;  // integer overflow
    189             }
    190             destIndex+=destLength;
    191         }
    192     }
    193     return destIndex;
    194 }
    195 
    196 static inline int32_t
    197 appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
    198     int32_t length=U8_LENGTH(c);
    199     if(length>(INT32_MAX-destIndex)) {
    200         return -1;  // integer overflow
    201     }
    202     int32_t limit=destIndex+length;
    203     if(limit<=destCapacity) {
    204         U8_APPEND_UNSAFE(dest, destIndex, c);
    205     }
    206     return limit;
    207 }
    208 
    209 static inline int32_t
    210 appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
    211              const uint8_t *s, int32_t length) {
    212     if(length>0) {
    213         if(length>(INT32_MAX-destIndex)) {
    214             return -1;  // integer overflow
    215         }
    216         if((destIndex+length)<=destCapacity) {
    217             uprv_memcpy(dest+destIndex, s, length);
    218         }
    219         destIndex+=length;
    220     }
    221     return destIndex;
    222 }
    223 
    224 static UChar32 U_CALLCONV
    225 utf8_caseContextIterator(void *context, int8_t dir) {
    226     UCaseContext *csc=(UCaseContext *)context;
    227     UChar32 c;
    228 
    229     if(dir<0) {
    230         /* reset for backward iteration */
    231         csc->index=csc->cpStart;
    232         csc->dir=dir;
    233     } else if(dir>0) {
    234         /* reset for forward iteration */
    235         csc->index=csc->cpLimit;
    236         csc->dir=dir;
    237     } else {
    238         /* continue current iteration direction */
    239         dir=csc->dir;
    240     }
    241 
    242     if(dir<0) {
    243         if(csc->start<csc->index) {
    244             U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
    245             return c;
    246         }
    247     } else {
    248         if(csc->index<csc->limit) {
    249             U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
    250             return c;
    251         }
    252     }
    253     return U_SENTINEL;
    254 }
    255 
    256 /*
    257  * Case-maps [srcStart..srcLimit[ but takes
    258  * context [0..srcLength[ into account.
    259  */
    260 static int32_t
    261 _caseMap(const UCaseMap *csm, UCaseMapFull *map,
    262          uint8_t *dest, int32_t destCapacity,
    263          const uint8_t *src, UCaseContext *csc,
    264          int32_t srcStart, int32_t srcLimit,
    265          UErrorCode *pErrorCode) {
    266     const UChar *s = NULL;
    267     UChar32 c, c2 = 0;
    268     int32_t srcIndex, destIndex;
    269     int32_t locCache;
    270 
    271     locCache=csm->locCache;
    272 
    273     /* case mapping loop */
    274     srcIndex=srcStart;
    275     destIndex=0;
    276     while(srcIndex<srcLimit) {
    277         csc->cpStart=srcIndex;
    278         U8_NEXT(src, srcIndex, srcLimit, c);
    279         csc->cpLimit=srcIndex;
    280         if(c<0) {
    281             // Malformed UTF-8.
    282             destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
    283             if(destIndex<0) {
    284                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    285                 return 0;
    286             }
    287             continue;
    288         }
    289         c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
    290         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
    291             /* fast path version of appendResult() for ASCII results */
    292             dest[destIndex++]=(uint8_t)c2;
    293         } else {
    294             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
    295             if(destIndex<0) {
    296                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    297                 return 0;
    298             }
    299         }
    300     }
    301 
    302     if(destIndex>destCapacity) {
    303         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    304     }
    305     return destIndex;
    306 }
    307 
    308 #if !UCONFIG_NO_BREAK_ITERATION
    309 
    310 U_CFUNC int32_t U_CALLCONV
    311 ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
    312          uint8_t *dest, int32_t destCapacity,
    313          const uint8_t *src, int32_t srcLength,
    314          UErrorCode *pErrorCode) {
    315     const UChar *s;
    316     UChar32 c;
    317     int32_t prev, titleStart, titleLimit, idx, destIndex;
    318     UBool isFirstIndex;
    319 
    320     if(U_FAILURE(*pErrorCode)) {
    321         return 0;
    322     }
    323 
    324     // Use the C++ abstract base class to minimize dependencies.
    325     // TODO: Change UCaseMap.iter to store a BreakIterator directly.
    326     BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
    327 
    328     /* set up local variables */
    329     int32_t locCache=csm->locCache;
    330     UCaseContext csc=UCASECONTEXT_INITIALIZER;
    331     csc.p=(void *)src;
    332     csc.limit=srcLength;
    333     destIndex=0;
    334     prev=0;
    335     isFirstIndex=TRUE;
    336 
    337     /* titlecasing loop */
    338     while(prev<srcLength) {
    339         /* find next index where to titlecase */
    340         if(isFirstIndex) {
    341             isFirstIndex=FALSE;
    342             idx=bi->first();
    343         } else {
    344             idx=bi->next();
    345         }
    346         if(idx==UBRK_DONE || idx>srcLength) {
    347             idx=srcLength;
    348         }
    349 
    350         /*
    351          * Unicode 4 & 5 section 3.13 Default Case Operations:
    352          *
    353          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
    354          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
    355          * cased character F. If F exists, map F to default_title(F); then map each
    356          * subsequent character C to default_lower(C).
    357          *
    358          * In this implementation, segment [prev..index[ into 3 parts:
    359          * a) uncased characters (copy as-is) [prev..titleStart[
    360          * b) first case letter (titlecase)         [titleStart..titleLimit[
    361          * c) subsequent characters (lowercase)                 [titleLimit..index[
    362          */
    363         if(prev<idx) {
    364             /* find and copy uncased characters [prev..titleStart[ */
    365             titleStart=titleLimit=prev;
    366             U8_NEXT(src, titleLimit, idx, c);
    367             if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
    368                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
    369                 for(;;) {
    370                     titleStart=titleLimit;
    371                     if(titleLimit==idx) {
    372                         /*
    373                          * only uncased characters in [prev..index[
    374                          * stop with titleStart==titleLimit==index
    375                          */
    376                         break;
    377                     }
    378                     U8_NEXT(src, titleLimit, idx, c);
    379                     if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
    380                         break; /* cased letter at [titleStart..titleLimit[ */
    381                     }
    382                 }
    383                 destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
    384                 if(destIndex<0) {
    385                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    386                     return 0;
    387                 }
    388             }
    389 
    390             if(titleStart<titleLimit) {
    391                 /* titlecase c which is from [titleStart..titleLimit[ */
    392                 if(c>=0) {
    393                     csc.cpStart=titleStart;
    394                     csc.cpLimit=titleLimit;
    395                     c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
    396                     destIndex=appendResult(dest, destIndex, destCapacity, c, s);
    397                 } else {
    398                     // Malformed UTF-8.
    399                     destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
    400                 }
    401                 if(destIndex<0) {
    402                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    403                     return 0;
    404                 }
    405 
    406                 /* Special case Dutch IJ titlecasing */
    407                 if (titleStart+1 < idx &&
    408                         ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
    409                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
    410                         (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
    411                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
    412                     titleLimit++;
    413                 }
    414                 /* lowercase [titleLimit..index[ */
    415                 if(titleLimit<idx) {
    416                     if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
    417                         /* Normal operation: Lowercase the rest of the word. */
    418                         destIndex+=
    419                             _caseMap(
    420                                 csm, ucase_toFullLower,
    421                                 dest+destIndex, destCapacity-destIndex,
    422                                 src, &csc,
    423                                 titleLimit, idx,
    424                                 pErrorCode);
    425                         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    426                             *pErrorCode=U_ZERO_ERROR;
    427                         }
    428                         if(U_FAILURE(*pErrorCode)) {
    429                             return destIndex;
    430                         }
    431                     } else {
    432                         /* Optionally just copy the rest of the word unchanged. */
    433                         destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
    434                         if(destIndex<0) {
    435                             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    436                             return 0;
    437                         }
    438                     }
    439                 }
    440             }
    441         }
    442 
    443         prev=idx;
    444     }
    445 
    446     if(destIndex>destCapacity) {
    447         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    448     }
    449     return destIndex;
    450 }
    451 
    452 #endif
    453 
    454 U_NAMESPACE_BEGIN
    455 namespace GreekUpper {
    456 
    457 UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) {
    458     while (i < length) {
    459         UChar32 c;
    460         U8_NEXT(s, i, length, c);
    461         int32_t type = ucase_getTypeOrIgnorable(csp, c);
    462         if ((type & UCASE_IGNORABLE) != 0) {
    463             // Case-ignorable, continue with the loop.
    464         } else if (type != UCASE_NONE) {
    465             return TRUE;  // Followed by cased letter.
    466         } else {
    467             return FALSE;  // Uncased and not case-ignorable.
    468         }
    469     }
    470     return FALSE;  // Not followed by cased letter.
    471 }
    472 
    473 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
    474 int32_t toUpper(const UCaseMap *csm,
    475                 uint8_t *dest, int32_t destCapacity,
    476                 const uint8_t *src, int32_t srcLength,
    477                 UErrorCode *pErrorCode) {
    478     int32_t locCache = UCASE_LOC_GREEK;
    479     int32_t destIndex=0;
    480     uint32_t state = 0;
    481     for (int32_t i = 0; i < srcLength;) {
    482         int32_t nextIndex = i;
    483         UChar32 c;
    484         U8_NEXT(src, nextIndex, srcLength, c);
    485         uint32_t nextState = 0;
    486         int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
    487         if ((type & UCASE_IGNORABLE) != 0) {
    488             // c is case-ignorable
    489             nextState |= (state & AFTER_CASED);
    490         } else if (type != UCASE_NONE) {
    491             // c is cased
    492             nextState |= AFTER_CASED;
    493         }
    494         uint32_t data = getLetterData(c);
    495         if (data > 0) {
    496             uint32_t upper = data & UPPER_MASK;
    497             // Add a dialytika to this iota or ypsilon vowel
    498             // if we removed a tonos from the previous vowel,
    499             // and that previous vowel did not also have (or gain) a dialytika.
    500             // Adding one only to the final vowel in a longer sequence
    501             // (which does not occur in normal writing) would require lookahead.
    502             // Set the same flag as for preserving an existing dialytika.
    503             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
    504                     (upper == 0x399 || upper == 0x3A5)) {
    505                 data |= HAS_DIALYTIKA;
    506             }
    507             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
    508             if ((data & HAS_YPOGEGRAMMENI) != 0) {
    509                 numYpogegrammeni = 1;
    510             }
    511             // Skip combining diacritics after this Greek letter.
    512             int32_t nextNextIndex = nextIndex;
    513             while (nextIndex < srcLength) {
    514                 UChar32 c2;
    515                 U8_NEXT(src, nextNextIndex, srcLength, c2);
    516                 uint32_t diacriticData = getDiacriticData(c2);
    517                 if (diacriticData != 0) {
    518                     data |= diacriticData;
    519                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
    520                         ++numYpogegrammeni;
    521                     }
    522                     nextIndex = nextNextIndex;
    523                 } else {
    524                     break;  // not a Greek diacritic
    525                 }
    526             }
    527             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
    528                 nextState |= AFTER_VOWEL_WITH_ACCENT;
    529             }
    530             // Map according to Greek rules.
    531             UBool addTonos = FALSE;
    532             if (upper == 0x397 &&
    533                     (data & HAS_ACCENT) != 0 &&
    534                     numYpogegrammeni == 0 &&
    535                     (state & AFTER_CASED) == 0 &&
    536                     !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
    537                 // Keep disjunctive "or" with (only) a tonos.
    538                 // We use the same "word boundary" conditions as for the Final_Sigma test.
    539                 if (i == nextIndex) {
    540                     upper = 0x389;  // Preserve the precomposed form.
    541                 } else {
    542                     addTonos = TRUE;
    543                 }
    544             } else if ((data & HAS_DIALYTIKA) != 0) {
    545                 // Preserve a vowel with dialytika in precomposed form if it exists.
    546                 if (upper == 0x399) {
    547                     upper = 0x3AA;
    548                     data &= ~HAS_EITHER_DIALYTIKA;
    549                 } else if (upper == 0x3A5) {
    550                     upper = 0x3AB;
    551                     data &= ~HAS_EITHER_DIALYTIKA;
    552                 }
    553             }
    554             destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
    555             if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
    556                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
    557             }
    558             if (destIndex >= 0 && addTonos) {
    559                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
    560             }
    561             while (destIndex >= 0 && numYpogegrammeni > 0) {
    562                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
    563                 --numYpogegrammeni;
    564             }
    565             if(destIndex<0) {
    566                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    567                 return 0;
    568             }
    569         } else if(c>=0) {
    570             const UChar *s;
    571             UChar32 c2 = 0;
    572             c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
    573             if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
    574                 /* fast path version of appendResult() for ASCII results */
    575                 dest[destIndex++]=(uint8_t)c2;
    576             } else {
    577                 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
    578                 if(destIndex<0) {
    579                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    580                     return 0;
    581                 }
    582             }
    583         } else {
    584             // Malformed UTF-8.
    585             destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
    586             if(destIndex<0) {
    587                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    588                 return 0;
    589             }
    590         }
    591         i = nextIndex;
    592         state = nextState;
    593     }
    594 
    595     if(destIndex>destCapacity) {
    596         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    597     }
    598     return destIndex;
    599 }
    600 
    601 }  // namespace GreekUpper
    602 U_NAMESPACE_END
    603 
    604 static int32_t U_CALLCONV
    605 ucasemap_internalUTF8ToLower(const UCaseMap *csm,
    606                              uint8_t *dest, int32_t destCapacity,
    607                              const uint8_t *src, int32_t srcLength,
    608                              UErrorCode *pErrorCode) {
    609     UCaseContext csc=UCASECONTEXT_INITIALIZER;
    610     csc.p=(void *)src;
    611     csc.limit=srcLength;
    612     return _caseMap(
    613         csm, ucase_toFullLower,
    614         dest, destCapacity,
    615         src, &csc, 0, srcLength,
    616         pErrorCode);
    617 }
    618 
    619 static int32_t U_CALLCONV
    620 ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
    621                              uint8_t *dest, int32_t destCapacity,
    622                              const uint8_t *src, int32_t srcLength,
    623                              UErrorCode *pErrorCode) {
    624     int32_t locCache = csm->locCache;
    625     if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
    626         return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
    627     }
    628     UCaseContext csc=UCASECONTEXT_INITIALIZER;
    629     csc.p=(void *)src;
    630     csc.limit=srcLength;
    631     return _caseMap(
    632         csm, ucase_toFullUpper,
    633         dest, destCapacity,
    634         src, &csc, 0, srcLength,
    635         pErrorCode);
    636 }
    637 
    638 static int32_t
    639 utf8_foldCase(const UCaseProps *csp,
    640               uint8_t *dest, int32_t destCapacity,
    641               const uint8_t *src, int32_t srcLength,
    642               uint32_t options,
    643               UErrorCode *pErrorCode) {
    644     int32_t srcIndex, destIndex;
    645 
    646     const UChar *s;
    647     UChar32 c, c2;
    648     int32_t start;
    649 
    650     /* case mapping loop */
    651     srcIndex=destIndex=0;
    652     while(srcIndex<srcLength) {
    653         start=srcIndex;
    654         U8_NEXT(src, srcIndex, srcLength, c);
    655         if(c<0) {
    656             // Malformed UTF-8.
    657             destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
    658             if(destIndex<0) {
    659                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    660                 return 0;
    661             }
    662             continue;
    663         }
    664         c=ucase_toFullFolding(csp, c, &s, options);
    665         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
    666             /* fast path version of appendResult() for ASCII results */
    667             dest[destIndex++]=(uint8_t)c2;
    668         } else {
    669             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
    670             if(destIndex<0) {
    671                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    672                 return 0;
    673             }
    674         }
    675     }
    676 
    677     if(destIndex>destCapacity) {
    678         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    679     }
    680     return destIndex;
    681 }
    682 
    683 static int32_t U_CALLCONV
    684 ucasemap_internalUTF8Fold(const UCaseMap *csm,
    685                           uint8_t *dest, int32_t destCapacity,
    686                           const uint8_t *src, int32_t srcLength,
    687                           UErrorCode *pErrorCode) {
    688     return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
    689 }
    690 
    691 U_CFUNC int32_t
    692 ucasemap_mapUTF8(const UCaseMap *csm,
    693                  uint8_t *dest, int32_t destCapacity,
    694                  const uint8_t *src, int32_t srcLength,
    695                  UTF8CaseMapper *stringCaseMapper,
    696                  UErrorCode *pErrorCode) {
    697     int32_t destLength;
    698 
    699     /* check argument values */
    700     if(U_FAILURE(*pErrorCode)) {
    701         return 0;
    702     }
    703     if( destCapacity<0 ||
    704         (dest==NULL && destCapacity>0) ||
    705         src==NULL ||
    706         srcLength<-1
    707     ) {
    708         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    709         return 0;
    710     }
    711 
    712     /* get the string length */
    713     if(srcLength==-1) {
    714         srcLength=(int32_t)uprv_strlen((const char *)src);
    715     }
    716 
    717     /* check for overlapping source and destination */
    718     if( dest!=NULL &&
    719         ((src>=dest && src<(dest+destCapacity)) ||
    720          (dest>=src && dest<(src+srcLength)))
    721     ) {
    722         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    723         return 0;
    724     }
    725 
    726     destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
    727     return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
    728 }
    729 
    730 /* public API functions */
    731 
    732 U_CAPI int32_t U_EXPORT2
    733 ucasemap_utf8ToLower(const UCaseMap *csm,
    734                      char *dest, int32_t destCapacity,
    735                      const char *src, int32_t srcLength,
    736                      UErrorCode *pErrorCode) {
    737     return ucasemap_mapUTF8(csm,
    738                    (uint8_t *)dest, destCapacity,
    739                    (const uint8_t *)src, srcLength,
    740                    ucasemap_internalUTF8ToLower, pErrorCode);
    741 }
    742 
    743 U_CAPI int32_t U_EXPORT2
    744 ucasemap_utf8ToUpper(const UCaseMap *csm,
    745                      char *dest, int32_t destCapacity,
    746                      const char *src, int32_t srcLength,
    747                      UErrorCode *pErrorCode) {
    748     return ucasemap_mapUTF8(csm,
    749                    (uint8_t *)dest, destCapacity,
    750                    (const uint8_t *)src, srcLength,
    751                    ucasemap_internalUTF8ToUpper, pErrorCode);
    752 }
    753 
    754 U_CAPI int32_t U_EXPORT2
    755 ucasemap_utf8FoldCase(const UCaseMap *csm,
    756                       char *dest, int32_t destCapacity,
    757                       const char *src, int32_t srcLength,
    758                       UErrorCode *pErrorCode) {
    759     return ucasemap_mapUTF8(csm,
    760                    (uint8_t *)dest, destCapacity,
    761                    (const uint8_t *)src, srcLength,
    762                    ucasemap_internalUTF8Fold, pErrorCode);
    763 }
    764