Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 * File ustrtrns.c
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   9/10/2001    Ram    Creation.
     15 ******************************************************************************
     16 */
     17 
     18 /*******************************************************************************
     19  *
     20  * u_strTo* and u_strFrom* APIs
     21  * WCS functions moved to ustr_wcs.c for better modularization
     22  *
     23  *******************************************************************************
     24  */
     25 
     26 
     27 #include "unicode/putil.h"
     28 #include "unicode/ustring.h"
     29 #include "cstring.h"
     30 #include "cmemory.h"
     31 #include "ustr_imp.h"
     32 
     33 U_CAPI UChar* U_EXPORT2
     34 u_strFromUTF32WithSub(UChar *dest,
     35                int32_t destCapacity,
     36                int32_t *pDestLength,
     37                const UChar32 *src,
     38                int32_t srcLength,
     39                UChar32 subchar, int32_t *pNumSubstitutions,
     40                UErrorCode *pErrorCode) {
     41     const UChar32 *srcLimit;
     42     UChar32 ch;
     43     UChar *destLimit;
     44     UChar *pDest;
     45     int32_t reqLength;
     46     int32_t numSubstitutions;
     47 
     48     /* args check */
     49     if(U_FAILURE(*pErrorCode)){
     50         return NULL;
     51     }
     52     if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
     53         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
     54     ) {
     55         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
     56         return NULL;
     57     }
     58 
     59     if(pNumSubstitutions != NULL) {
     60         *pNumSubstitutions = 0;
     61     }
     62 
     63     pDest = dest;
     64     destLimit = dest + destCapacity;
     65     reqLength = 0;
     66     numSubstitutions = 0;
     67 
     68     if(srcLength < 0) {
     69         /* simple loop for conversion of a NUL-terminated BMP string */
     70         while((ch=*src) != 0 &&
     71               ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
     72             ++src;
     73             if(pDest < destLimit) {
     74                 *pDest++ = (UChar)ch;
     75             } else {
     76                 ++reqLength;
     77             }
     78         }
     79         srcLimit = src;
     80         if(ch != 0) {
     81             /* "complicated" case, find the end of the remaining string */
     82             while(*++srcLimit != 0) {}
     83         }
     84     } else {
     85         srcLimit = src + srcLength;
     86     }
     87 
     88     /* convert with length */
     89     while(src < srcLimit) {
     90         ch = *src++;
     91         do {
     92             /* usually "loops" once; twice only for writing subchar */
     93             if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
     94                 if(pDest < destLimit) {
     95                     *pDest++ = (UChar)ch;
     96                 } else {
     97                     ++reqLength;
     98                 }
     99                 break;
    100             } else if(0x10000 <= ch && ch <= 0x10ffff) {
    101                 if((pDest + 2) <= destLimit) {
    102                     *pDest++ = U16_LEAD(ch);
    103                     *pDest++ = U16_TRAIL(ch);
    104                 } else {
    105                     reqLength += 2;
    106                 }
    107                 break;
    108             } else if((ch = subchar) < 0) {
    109                 /* surrogate code point, or not a Unicode code point at all */
    110                 *pErrorCode = U_INVALID_CHAR_FOUND;
    111                 return NULL;
    112             } else {
    113                 ++numSubstitutions;
    114             }
    115         } while(TRUE);
    116     }
    117 
    118     reqLength += (int32_t)(pDest - dest);
    119     if(pDestLength) {
    120         *pDestLength = reqLength;
    121     }
    122     if(pNumSubstitutions != NULL) {
    123         *pNumSubstitutions = numSubstitutions;
    124     }
    125 
    126     /* Terminate the buffer */
    127     u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
    128 
    129     return dest;
    130 }
    131 
    132 U_CAPI UChar* U_EXPORT2
    133 u_strFromUTF32(UChar *dest,
    134                int32_t destCapacity,
    135                int32_t *pDestLength,
    136                const UChar32 *src,
    137                int32_t srcLength,
    138                UErrorCode *pErrorCode) {
    139     return u_strFromUTF32WithSub(
    140             dest, destCapacity, pDestLength,
    141             src, srcLength,
    142             U_SENTINEL, NULL,
    143             pErrorCode);
    144 }
    145 
    146 U_CAPI UChar32* U_EXPORT2
    147 u_strToUTF32WithSub(UChar32 *dest,
    148              int32_t destCapacity,
    149              int32_t *pDestLength,
    150              const UChar *src,
    151              int32_t srcLength,
    152              UChar32 subchar, int32_t *pNumSubstitutions,
    153              UErrorCode *pErrorCode) {
    154     const UChar *srcLimit;
    155     UChar32 ch;
    156     UChar ch2;
    157     UChar32 *destLimit;
    158     UChar32 *pDest;
    159     int32_t reqLength;
    160     int32_t numSubstitutions;
    161 
    162     /* args check */
    163     if(U_FAILURE(*pErrorCode)){
    164         return NULL;
    165     }
    166     if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
    167         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    168     ) {
    169         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    170         return NULL;
    171     }
    172 
    173     if(pNumSubstitutions != NULL) {
    174         *pNumSubstitutions = 0;
    175     }
    176 
    177     pDest = dest;
    178     destLimit = dest + destCapacity;
    179     reqLength = 0;
    180     numSubstitutions = 0;
    181 
    182     if(srcLength < 0) {
    183         /* simple loop for conversion of a NUL-terminated BMP string */
    184         while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
    185             ++src;
    186             if(pDest < destLimit) {
    187                 *pDest++ = ch;
    188             } else {
    189                 ++reqLength;
    190             }
    191         }
    192         srcLimit = src;
    193         if(ch != 0) {
    194             /* "complicated" case, find the end of the remaining string */
    195             while(*++srcLimit != 0) {}
    196         }
    197     } else {
    198         srcLimit = src + srcLength;
    199     }
    200 
    201     /* convert with length */
    202     while(src < srcLimit) {
    203         ch = *src++;
    204         if(!U16_IS_SURROGATE(ch)) {
    205             /* write or count ch below */
    206         } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
    207             ++src;
    208             ch = U16_GET_SUPPLEMENTARY(ch, ch2);
    209         } else if((ch = subchar) < 0) {
    210             /* unpaired surrogate */
    211             *pErrorCode = U_INVALID_CHAR_FOUND;
    212             return NULL;
    213         } else {
    214             ++numSubstitutions;
    215         }
    216         if(pDest < destLimit) {
    217             *pDest++ = ch;
    218         } else {
    219             ++reqLength;
    220         }
    221     }
    222 
    223     reqLength += (int32_t)(pDest - dest);
    224     if(pDestLength) {
    225         *pDestLength = reqLength;
    226     }
    227     if(pNumSubstitutions != NULL) {
    228         *pNumSubstitutions = numSubstitutions;
    229     }
    230 
    231     /* Terminate the buffer */
    232     u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
    233 
    234     return dest;
    235 }
    236 
    237 U_CAPI UChar32* U_EXPORT2
    238 u_strToUTF32(UChar32 *dest,
    239              int32_t destCapacity,
    240              int32_t *pDestLength,
    241              const UChar *src,
    242              int32_t srcLength,
    243              UErrorCode *pErrorCode) {
    244     return u_strToUTF32WithSub(
    245             dest, destCapacity, pDestLength,
    246             src, srcLength,
    247             U_SENTINEL, NULL,
    248             pErrorCode);
    249 }
    250 
    251 /* for utf8_nextCharSafeBodyTerminated() */
    252 static const UChar32
    253 utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
    254 
    255 /*
    256  * Version of utf8_nextCharSafeBody() with the following differences:
    257  * - checks for NUL termination instead of length
    258  * - works with pointers instead of indexes
    259  * - always strict (strict==-1)
    260  *
    261  * *ps points to after the lead byte and will be moved to after the last trail byte.
    262  * c is the lead byte.
    263  * @return the code point, or U_SENTINEL
    264  */
    265 static UChar32
    266 utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
    267     const uint8_t *s=*ps;
    268     uint8_t trail, illegal=0;
    269     uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
    270     UTF8_MASK_LEAD_BYTE((c), count);
    271     /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
    272     switch(count) {
    273     /* each branch falls through to the next one */
    274     case 5:
    275     case 4:
    276         /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
    277         illegal=1;
    278         break;
    279     case 3:
    280         trail=(uint8_t)(*s++ - 0x80);
    281         c=(c<<6)|trail;
    282         if(trail>0x3f || c>=0x110) {
    283             /* not a trail byte, or code point>0x10ffff (outside Unicode) */
    284             illegal=1;
    285             break;
    286         }
    287     case 2:
    288         trail=(uint8_t)(*s++ - 0x80);
    289         if(trail>0x3f) {
    290             /* not a trail byte */
    291             illegal=1;
    292             break;
    293         }
    294         c=(c<<6)|trail;
    295     case 1:
    296         trail=(uint8_t)(*s++ - 0x80);
    297         if(trail>0x3f) {
    298             /* not a trail byte */
    299             illegal=1;
    300         }
    301         c=(c<<6)|trail;
    302         break;
    303     case 0:
    304         return U_SENTINEL;
    305     /* no default branch to optimize switch()  - all values are covered */
    306     }
    307 
    308     /* correct sequence - all trail bytes have (b7..b6)==(10)? */
    309     /* illegal is also set if count>=4 */
    310     if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
    311         /* error handling */
    312         /* don't go beyond this sequence */
    313         s=*ps;
    314         while(count>0 && UTF8_IS_TRAIL(*s)) {
    315             ++s;
    316             --count;
    317         }
    318         c=U_SENTINEL;
    319     }
    320     *ps=s;
    321     return c;
    322 }
    323 
    324 /*
    325  * Version of utf8_nextCharSafeBody() with the following differences:
    326  * - works with pointers instead of indexes
    327  * - always strict (strict==-1)
    328  *
    329  * *ps points to after the lead byte and will be moved to after the last trail byte.
    330  * c is the lead byte.
    331  * @return the code point, or U_SENTINEL
    332  */
    333 static UChar32
    334 utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
    335     const uint8_t *s=*ps;
    336     uint8_t trail, illegal=0;
    337     uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
    338     if((limit-s)>=count) {
    339         UTF8_MASK_LEAD_BYTE((c), count);
    340         /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
    341         switch(count) {
    342         /* each branch falls through to the next one */
    343         case 5:
    344         case 4:
    345             /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
    346             illegal=1;
    347             break;
    348         case 3:
    349             trail=*s++;
    350             c=(c<<6)|(trail&0x3f);
    351             if(c<0x110) {
    352                 illegal|=(trail&0xc0)^0x80;
    353             } else {
    354                 /* code point>0x10ffff, outside Unicode */
    355                 illegal=1;
    356                 break;
    357             }
    358         case 2:
    359             trail=*s++;
    360             c=(c<<6)|(trail&0x3f);
    361             illegal|=(trail&0xc0)^0x80;
    362         case 1:
    363             trail=*s++;
    364             c=(c<<6)|(trail&0x3f);
    365             illegal|=(trail&0xc0)^0x80;
    366             break;
    367         case 0:
    368             return U_SENTINEL;
    369         /* no default branch to optimize switch()  - all values are covered */
    370         }
    371     } else {
    372         illegal=1; /* too few bytes left */
    373     }
    374 
    375     /* correct sequence - all trail bytes have (b7..b6)==(10)? */
    376     /* illegal is also set if count>=4 */
    377     if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
    378         /* error handling */
    379         /* don't go beyond this sequence */
    380         s=*ps;
    381         while(count>0 && s<limit && UTF8_IS_TRAIL(*s)) {
    382             ++s;
    383             --count;
    384         }
    385         c=U_SENTINEL;
    386     }
    387     *ps=s;
    388     return c;
    389 }
    390 
    391 U_CAPI UChar* U_EXPORT2
    392 u_strFromUTF8WithSub(UChar *dest,
    393               int32_t destCapacity,
    394               int32_t *pDestLength,
    395               const char* src,
    396               int32_t srcLength,
    397               UChar32 subchar, int32_t *pNumSubstitutions,
    398               UErrorCode *pErrorCode){
    399 
    400     UChar *pDest = dest;
    401     UChar *pDestLimit = dest+destCapacity;
    402     UChar32 ch;
    403     int32_t reqLength = 0;
    404     const uint8_t* pSrc = (const uint8_t*) src;
    405     uint8_t t1, t2; /* trail bytes */
    406     int32_t numSubstitutions;
    407 
    408     /* args check */
    409     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    410         return NULL;
    411     }
    412 
    413     if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
    414         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    415     ) {
    416         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    417         return NULL;
    418     }
    419 
    420     if(pNumSubstitutions!=NULL) {
    421         *pNumSubstitutions=0;
    422     }
    423     numSubstitutions=0;
    424 
    425     /*
    426      * Inline processing of UTF-8 byte sequences:
    427      *
    428      * Byte sequences for the most common characters are handled inline in
    429      * the conversion loops. In order to reduce the path lengths for those
    430      * characters, the tests are arranged in a kind of binary search.
    431      * ASCII (<=0x7f) is checked first, followed by the dividing point
    432      * between 2- and 3-byte sequences (0xe0).
    433      * The 3-byte branch is tested first to speed up CJK text.
    434      * The compiler should combine the subtractions for the two tests for 0xe0.
    435      * Each branch then tests for the other end of its range.
    436      */
    437 
    438     if(srcLength < 0){
    439         /*
    440          * Transform a NUL-terminated string.
    441          * The code explicitly checks for NULs only in the lead byte position.
    442          * A NUL byte in the trail byte position fails the trail byte range check anyway.
    443          */
    444         while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
    445             if(ch <= 0x7f){
    446                 *pDest++=(UChar)ch;
    447                 ++pSrc;
    448             } else {
    449                 if(ch > 0xe0) {
    450                     if( /* handle U+1000..U+CFFF inline */
    451                         ch <= 0xec &&
    452                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
    453                         (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
    454                     ) {
    455                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    456                         *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
    457                         pSrc += 3;
    458                         continue;
    459                     }
    460                 } else if(ch < 0xe0) {
    461                     if( /* handle U+0080..U+07FF inline */
    462                         ch >= 0xc2 &&
    463                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
    464                     ) {
    465                         *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
    466                         pSrc += 2;
    467                         continue;
    468                     }
    469                 }
    470 
    471                 /* function call for "complicated" and error cases */
    472                 ++pSrc; /* continue after the lead byte */
    473                 ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
    474                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
    475                     *pErrorCode = U_INVALID_CHAR_FOUND;
    476                     return NULL;
    477                 } else if(ch<=0xFFFF) {
    478                     *(pDest++)=(UChar)ch;
    479                 } else {
    480                     *(pDest++)=UTF16_LEAD(ch);
    481                     if(pDest<pDestLimit) {
    482                         *(pDest++)=UTF16_TRAIL(ch);
    483                     } else {
    484                         reqLength++;
    485                         break;
    486                     }
    487                 }
    488             }
    489         }
    490 
    491         /* Pre-flight the rest of the string. */
    492         while((ch = *pSrc) != 0) {
    493             if(ch <= 0x7f){
    494                 ++reqLength;
    495                 ++pSrc;
    496             } else {
    497                 if(ch > 0xe0) {
    498                     if( /* handle U+1000..U+CFFF inline */
    499                         ch <= 0xec &&
    500                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
    501                         (uint8_t)(pSrc[2] - 0x80) <= 0x3f
    502                     ) {
    503                         ++reqLength;
    504                         pSrc += 3;
    505                         continue;
    506                     }
    507                 } else if(ch < 0xe0) {
    508                     if( /* handle U+0080..U+07FF inline */
    509                         ch >= 0xc2 &&
    510                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f
    511                     ) {
    512                         ++reqLength;
    513                         pSrc += 2;
    514                         continue;
    515                     }
    516                 }
    517 
    518                 /* function call for "complicated" and error cases */
    519                 ++pSrc; /* continue after the lead byte */
    520                 ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
    521                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
    522                     *pErrorCode = U_INVALID_CHAR_FOUND;
    523                     return NULL;
    524                 }
    525                 reqLength += U16_LENGTH(ch);
    526             }
    527         }
    528     } else /* srcLength >= 0 */ {
    529         const uint8_t *pSrcLimit = pSrc + srcLength;
    530         int32_t count;
    531 
    532         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
    533         for(;;) {
    534             /*
    535              * Each iteration of the inner loop progresses by at most 3 UTF-8
    536              * bytes and one UChar, for most characters.
    537              * For supplementary code points (4 & 2), which are rare,
    538              * there is an additional adjustment.
    539              */
    540             count = (int32_t)(pDestLimit - pDest);
    541             srcLength = (int32_t)((pSrcLimit - pSrc) / 3);
    542             if(count > srcLength) {
    543                 count = srcLength; /* min(remaining dest, remaining src/3) */
    544             }
    545             if(count < 3) {
    546                 /*
    547                  * Too much overhead if we get near the end of the string,
    548                  * continue with the next loop.
    549                  */
    550                 break;
    551             }
    552 
    553             do {
    554                 ch = *pSrc;
    555                 if(ch <= 0x7f){
    556                     *pDest++=(UChar)ch;
    557                     ++pSrc;
    558                 } else {
    559                     if(ch > 0xe0) {
    560                         if( /* handle U+1000..U+CFFF inline */
    561                             ch <= 0xec &&
    562                             (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
    563                             (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
    564                         ) {
    565                             /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    566                             *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
    567                             pSrc += 3;
    568                             continue;
    569                         }
    570                     } else if(ch < 0xe0) {
    571                         if( /* handle U+0080..U+07FF inline */
    572                             ch >= 0xc2 &&
    573                             (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
    574                         ) {
    575                             *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
    576                             pSrc += 2;
    577                             continue;
    578                         }
    579                     }
    580 
    581                     if(ch >= 0xf0 || subchar > 0xffff) {
    582                         /*
    583                          * We may read up to six bytes and write up to two UChars,
    584                          * which we didn't account for with computing count,
    585                          * so we adjust it here.
    586                          */
    587                         if(--count == 0) {
    588                             break;
    589                         }
    590                     }
    591 
    592                     /* function call for "complicated" and error cases */
    593                     ++pSrc; /* continue after the lead byte */
    594                     ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
    595                     if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
    596                         *pErrorCode = U_INVALID_CHAR_FOUND;
    597                         return NULL;
    598                     }else if(ch<=0xFFFF){
    599                         *(pDest++)=(UChar)ch;
    600                     }else{
    601                         *(pDest++)=UTF16_LEAD(ch);
    602                         if(pDest<pDestLimit){
    603                             *(pDest++)=UTF16_TRAIL(ch);
    604                         }else{
    605                             reqLength++;
    606                             break;
    607                         }
    608                     }
    609                 }
    610             } while(--count > 0);
    611         }
    612 
    613         while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
    614             ch = *pSrc;
    615             if(ch <= 0x7f){
    616                 *pDest++=(UChar)ch;
    617                 ++pSrc;
    618             } else {
    619                 if(ch > 0xe0) {
    620                     if( /* handle U+1000..U+CFFF inline */
    621                         ch <= 0xec &&
    622                         ((pSrcLimit - pSrc) >= 3) &&
    623                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
    624                         (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
    625                     ) {
    626                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    627                         *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
    628                         pSrc += 3;
    629                         continue;
    630                     }
    631                 } else if(ch < 0xe0) {
    632                     if( /* handle U+0080..U+07FF inline */
    633                         ch >= 0xc2 &&
    634                         ((pSrcLimit - pSrc) >= 2) &&
    635                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
    636                     ) {
    637                         *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
    638                         pSrc += 2;
    639                         continue;
    640                     }
    641                 }
    642 
    643                 /* function call for "complicated" and error cases */
    644                 ++pSrc; /* continue after the lead byte */
    645                 ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
    646                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
    647                     *pErrorCode = U_INVALID_CHAR_FOUND;
    648                     return NULL;
    649                 }else if(ch<=0xFFFF){
    650                     *(pDest++)=(UChar)ch;
    651                 }else{
    652                     *(pDest++)=UTF16_LEAD(ch);
    653                     if(pDest<pDestLimit){
    654                         *(pDest++)=UTF16_TRAIL(ch);
    655                     }else{
    656                         reqLength++;
    657                         break;
    658                     }
    659                 }
    660             }
    661         }
    662         /* donot fill the dest buffer just count the UChars needed */
    663         while(pSrc < pSrcLimit){
    664             ch = *pSrc;
    665             if(ch <= 0x7f){
    666                 reqLength++;
    667                 ++pSrc;
    668             } else {
    669                 if(ch > 0xe0) {
    670                     if( /* handle U+1000..U+CFFF inline */
    671                         ch <= 0xec &&
    672                         ((pSrcLimit - pSrc) >= 3) &&
    673                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
    674                         (uint8_t)(pSrc[2] - 0x80) <= 0x3f
    675                     ) {
    676                         reqLength++;
    677                         pSrc += 3;
    678                         continue;
    679                     }
    680                 } else if(ch < 0xe0) {
    681                     if( /* handle U+0080..U+07FF inline */
    682                         ch >= 0xc2 &&
    683                         ((pSrcLimit - pSrc) >= 2) &&
    684                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f
    685                     ) {
    686                         reqLength++;
    687                         pSrc += 2;
    688                         continue;
    689                     }
    690                 }
    691 
    692                 /* function call for "complicated" and error cases */
    693                 ++pSrc; /* continue after the lead byte */
    694                 ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
    695                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
    696                     *pErrorCode = U_INVALID_CHAR_FOUND;
    697                     return NULL;
    698                 }
    699                 reqLength+=UTF_CHAR_LENGTH(ch);
    700             }
    701         }
    702     }
    703 
    704     reqLength+=(int32_t)(pDest - dest);
    705 
    706     if(pNumSubstitutions!=NULL) {
    707         *pNumSubstitutions=numSubstitutions;
    708     }
    709 
    710     if(pDestLength){
    711         *pDestLength = reqLength;
    712     }
    713 
    714     /* Terminate the buffer */
    715     u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
    716 
    717     return dest;
    718 }
    719 
    720 U_CAPI UChar* U_EXPORT2
    721 u_strFromUTF8(UChar *dest,
    722               int32_t destCapacity,
    723               int32_t *pDestLength,
    724               const char* src,
    725               int32_t srcLength,
    726               UErrorCode *pErrorCode){
    727     return u_strFromUTF8WithSub(
    728             dest, destCapacity, pDestLength,
    729             src, srcLength,
    730             U_SENTINEL, NULL,
    731             pErrorCode);
    732 }
    733 
    734 U_CAPI UChar * U_EXPORT2
    735 u_strFromUTF8Lenient(UChar *dest,
    736                      int32_t destCapacity,
    737                      int32_t *pDestLength,
    738                      const char *src,
    739                      int32_t srcLength,
    740                      UErrorCode *pErrorCode) {
    741 
    742     UChar *pDest = dest;
    743     UChar32 ch;
    744     int32_t reqLength = 0;
    745     uint8_t* pSrc = (uint8_t*) src;
    746 
    747     /* args check */
    748     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    749         return NULL;
    750     }
    751 
    752     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
    753         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    754         return NULL;
    755     }
    756 
    757     if(srcLength < 0) {
    758         /* Transform a NUL-terminated string. */
    759         UChar *pDestLimit = dest+destCapacity;
    760         uint8_t t1, t2, t3; /* trail bytes */
    761 
    762         while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
    763             if(ch < 0xc0) {
    764                 /*
    765                  * ASCII, or a trail byte in lead position which is treated like
    766                  * a single-byte sequence for better character boundary
    767                  * resynchronization after illegal sequences.
    768                  */
    769                 *pDest++=(UChar)ch;
    770                 ++pSrc;
    771                 continue;
    772             } else if(ch < 0xe0) { /* U+0080..U+07FF */
    773                 if((t1 = pSrc[1]) != 0) {
    774                     /* 0x3080 = (0xc0 << 6) + 0x80 */
    775                     *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
    776                     pSrc += 2;
    777                     continue;
    778                 }
    779             } else if(ch < 0xf0) { /* U+0800..U+FFFF */
    780                 if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
    781                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    782                     /* 0x2080 = (0x80 << 6) + 0x80 */
    783                     *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
    784                     pSrc += 3;
    785                     continue;
    786                 }
    787             } else /* f0..f4 */ { /* U+10000..U+10FFFF */
    788                 if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
    789                     pSrc += 4;
    790                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
    791                     ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
    792                     *(pDest++) = U16_LEAD(ch);
    793                     if(pDest < pDestLimit) {
    794                         *(pDest++) = U16_TRAIL(ch);
    795                     } else {
    796                         reqLength = 1;
    797                         break;
    798                     }
    799                     continue;
    800                 }
    801             }
    802 
    803             /* truncated character at the end */
    804             *pDest++ = 0xfffd;
    805             while(*++pSrc != 0) {}
    806             break;
    807         }
    808 
    809         /* Pre-flight the rest of the string. */
    810         while((ch = *pSrc) != 0) {
    811             if(ch < 0xc0) {
    812                 /*
    813                  * ASCII, or a trail byte in lead position which is treated like
    814                  * a single-byte sequence for better character boundary
    815                  * resynchronization after illegal sequences.
    816                  */
    817                 ++reqLength;
    818                 ++pSrc;
    819                 continue;
    820             } else if(ch < 0xe0) { /* U+0080..U+07FF */
    821                 if(pSrc[1] != 0) {
    822                     ++reqLength;
    823                     pSrc += 2;
    824                     continue;
    825                 }
    826             } else if(ch < 0xf0) { /* U+0800..U+FFFF */
    827                 if(pSrc[1] != 0 && pSrc[2] != 0) {
    828                     ++reqLength;
    829                     pSrc += 3;
    830                     continue;
    831                 }
    832             } else /* f0..f4 */ { /* U+10000..U+10FFFF */
    833                 if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
    834                     reqLength += 2;
    835                     pSrc += 4;
    836                     continue;
    837                 }
    838             }
    839 
    840             /* truncated character at the end */
    841             ++reqLength;
    842             break;
    843         }
    844     } else /* srcLength >= 0 */ {
    845         const uint8_t *pSrcLimit = pSrc + srcLength;
    846 
    847         /*
    848          * This function requires that if srcLength is given, then it must be
    849          * destCapacity >= srcLength so that we need not check for
    850          * destination buffer overflow in the loop.
    851          */
    852         if(destCapacity < srcLength) {
    853             if(pDestLength != NULL) {
    854                 *pDestLength = srcLength; /* this likely overestimates the true destLength! */
    855             }
    856             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    857             return NULL;
    858         }
    859 
    860         if((pSrcLimit - pSrc) >= 4) {
    861             pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
    862 
    863             /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
    864             do {
    865                 ch = *pSrc++;
    866                 if(ch < 0xc0) {
    867                     /*
    868                      * ASCII, or a trail byte in lead position which is treated like
    869                      * a single-byte sequence for better character boundary
    870                      * resynchronization after illegal sequences.
    871                      */
    872                     *pDest++=(UChar)ch;
    873                 } else if(ch < 0xe0) { /* U+0080..U+07FF */
    874                     /* 0x3080 = (0xc0 << 6) + 0x80 */
    875                     *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
    876                 } else if(ch < 0xf0) { /* U+0800..U+FFFF */
    877                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    878                     /* 0x2080 = (0x80 << 6) + 0x80 */
    879                     ch = (ch << 12) + (*pSrc++ << 6);
    880                     *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
    881                 } else /* f0..f4 */ { /* U+10000..U+10FFFF */
    882                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
    883                     ch = (ch << 18) + (*pSrc++ << 12);
    884                     ch += *pSrc++ << 6;
    885                     ch += *pSrc++ - 0x3c82080;
    886                     *(pDest++) = U16_LEAD(ch);
    887                     *(pDest++) = U16_TRAIL(ch);
    888                 }
    889             } while(pSrc < pSrcLimit);
    890 
    891             pSrcLimit += 3; /* restore original pSrcLimit */
    892         }
    893 
    894         while(pSrc < pSrcLimit) {
    895             ch = *pSrc++;
    896             if(ch < 0xc0) {
    897                 /*
    898                  * ASCII, or a trail byte in lead position which is treated like
    899                  * a single-byte sequence for better character boundary
    900                  * resynchronization after illegal sequences.
    901                  */
    902                 *pDest++=(UChar)ch;
    903                 continue;
    904             } else if(ch < 0xe0) { /* U+0080..U+07FF */
    905                 if(pSrc < pSrcLimit) {
    906                     /* 0x3080 = (0xc0 << 6) + 0x80 */
    907                     *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
    908                     continue;
    909                 }
    910             } else if(ch < 0xf0) { /* U+0800..U+FFFF */
    911                 if((pSrcLimit - pSrc) >= 2) {
    912                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    913                     /* 0x2080 = (0x80 << 6) + 0x80 */
    914                     ch = (ch << 12) + (*pSrc++ << 6);
    915                     *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
    916                     pSrc += 3;
    917                     continue;
    918                 }
    919             } else /* f0..f4 */ { /* U+10000..U+10FFFF */
    920                 if((pSrcLimit - pSrc) >= 3) {
    921                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
    922                     ch = (ch << 18) + (*pSrc++ << 12);
    923                     ch += *pSrc++ << 6;
    924                     ch += *pSrc++ - 0x3c82080;
    925                     *(pDest++) = U16_LEAD(ch);
    926                     *(pDest++) = U16_TRAIL(ch);
    927                     pSrc += 4;
    928                     continue;
    929                 }
    930             }
    931 
    932             /* truncated character at the end */
    933             *pDest++ = 0xfffd;
    934             break;
    935         }
    936     }
    937 
    938     reqLength+=(int32_t)(pDest - dest);
    939 
    940     if(pDestLength){
    941         *pDestLength = reqLength;
    942     }
    943 
    944     /* Terminate the buffer */
    945     u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
    946 
    947     return dest;
    948 }
    949 
    950 static U_INLINE uint8_t *
    951 _appendUTF8(uint8_t *pDest, UChar32 c) {
    952     /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
    953     if((c)<=0x7f) {
    954         *pDest++=(uint8_t)c;
    955     } else if(c<=0x7ff) {
    956         *pDest++=(uint8_t)((c>>6)|0xc0);
    957         *pDest++=(uint8_t)((c&0x3f)|0x80);
    958     } else if(c<=0xffff) {
    959         *pDest++=(uint8_t)((c>>12)|0xe0);
    960         *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
    961         *pDest++=(uint8_t)(((c)&0x3f)|0x80);
    962     } else /* if((uint32_t)(c)<=0x10ffff) */ {
    963         *pDest++=(uint8_t)(((c)>>18)|0xf0);
    964         *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
    965         *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
    966         *pDest++=(uint8_t)(((c)&0x3f)|0x80);
    967     }
    968     return pDest;
    969 }
    970 
    971 
    972 U_CAPI char* U_EXPORT2
    973 u_strToUTF8WithSub(char *dest,
    974             int32_t destCapacity,
    975             int32_t *pDestLength,
    976             const UChar *pSrc,
    977             int32_t srcLength,
    978             UChar32 subchar, int32_t *pNumSubstitutions,
    979             UErrorCode *pErrorCode){
    980 
    981     int32_t reqLength=0;
    982     uint32_t ch=0,ch2=0;
    983     uint8_t *pDest = (uint8_t *)dest;
    984     uint8_t *pDestLimit = pDest + destCapacity;
    985     int32_t numSubstitutions;
    986 
    987     /* args check */
    988     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    989         return NULL;
    990     }
    991 
    992     if( (pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
    993         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    994     ) {
    995         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    996         return NULL;
    997     }
    998 
    999     if(pNumSubstitutions!=NULL) {
   1000         *pNumSubstitutions=0;
   1001     }
   1002     numSubstitutions=0;
   1003 
   1004     if(srcLength==-1) {
   1005         while((ch=*pSrc)!=0) {
   1006             ++pSrc;
   1007             if(ch <= 0x7f) {
   1008                 if(pDest<pDestLimit) {
   1009                     *pDest++ = (char)ch;
   1010                 } else {
   1011                     reqLength = 1;
   1012                     break;
   1013                 }
   1014             } else if(ch <= 0x7ff) {
   1015                 if((pDestLimit - pDest) >= 2) {
   1016                     *pDest++=(uint8_t)((ch>>6)|0xc0);
   1017                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1018                 } else {
   1019                     reqLength = 2;
   1020                     break;
   1021                 }
   1022             } else if(ch <= 0xd7ff || ch >= 0xe000) {
   1023                 if((pDestLimit - pDest) >= 3) {
   1024                     *pDest++=(uint8_t)((ch>>12)|0xe0);
   1025                     *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
   1026                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1027                 } else {
   1028                     reqLength = 3;
   1029                     break;
   1030                 }
   1031             } else /* ch is a surrogate */ {
   1032                 int32_t length;
   1033 
   1034                 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
   1035                 if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
   1036                     ++pSrc;
   1037                     ch=UTF16_GET_PAIR_VALUE(ch, ch2);
   1038                 } else if(subchar>=0) {
   1039                     ch=subchar;
   1040                     ++numSubstitutions;
   1041                 } else {
   1042                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */
   1043                     *pErrorCode = U_INVALID_CHAR_FOUND;
   1044                     return NULL;
   1045                 }
   1046 
   1047                 length = U8_LENGTH(ch);
   1048                 if((pDestLimit - pDest) >= length) {
   1049                     /* convert and append*/
   1050                     pDest=_appendUTF8(pDest, ch);
   1051                 } else {
   1052                     reqLength = length;
   1053                     break;
   1054                 }
   1055             }
   1056         }
   1057         while((ch=*pSrc++)!=0) {
   1058             if(ch<=0x7f) {
   1059                 ++reqLength;
   1060             } else if(ch<=0x7ff) {
   1061                 reqLength+=2;
   1062             } else if(!UTF_IS_SURROGATE(ch)) {
   1063                 reqLength+=3;
   1064             } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
   1065                 ++pSrc;
   1066                 reqLength+=4;
   1067             } else if(subchar>=0) {
   1068                 reqLength+=U8_LENGTH(subchar);
   1069                 ++numSubstitutions;
   1070             } else {
   1071                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
   1072                 *pErrorCode = U_INVALID_CHAR_FOUND;
   1073                 return NULL;
   1074             }
   1075         }
   1076     } else {
   1077         const UChar *pSrcLimit = pSrc+srcLength;
   1078         int32_t count;
   1079 
   1080         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
   1081         for(;;) {
   1082             /*
   1083              * Each iteration of the inner loop progresses by at most 3 UTF-8
   1084              * bytes and one UChar, for most characters.
   1085              * For supplementary code points (4 & 2), which are rare,
   1086              * there is an additional adjustment.
   1087              */
   1088             count = (int32_t)((pDestLimit - pDest) / 3);
   1089             srcLength = (int32_t)(pSrcLimit - pSrc);
   1090             if(count > srcLength) {
   1091                 count = srcLength; /* min(remaining dest/3, remaining src) */
   1092             }
   1093             if(count < 3) {
   1094                 /*
   1095                  * Too much overhead if we get near the end of the string,
   1096                  * continue with the next loop.
   1097                  */
   1098                 break;
   1099             }
   1100             do {
   1101                 ch=*pSrc++;
   1102                 if(ch <= 0x7f) {
   1103                     *pDest++ = (char)ch;
   1104                 } else if(ch <= 0x7ff) {
   1105                     *pDest++=(uint8_t)((ch>>6)|0xc0);
   1106                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1107                 } else if(ch <= 0xd7ff || ch >= 0xe000) {
   1108                     *pDest++=(uint8_t)((ch>>12)|0xe0);
   1109                     *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
   1110                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1111                 } else /* ch is a surrogate */ {
   1112                     /*
   1113                      * We will read two UChars and probably output four bytes,
   1114                      * which we didn't account for with computing count,
   1115                      * so we adjust it here.
   1116                      */
   1117                     if(--count == 0) {
   1118                         --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
   1119                         break;  /* recompute count */
   1120                     }
   1121 
   1122                     if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
   1123                         ++pSrc;
   1124                         ch=UTF16_GET_PAIR_VALUE(ch, ch2);
   1125 
   1126                         /* writing 4 bytes per 2 UChars is ok */
   1127                         *pDest++=(uint8_t)((ch>>18)|0xf0);
   1128                         *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
   1129                         *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
   1130                         *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1131                     } else  {
   1132                         /* Unicode 3.2 forbids surrogate code points in UTF-8 */
   1133                         if(subchar>=0) {
   1134                             ch=subchar;
   1135                             ++numSubstitutions;
   1136                         } else {
   1137                             *pErrorCode = U_INVALID_CHAR_FOUND;
   1138                             return NULL;
   1139                         }
   1140 
   1141                         /* convert and append*/
   1142                         pDest=_appendUTF8(pDest, ch);
   1143                     }
   1144                 }
   1145             } while(--count > 0);
   1146         }
   1147 
   1148         while(pSrc<pSrcLimit) {
   1149             ch=*pSrc++;
   1150             if(ch <= 0x7f) {
   1151                 if(pDest<pDestLimit) {
   1152                     *pDest++ = (char)ch;
   1153                 } else {
   1154                     reqLength = 1;
   1155                     break;
   1156                 }
   1157             } else if(ch <= 0x7ff) {
   1158                 if((pDestLimit - pDest) >= 2) {
   1159                     *pDest++=(uint8_t)((ch>>6)|0xc0);
   1160                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1161                 } else {
   1162                     reqLength = 2;
   1163                     break;
   1164                 }
   1165             } else if(ch <= 0xd7ff || ch >= 0xe000) {
   1166                 if((pDestLimit - pDest) >= 3) {
   1167                     *pDest++=(uint8_t)((ch>>12)|0xe0);
   1168                     *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
   1169                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
   1170                 } else {
   1171                     reqLength = 3;
   1172                     break;
   1173                 }
   1174             } else /* ch is a surrogate */ {
   1175                 int32_t length;
   1176 
   1177                 if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
   1178                     ++pSrc;
   1179                     ch=UTF16_GET_PAIR_VALUE(ch, ch2);
   1180                 } else if(subchar>=0) {
   1181                     ch=subchar;
   1182                     ++numSubstitutions;
   1183                 } else {
   1184                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */
   1185                     *pErrorCode = U_INVALID_CHAR_FOUND;
   1186                     return NULL;
   1187                 }
   1188 
   1189                 length = U8_LENGTH(ch);
   1190                 if((pDestLimit - pDest) >= length) {
   1191                     /* convert and append*/
   1192                     pDest=_appendUTF8(pDest, ch);
   1193                 } else {
   1194                     reqLength = length;
   1195                     break;
   1196                 }
   1197             }
   1198         }
   1199         while(pSrc<pSrcLimit) {
   1200             ch=*pSrc++;
   1201             if(ch<=0x7f) {
   1202                 ++reqLength;
   1203             } else if(ch<=0x7ff) {
   1204                 reqLength+=2;
   1205             } else if(!UTF_IS_SURROGATE(ch)) {
   1206                 reqLength+=3;
   1207             } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
   1208                 ++pSrc;
   1209                 reqLength+=4;
   1210             } else if(subchar>=0) {
   1211                 reqLength+=U8_LENGTH(subchar);
   1212                 ++numSubstitutions;
   1213             } else {
   1214                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
   1215                 *pErrorCode = U_INVALID_CHAR_FOUND;
   1216                 return NULL;
   1217             }
   1218         }
   1219     }
   1220 
   1221     reqLength+=(int32_t)(pDest - (uint8_t *)dest);
   1222 
   1223     if(pNumSubstitutions!=NULL) {
   1224         *pNumSubstitutions=numSubstitutions;
   1225     }
   1226 
   1227     if(pDestLength){
   1228         *pDestLength = reqLength;
   1229     }
   1230 
   1231     /* Terminate the buffer */
   1232     u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
   1233 
   1234     return (char*)dest;
   1235 }
   1236 
   1237 U_CAPI char* U_EXPORT2
   1238 u_strToUTF8(char *dest,
   1239             int32_t destCapacity,
   1240             int32_t *pDestLength,
   1241             const UChar *pSrc,
   1242             int32_t srcLength,
   1243             UErrorCode *pErrorCode){
   1244     return u_strToUTF8WithSub(
   1245             dest, destCapacity, pDestLength,
   1246             pSrc, srcLength,
   1247             U_SENTINEL, NULL,
   1248             pErrorCode);
   1249 }
   1250