Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1998-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 * File ustring.h
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   12/07/98    bertrand    Creation.
     15 ******************************************************************************
     16 */
     17 
     18 #include "unicode/utypes.h"
     19 #include "unicode/putil.h"
     20 #include "unicode/ustring.h"
     21 #include "cstring.h"
     22 #include "cwchar.h"
     23 #include "cmemory.h"
     24 #include "ustr_imp.h"
     25 
     26 /* ANSI string.h - style functions ------------------------------------------ */
     27 
     28 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
     29 #define U_BMP_MAX 0xffff
     30 
     31 /* Forward binary string search functions ----------------------------------- */
     32 
     33 /*
     34  * Test if a substring match inside a string is at code point boundaries.
     35  * All pointers refer to the same buffer.
     36  * The limit pointer may be NULL, all others must be real pointers.
     37  */
     38 static U_INLINE UBool
     39 isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
     40     if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
     41         /* the leading edge of the match is in the middle of a surrogate pair */
     42         return FALSE;
     43     }
     44     if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
     45         /* the trailing edge of the match is in the middle of a surrogate pair */
     46         return FALSE;
     47     }
     48     return TRUE;
     49 }
     50 
     51 U_CAPI UChar * U_EXPORT2
     52 u_strFindFirst(const UChar *s, int32_t length,
     53                const UChar *sub, int32_t subLength) {
     54     const UChar *start, *p, *q, *subLimit;
     55     UChar c, cs, cq;
     56 
     57     if(sub==NULL || subLength<-1) {
     58         return (UChar *)s;
     59     }
     60     if(s==NULL || length<-1) {
     61         return NULL;
     62     }
     63 
     64     start=s;
     65 
     66     if(length<0 && subLength<0) {
     67         /* both strings are NUL-terminated */
     68         if((cs=*sub++)==0) {
     69             return (UChar *)s;
     70         }
     71         if(*sub==0 && !U16_IS_SURROGATE(cs)) {
     72             /* the substring consists of a single, non-surrogate BMP code point */
     73             return u_strchr(s, cs);
     74         }
     75 
     76         while((c=*s++)!=0) {
     77             if(c==cs) {
     78                 /* found first substring UChar, compare rest */
     79                 p=s;
     80                 q=sub;
     81                 for(;;) {
     82                     if((cq=*q)==0) {
     83                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
     84                             return (UChar *)(s-1); /* well-formed match */
     85                         } else {
     86                             break; /* no match because surrogate pair is split */
     87                         }
     88                     }
     89                     if((c=*p)==0) {
     90                         return NULL; /* no match, and none possible after s */
     91                     }
     92                     if(c!=cq) {
     93                         break; /* no match */
     94                     }
     95                     ++p;
     96                     ++q;
     97                 }
     98             }
     99         }
    100 
    101         /* not found */
    102         return NULL;
    103     }
    104 
    105     if(subLength<0) {
    106         subLength=u_strlen(sub);
    107     }
    108     if(subLength==0) {
    109         return (UChar *)s;
    110     }
    111 
    112     /* get sub[0] to search for it fast */
    113     cs=*sub++;
    114     --subLength;
    115     subLimit=sub+subLength;
    116 
    117     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
    118         /* the substring consists of a single, non-surrogate BMP code point */
    119         return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
    120     }
    121 
    122     if(length<0) {
    123         /* s is NUL-terminated */
    124         while((c=*s++)!=0) {
    125             if(c==cs) {
    126                 /* found first substring UChar, compare rest */
    127                 p=s;
    128                 q=sub;
    129                 for(;;) {
    130                     if(q==subLimit) {
    131                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
    132                             return (UChar *)(s-1); /* well-formed match */
    133                         } else {
    134                             break; /* no match because surrogate pair is split */
    135                         }
    136                     }
    137                     if((c=*p)==0) {
    138                         return NULL; /* no match, and none possible after s */
    139                     }
    140                     if(c!=*q) {
    141                         break; /* no match */
    142                     }
    143                     ++p;
    144                     ++q;
    145                 }
    146             }
    147         }
    148     } else {
    149         const UChar *limit, *preLimit;
    150 
    151         /* subLength was decremented above */
    152         if(length<=subLength) {
    153             return NULL; /* s is shorter than sub */
    154         }
    155 
    156         limit=s+length;
    157 
    158         /* the substring must start before preLimit */
    159         preLimit=limit-subLength;
    160 
    161         while(s!=preLimit) {
    162             c=*s++;
    163             if(c==cs) {
    164                 /* found first substring UChar, compare rest */
    165                 p=s;
    166                 q=sub;
    167                 for(;;) {
    168                     if(q==subLimit) {
    169                         if(isMatchAtCPBoundary(start, s-1, p, limit)) {
    170                             return (UChar *)(s-1); /* well-formed match */
    171                         } else {
    172                             break; /* no match because surrogate pair is split */
    173                         }
    174                     }
    175                     if(*p!=*q) {
    176                         break; /* no match */
    177                     }
    178                     ++p;
    179                     ++q;
    180                 }
    181             }
    182         }
    183     }
    184 
    185     /* not found */
    186     return NULL;
    187 }
    188 
    189 U_CAPI UChar * U_EXPORT2
    190 u_strstr(const UChar *s, const UChar *substring) {
    191     return u_strFindFirst(s, -1, substring, -1);
    192 }
    193 
    194 U_CAPI UChar * U_EXPORT2
    195 u_strchr(const UChar *s, UChar c) {
    196     if(U16_IS_SURROGATE(c)) {
    197         /* make sure to not find half of a surrogate pair */
    198         return u_strFindFirst(s, -1, &c, 1);
    199     } else {
    200         UChar cs;
    201 
    202         /* trivial search for a BMP code point */
    203         for(;;) {
    204             if((cs=*s)==c) {
    205                 return (UChar *)s;
    206             }
    207             if(cs==0) {
    208                 return NULL;
    209             }
    210             ++s;
    211         }
    212     }
    213 }
    214 
    215 U_CAPI UChar * U_EXPORT2
    216 u_strchr32(const UChar *s, UChar32 c) {
    217     if((uint32_t)c<=U_BMP_MAX) {
    218         /* find BMP code point */
    219         return u_strchr(s, (UChar)c);
    220     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    221         /* find supplementary code point as surrogate pair */
    222         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
    223 
    224         while((cs=*s++)!=0) {
    225             if(cs==lead && *s==trail) {
    226                 return (UChar *)(s-1);
    227             }
    228         }
    229         return NULL;
    230     } else {
    231         /* not a Unicode code point, not findable */
    232         return NULL;
    233     }
    234 }
    235 
    236 U_CAPI UChar * U_EXPORT2
    237 u_memchr(const UChar *s, UChar c, int32_t count) {
    238     if(count<=0) {
    239         return NULL; /* no string */
    240     } else if(U16_IS_SURROGATE(c)) {
    241         /* make sure to not find half of a surrogate pair */
    242         return u_strFindFirst(s, count, &c, 1);
    243     } else {
    244         /* trivial search for a BMP code point */
    245         const UChar *limit=s+count;
    246         do {
    247             if(*s==c) {
    248                 return (UChar *)s;
    249             }
    250         } while(++s!=limit);
    251         return NULL;
    252     }
    253 }
    254 
    255 U_CAPI UChar * U_EXPORT2
    256 u_memchr32(const UChar *s, UChar32 c, int32_t count) {
    257     if((uint32_t)c<=U_BMP_MAX) {
    258         /* find BMP code point */
    259         return u_memchr(s, (UChar)c, count);
    260     } else if(count<2) {
    261         /* too short for a surrogate pair */
    262         return NULL;
    263     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    264         /* find supplementary code point as surrogate pair */
    265         const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
    266         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
    267 
    268         do {
    269             if(*s==lead && *(s+1)==trail) {
    270                 return (UChar *)s;
    271             }
    272         } while(++s!=limit);
    273         return NULL;
    274     } else {
    275         /* not a Unicode code point, not findable */
    276         return NULL;
    277     }
    278 }
    279 
    280 /* Backward binary string search functions ---------------------------------- */
    281 
    282 U_CAPI UChar * U_EXPORT2
    283 u_strFindLast(const UChar *s, int32_t length,
    284               const UChar *sub, int32_t subLength) {
    285     const UChar *start, *limit, *p, *q, *subLimit;
    286     UChar c, cs;
    287 
    288     if(sub==NULL || subLength<-1) {
    289         return (UChar *)s;
    290     }
    291     if(s==NULL || length<-1) {
    292         return NULL;
    293     }
    294 
    295     /*
    296      * This implementation is more lazy than the one for u_strFindFirst():
    297      * There is no special search code for NUL-terminated strings.
    298      * It does not seem to be worth it for searching substrings to
    299      * search forward and find all matches like in u_strrchr() and similar.
    300      * Therefore, we simply get both string lengths and search backward.
    301      *
    302      * markus 2002oct23
    303      */
    304 
    305     if(subLength<0) {
    306         subLength=u_strlen(sub);
    307     }
    308     if(subLength==0) {
    309         return (UChar *)s;
    310     }
    311 
    312     /* get sub[subLength-1] to search for it fast */
    313     subLimit=sub+subLength;
    314     cs=*(--subLimit);
    315     --subLength;
    316 
    317     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
    318         /* the substring consists of a single, non-surrogate BMP code point */
    319         return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
    320     }
    321 
    322     if(length<0) {
    323         length=u_strlen(s);
    324     }
    325 
    326     /* subLength was decremented above */
    327     if(length<=subLength) {
    328         return NULL; /* s is shorter than sub */
    329     }
    330 
    331     start=s;
    332     limit=s+length;
    333 
    334     /* the substring must start no later than s+subLength */
    335     s+=subLength;
    336 
    337     while(s!=limit) {
    338         c=*(--limit);
    339         if(c==cs) {
    340             /* found last substring UChar, compare rest */
    341             p=limit;
    342             q=subLimit;
    343             for(;;) {
    344                 if(q==sub) {
    345                     if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
    346                         return (UChar *)p; /* well-formed match */
    347                     } else {
    348                         break; /* no match because surrogate pair is split */
    349                     }
    350                 }
    351                 if(*(--p)!=*(--q)) {
    352                     break; /* no match */
    353                 }
    354             }
    355         }
    356     }
    357 
    358     /* not found */
    359     return NULL;
    360 }
    361 
    362 U_CAPI UChar * U_EXPORT2
    363 u_strrstr(const UChar *s, const UChar *substring) {
    364     return u_strFindLast(s, -1, substring, -1);
    365 }
    366 
    367 U_CAPI UChar * U_EXPORT2
    368 u_strrchr(const UChar *s, UChar c) {
    369     if(U16_IS_SURROGATE(c)) {
    370         /* make sure to not find half of a surrogate pair */
    371         return u_strFindLast(s, -1, &c, 1);
    372     } else {
    373         const UChar *result=NULL;
    374         UChar cs;
    375 
    376         /* trivial search for a BMP code point */
    377         for(;;) {
    378             if((cs=*s)==c) {
    379                 result=s;
    380             }
    381             if(cs==0) {
    382                 return (UChar *)result;
    383             }
    384             ++s;
    385         }
    386     }
    387 }
    388 
    389 U_CAPI UChar * U_EXPORT2
    390 u_strrchr32(const UChar *s, UChar32 c) {
    391     if((uint32_t)c<=U_BMP_MAX) {
    392         /* find BMP code point */
    393         return u_strrchr(s, (UChar)c);
    394     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    395         /* find supplementary code point as surrogate pair */
    396         const UChar *result=NULL;
    397         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
    398 
    399         while((cs=*s++)!=0) {
    400             if(cs==lead && *s==trail) {
    401                 result=s-1;
    402             }
    403         }
    404         return (UChar *)result;
    405     } else {
    406         /* not a Unicode code point, not findable */
    407         return NULL;
    408     }
    409 }
    410 
    411 U_CAPI UChar * U_EXPORT2
    412 u_memrchr(const UChar *s, UChar c, int32_t count) {
    413     if(count<=0) {
    414         return NULL; /* no string */
    415     } else if(U16_IS_SURROGATE(c)) {
    416         /* make sure to not find half of a surrogate pair */
    417         return u_strFindLast(s, count, &c, 1);
    418     } else {
    419         /* trivial search for a BMP code point */
    420         const UChar *limit=s+count;
    421         do {
    422             if(*(--limit)==c) {
    423                 return (UChar *)limit;
    424             }
    425         } while(s!=limit);
    426         return NULL;
    427     }
    428 }
    429 
    430 U_CAPI UChar * U_EXPORT2
    431 u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
    432     if((uint32_t)c<=U_BMP_MAX) {
    433         /* find BMP code point */
    434         return u_memrchr(s, (UChar)c, count);
    435     } else if(count<2) {
    436         /* too short for a surrogate pair */
    437         return NULL;
    438     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    439         /* find supplementary code point as surrogate pair */
    440         const UChar *limit=s+count-1;
    441         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
    442 
    443         do {
    444             if(*limit==trail && *(limit-1)==lead) {
    445                 return (UChar *)(limit-1);
    446             }
    447         } while(s!=--limit);
    448         return NULL;
    449     } else {
    450         /* not a Unicode code point, not findable */
    451         return NULL;
    452     }
    453 }
    454 
    455 /* Tokenization functions --------------------------------------------------- */
    456 
    457 /*
    458  * Match each code point in a string against each code point in the matchSet.
    459  * Return the index of the first string code point that
    460  * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
    461  * Return -(string length)-1 if there is no such code point.
    462  */
    463 static int32_t
    464 _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
    465     int32_t matchLen, matchBMPLen, strItr, matchItr;
    466     UChar32 stringCh, matchCh;
    467     UChar c, c2;
    468 
    469     /* first part of matchSet contains only BMP code points */
    470     matchBMPLen = 0;
    471     while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
    472         ++matchBMPLen;
    473     }
    474 
    475     /* second part of matchSet contains BMP and supplementary code points */
    476     matchLen = matchBMPLen;
    477     while(matchSet[matchLen] != 0) {
    478         ++matchLen;
    479     }
    480 
    481     for(strItr = 0; (c = string[strItr]) != 0;) {
    482         ++strItr;
    483         if(U16_IS_SINGLE(c)) {
    484             if(polarity) {
    485                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
    486                     if(c == matchSet[matchItr]) {
    487                         return strItr - 1; /* one matches */
    488                     }
    489                 }
    490             } else {
    491                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
    492                     if(c == matchSet[matchItr]) {
    493                         goto endloop;
    494                     }
    495                 }
    496                 return strItr - 1; /* none matches */
    497             }
    498         } else {
    499             /*
    500              * No need to check for string length before U16_IS_TRAIL
    501              * because c2 could at worst be the terminating NUL.
    502              */
    503             if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
    504                 ++strItr;
    505                 stringCh = U16_GET_SUPPLEMENTARY(c, c2);
    506             } else {
    507                 stringCh = c; /* unpaired trail surrogate */
    508             }
    509 
    510             if(polarity) {
    511                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
    512                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
    513                     if(stringCh == matchCh) {
    514                         return strItr - U16_LENGTH(stringCh); /* one matches */
    515                     }
    516                 }
    517             } else {
    518                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
    519                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
    520                     if(stringCh == matchCh) {
    521                         goto endloop;
    522                     }
    523                 }
    524                 return strItr - U16_LENGTH(stringCh); /* none matches */
    525             }
    526         }
    527 endloop:
    528         /* wish C had continue with labels like Java... */;
    529     }
    530 
    531     /* Didn't find it. */
    532     return -strItr-1;
    533 }
    534 
    535 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
    536 U_CAPI UChar * U_EXPORT2
    537 u_strpbrk(const UChar *string, const UChar *matchSet)
    538 {
    539     int32_t idx = _matchFromSet(string, matchSet, TRUE);
    540     if(idx >= 0) {
    541         return (UChar *)string + idx;
    542     } else {
    543         return NULL;
    544     }
    545 }
    546 
    547 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
    548 U_CAPI int32_t U_EXPORT2
    549 u_strcspn(const UChar *string, const UChar *matchSet)
    550 {
    551     int32_t idx = _matchFromSet(string, matchSet, TRUE);
    552     if(idx >= 0) {
    553         return idx;
    554     } else {
    555         return -idx - 1; /* == u_strlen(string) */
    556     }
    557 }
    558 
    559 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
    560 U_CAPI int32_t U_EXPORT2
    561 u_strspn(const UChar *string, const UChar *matchSet)
    562 {
    563     int32_t idx = _matchFromSet(string, matchSet, FALSE);
    564     if(idx >= 0) {
    565         return idx;
    566     } else {
    567         return -idx - 1; /* == u_strlen(string) */
    568     }
    569 }
    570 
    571 /* ----- Text manipulation functions --- */
    572 
    573 U_CAPI UChar* U_EXPORT2
    574 u_strtok_r(UChar    *src,
    575      const UChar    *delim,
    576            UChar   **saveState)
    577 {
    578     UChar *tokSource;
    579     UChar *nextToken;
    580     uint32_t nonDelimIdx;
    581 
    582     /* If saveState is NULL, the user messed up. */
    583     if (src != NULL) {
    584         tokSource = src;
    585         *saveState = src; /* Set to "src" in case there are no delimiters */
    586     }
    587     else if (*saveState) {
    588         tokSource = *saveState;
    589     }
    590     else {
    591         /* src == NULL && *saveState == NULL */
    592         /* This shouldn't happen. We already finished tokenizing. */
    593         return NULL;
    594     }
    595 
    596     /* Skip initial delimiters */
    597     nonDelimIdx = u_strspn(tokSource, delim);
    598     tokSource = &tokSource[nonDelimIdx];
    599 
    600     if (*tokSource) {
    601         nextToken = u_strpbrk(tokSource, delim);
    602         if (nextToken != NULL) {
    603             /* Create a token */
    604             *(nextToken++) = 0;
    605             *saveState = nextToken;
    606             return tokSource;
    607         }
    608         else if (*saveState) {
    609             /* Return the last token */
    610             *saveState = NULL;
    611             return tokSource;
    612         }
    613     }
    614     else {
    615         /* No tokens were found. Only delimiters were left. */
    616         *saveState = NULL;
    617     }
    618     return NULL;
    619 }
    620 
    621 /* Miscellaneous functions -------------------------------------------------- */
    622 
    623 U_CAPI UChar* U_EXPORT2
    624 u_strcat(UChar     *dst,
    625     const UChar     *src)
    626 {
    627     UChar *anchor = dst;            /* save a pointer to start of dst */
    628 
    629     while(*dst != 0) {              /* To end of first string          */
    630         ++dst;
    631     }
    632     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    633     }
    634 
    635     return anchor;
    636 }
    637 
    638 U_CAPI UChar*  U_EXPORT2
    639 u_strncat(UChar     *dst,
    640      const UChar     *src,
    641      int32_t     n )
    642 {
    643     if(n > 0) {
    644         UChar *anchor = dst;            /* save a pointer to start of dst */
    645 
    646         while(*dst != 0) {              /* To end of first string          */
    647             ++dst;
    648         }
    649         while((*dst = *src) != 0) {     /* copy string 2 over              */
    650             ++dst;
    651             if(--n == 0) {
    652                 *dst = 0;
    653                 break;
    654             }
    655             ++src;
    656         }
    657 
    658         return anchor;
    659     } else {
    660         return dst;
    661     }
    662 }
    663 
    664 /* ----- Text property functions --- */
    665 
    666 U_CAPI int32_t   U_EXPORT2
    667 u_strcmp(const UChar *s1,
    668     const UChar *s2)
    669 {
    670     UChar  c1, c2;
    671 
    672     for(;;) {
    673         c1=*s1++;
    674         c2=*s2++;
    675         if (c1 != c2 || c1 == 0) {
    676             break;
    677         }
    678     }
    679     return (int32_t)c1 - (int32_t)c2;
    680 }
    681 
    682 U_CFUNC int32_t U_EXPORT2
    683 uprv_strCompare(const UChar *s1, int32_t length1,
    684                 const UChar *s2, int32_t length2,
    685                 UBool strncmpStyle, UBool codePointOrder) {
    686     const UChar *start1, *start2, *limit1, *limit2;
    687     UChar c1, c2;
    688 
    689     /* setup for fix-up */
    690     start1=s1;
    691     start2=s2;
    692 
    693     /* compare identical prefixes - they do not need to be fixed up */
    694     if(length1<0 && length2<0) {
    695         /* strcmp style, both NUL-terminated */
    696         if(s1==s2) {
    697             return 0;
    698         }
    699 
    700         for(;;) {
    701             c1=*s1;
    702             c2=*s2;
    703             if(c1!=c2) {
    704                 break;
    705             }
    706             if(c1==0) {
    707                 return 0;
    708             }
    709             ++s1;
    710             ++s2;
    711         }
    712 
    713         /* setup for fix-up */
    714         limit1=limit2=NULL;
    715     } else if(strncmpStyle) {
    716         /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
    717         if(s1==s2) {
    718             return 0;
    719         }
    720 
    721         limit1=start1+length1;
    722 
    723         for(;;) {
    724             /* both lengths are same, check only one limit */
    725             if(s1==limit1) {
    726                 return 0;
    727             }
    728 
    729             c1=*s1;
    730             c2=*s2;
    731             if(c1!=c2) {
    732                 break;
    733             }
    734             if(c1==0) {
    735                 return 0;
    736             }
    737             ++s1;
    738             ++s2;
    739         }
    740 
    741         /* setup for fix-up */
    742         limit2=start2+length1; /* use length1 here, too, to enforce assumption */
    743     } else {
    744         /* memcmp/UnicodeString style, both length-specified */
    745         int32_t lengthResult;
    746 
    747         if(length1<0) {
    748             length1=u_strlen(s1);
    749         }
    750         if(length2<0) {
    751             length2=u_strlen(s2);
    752         }
    753 
    754         /* limit1=start1+min(lenght1, length2) */
    755         if(length1<length2) {
    756             lengthResult=-1;
    757             limit1=start1+length1;
    758         } else if(length1==length2) {
    759             lengthResult=0;
    760             limit1=start1+length1;
    761         } else /* length1>length2 */ {
    762             lengthResult=1;
    763             limit1=start1+length2;
    764         }
    765 
    766         if(s1==s2) {
    767             return lengthResult;
    768         }
    769 
    770         for(;;) {
    771             /* check pseudo-limit */
    772             if(s1==limit1) {
    773                 return lengthResult;
    774             }
    775 
    776             c1=*s1;
    777             c2=*s2;
    778             if(c1!=c2) {
    779                 break;
    780             }
    781             ++s1;
    782             ++s2;
    783         }
    784 
    785         /* setup for fix-up */
    786         limit1=start1+length1;
    787         limit2=start2+length2;
    788     }
    789 
    790     /* if both values are in or above the surrogate range, fix them up */
    791     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
    792         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    793         if(
    794             (c1<=0xdbff && (s1+1)!=limit1 && UTF_IS_TRAIL(*(s1+1))) ||
    795             (UTF_IS_TRAIL(c1) && start1!=s1 && UTF_IS_LEAD(*(s1-1)))
    796         ) {
    797             /* part of a surrogate pair, leave >=d800 */
    798         } else {
    799             /* BMP code point - may be surrogate code point - make <d800 */
    800             c1-=0x2800;
    801         }
    802 
    803         if(
    804             (c2<=0xdbff && (s2+1)!=limit2 && UTF_IS_TRAIL(*(s2+1))) ||
    805             (UTF_IS_TRAIL(c2) && start2!=s2 && UTF_IS_LEAD(*(s2-1)))
    806         ) {
    807             /* part of a surrogate pair, leave >=d800 */
    808         } else {
    809             /* BMP code point - may be surrogate code point - make <d800 */
    810             c2-=0x2800;
    811         }
    812     }
    813 
    814     /* now c1 and c2 are in the requested (code unit or code point) order */
    815     return (int32_t)c1-(int32_t)c2;
    816 }
    817 
    818 /*
    819  * Compare two strings as presented by UCharIterators.
    820  * Use code unit or code point order.
    821  * When the function returns, it is undefined where the iterators
    822  * have stopped.
    823  */
    824 U_CAPI int32_t U_EXPORT2
    825 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    826     UChar32 c1, c2;
    827 
    828     /* argument checking */
    829     if(iter1==NULL || iter2==NULL) {
    830         return 0; /* bad arguments */
    831     }
    832     if(iter1==iter2) {
    833         return 0; /* identical iterators */
    834     }
    835 
    836     /* reset iterators to start? */
    837     iter1->move(iter1, 0, UITER_START);
    838     iter2->move(iter2, 0, UITER_START);
    839 
    840     /* compare identical prefixes - they do not need to be fixed up */
    841     for(;;) {
    842         c1=iter1->next(iter1);
    843         c2=iter2->next(iter2);
    844         if(c1!=c2) {
    845             break;
    846         }
    847         if(c1==-1) {
    848             return 0;
    849         }
    850     }
    851 
    852     /* if both values are in or above the surrogate range, fix them up */
    853     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
    854         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    855         if(
    856             (c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
    857             (UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
    858         ) {
    859             /* part of a surrogate pair, leave >=d800 */
    860         } else {
    861             /* BMP code point - may be surrogate code point - make <d800 */
    862             c1-=0x2800;
    863         }
    864 
    865         if(
    866             (c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
    867             (UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
    868         ) {
    869             /* part of a surrogate pair, leave >=d800 */
    870         } else {
    871             /* BMP code point - may be surrogate code point - make <d800 */
    872             c2-=0x2800;
    873         }
    874     }
    875 
    876     /* now c1 and c2 are in the requested (code unit or code point) order */
    877     return (int32_t)c1-(int32_t)c2;
    878 }
    879 
    880 #if 0
    881 /*
    882  * u_strCompareIter() does not leave the iterators _on_ the different units.
    883  * This is possible but would cost a few extra indirect function calls to back
    884  * up if the last unit (c1 or c2 respectively) was >=0.
    885  *
    886  * Consistently leaving them _behind_ the different units is not an option
    887  * because the current "unit" is the end of the string if that is reached,
    888  * and in such a case the iterator does not move.
    889  * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
    890  * of their strings. Calling previous() on each does not move them to where
    891  * the comparison fails.
    892  *
    893  * So the simplest semantics is to not define where the iterators end up.
    894  *
    895  * The following fragment is part of what would need to be done for backing up.
    896  */
    897 void fragment {
    898         /* iff a surrogate is part of a surrogate pair, leave >=d800 */
    899         if(c1<=0xdbff) {
    900             if(!UTF_IS_TRAIL(iter1->current(iter1))) {
    901                 /* lead surrogate code point - make <d800 */
    902                 c1-=0x2800;
    903             }
    904         } else if(c1<=0xdfff) {
    905             int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
    906             iter1->previous(iter1); /* ==c1 */
    907             if(!UTF_IS_LEAD(iter1->previous(iter1))) {
    908                 /* trail surrogate code point - make <d800 */
    909                 c1-=0x2800;
    910             }
    911             /* go back to behind where the difference is */
    912             iter1->move(iter1, idx, UITER_ZERO);
    913         } else /* 0xe000<=c1<=0xffff */ {
    914             /* BMP code point - make <d800 */
    915             c1-=0x2800;
    916         }
    917 }
    918 #endif
    919 
    920 U_CAPI int32_t U_EXPORT2
    921 u_strCompare(const UChar *s1, int32_t length1,
    922              const UChar *s2, int32_t length2,
    923              UBool codePointOrder) {
    924     /* argument checking */
    925     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
    926         return 0;
    927     }
    928     return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
    929 }
    930 
    931 /* String compare in code point order - u_strcmp() compares in code unit order. */
    932 U_CAPI int32_t U_EXPORT2
    933 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
    934     return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
    935 }
    936 
    937 U_CAPI int32_t   U_EXPORT2
    938 u_strncmp(const UChar     *s1,
    939      const UChar     *s2,
    940      int32_t     n)
    941 {
    942     if(n > 0) {
    943         int32_t rc;
    944         for(;;) {
    945             rc = (int32_t)*s1 - (int32_t)*s2;
    946             if(rc != 0 || *s1 == 0 || --n == 0) {
    947                 return rc;
    948             }
    949             ++s1;
    950             ++s2;
    951         }
    952     } else {
    953         return 0;
    954     }
    955 }
    956 
    957 U_CAPI int32_t U_EXPORT2
    958 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
    959     return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
    960 }
    961 
    962 U_CAPI UChar* U_EXPORT2
    963 u_strcpy(UChar     *dst,
    964     const UChar     *src)
    965 {
    966     UChar *anchor = dst;            /* save a pointer to start of dst */
    967 
    968     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    969     }
    970 
    971     return anchor;
    972 }
    973 
    974 U_CAPI UChar*  U_EXPORT2
    975 u_strncpy(UChar     *dst,
    976      const UChar     *src,
    977      int32_t     n)
    978 {
    979     UChar *anchor = dst;            /* save a pointer to start of dst */
    980 
    981     /* copy string 2 over */
    982     while(n > 0 && (*(dst++) = *(src++)) != 0) {
    983         --n;
    984     }
    985 
    986     return anchor;
    987 }
    988 
    989 U_CAPI int32_t   U_EXPORT2
    990 u_strlen(const UChar *s)
    991 {
    992 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
    993     return (int32_t)uprv_wcslen(s);
    994 #else
    995     const UChar *t = s;
    996     while(*t != 0) {
    997       ++t;
    998     }
    999     return t - s;
   1000 #endif
   1001 }
   1002 
   1003 U_CAPI int32_t U_EXPORT2
   1004 u_countChar32(const UChar *s, int32_t length) {
   1005     int32_t count;
   1006 
   1007     if(s==NULL || length<-1) {
   1008         return 0;
   1009     }
   1010 
   1011     count=0;
   1012     if(length>=0) {
   1013         while(length>0) {
   1014             ++count;
   1015             if(UTF_IS_LEAD(*s) && length>=2 && UTF_IS_TRAIL(*(s+1))) {
   1016                 s+=2;
   1017                 length-=2;
   1018             } else {
   1019                 ++s;
   1020                 --length;
   1021             }
   1022         }
   1023     } else /* length==-1 */ {
   1024         UChar c;
   1025 
   1026         for(;;) {
   1027             if((c=*s++)==0) {
   1028                 break;
   1029             }
   1030             ++count;
   1031 
   1032             /*
   1033              * sufficient to look ahead one because of UTF-16;
   1034              * safe to look ahead one because at worst that would be the terminating NUL
   1035              */
   1036             if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
   1037                 ++s;
   1038             }
   1039         }
   1040     }
   1041     return count;
   1042 }
   1043 
   1044 U_CAPI UBool U_EXPORT2
   1045 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
   1046 
   1047     if(number<0) {
   1048         return TRUE;
   1049     }
   1050     if(s==NULL || length<-1) {
   1051         return FALSE;
   1052     }
   1053 
   1054     if(length==-1) {
   1055         /* s is NUL-terminated */
   1056         UChar c;
   1057 
   1058         /* count code points until they exceed */
   1059         for(;;) {
   1060             if((c=*s++)==0) {
   1061                 return FALSE;
   1062             }
   1063             if(number==0) {
   1064                 return TRUE;
   1065             }
   1066             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
   1067                 ++s;
   1068             }
   1069             --number;
   1070         }
   1071     } else {
   1072         /* length>=0 known */
   1073         const UChar *limit;
   1074         int32_t maxSupplementary;
   1075 
   1076         /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
   1077         if(((length+1)/2)>number) {
   1078             return TRUE;
   1079         }
   1080 
   1081         /* check if s does not even contain enough UChars */
   1082         maxSupplementary=length-number;
   1083         if(maxSupplementary<=0) {
   1084             return FALSE;
   1085         }
   1086         /* there are maxSupplementary=length-number more UChars than asked-for code points */
   1087 
   1088         /*
   1089          * count code points until they exceed and also check that there are
   1090          * no more than maxSupplementary supplementary code points (UChar pairs)
   1091          */
   1092         limit=s+length;
   1093         for(;;) {
   1094             if(s==limit) {
   1095                 return FALSE;
   1096             }
   1097             if(number==0) {
   1098                 return TRUE;
   1099             }
   1100             if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
   1101                 ++s;
   1102                 if(--maxSupplementary<=0) {
   1103                     /* too many pairs - too few code points */
   1104                     return FALSE;
   1105                 }
   1106             }
   1107             --number;
   1108         }
   1109     }
   1110 }
   1111 
   1112 U_CAPI UChar * U_EXPORT2
   1113 u_memcpy(UChar *dest, const UChar *src, int32_t count) {
   1114     return (UChar *)uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
   1115 }
   1116 
   1117 U_CAPI UChar * U_EXPORT2
   1118 u_memmove(UChar *dest, const UChar *src, int32_t count) {
   1119     return (UChar *)uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
   1120 }
   1121 
   1122 U_CAPI UChar * U_EXPORT2
   1123 u_memset(UChar *dest, UChar c, int32_t count) {
   1124     if(count > 0) {
   1125         UChar *ptr = dest;
   1126         UChar *limit = dest + count;
   1127 
   1128         while (ptr < limit) {
   1129             *(ptr++) = c;
   1130         }
   1131     }
   1132     return dest;
   1133 }
   1134 
   1135 U_CAPI int32_t U_EXPORT2
   1136 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
   1137     if(count > 0) {
   1138         const UChar *limit = buf1 + count;
   1139         int32_t result;
   1140 
   1141         while (buf1 < limit) {
   1142             result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
   1143             if (result != 0) {
   1144                 return result;
   1145             }
   1146             buf1++;
   1147             buf2++;
   1148         }
   1149     }
   1150     return 0;
   1151 }
   1152 
   1153 U_CAPI int32_t U_EXPORT2
   1154 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
   1155     return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
   1156 }
   1157 
   1158 /* u_unescape & support fns ------------------------------------------------- */
   1159 
   1160 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
   1161 static const UChar UNESCAPE_MAP[] = {
   1162     /*"   0x22, 0x22 */
   1163     /*'   0x27, 0x27 */
   1164     /*?   0x3F, 0x3F */
   1165     /*\   0x5C, 0x5C */
   1166     /*a*/ 0x61, 0x07,
   1167     /*b*/ 0x62, 0x08,
   1168     /*e*/ 0x65, 0x1b,
   1169     /*f*/ 0x66, 0x0c,
   1170     /*n*/ 0x6E, 0x0a,
   1171     /*r*/ 0x72, 0x0d,
   1172     /*t*/ 0x74, 0x09,
   1173     /*v*/ 0x76, 0x0b
   1174 };
   1175 enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
   1176 
   1177 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */
   1178 static int8_t _digit8(UChar c) {
   1179     if (c >= 0x0030 && c <= 0x0037) {
   1180         return (int8_t)(c - 0x0030);
   1181     }
   1182     return -1;
   1183 }
   1184 
   1185 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */
   1186 static int8_t _digit16(UChar c) {
   1187     if (c >= 0x0030 && c <= 0x0039) {
   1188         return (int8_t)(c - 0x0030);
   1189     }
   1190     if (c >= 0x0041 && c <= 0x0046) {
   1191         return (int8_t)(c - (0x0041 - 10));
   1192     }
   1193     if (c >= 0x0061 && c <= 0x0066) {
   1194         return (int8_t)(c - (0x0061 - 10));
   1195     }
   1196     return -1;
   1197 }
   1198 
   1199 /* Parse a single escape sequence.  Although this method deals in
   1200  * UChars, it does not use C++ or UnicodeString.  This allows it to
   1201  * be used from C contexts. */
   1202 U_CAPI UChar32 U_EXPORT2
   1203 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
   1204              int32_t *offset,
   1205              int32_t length,
   1206              void *context) {
   1207 
   1208     int32_t start = *offset;
   1209     UChar c;
   1210     UChar32 result = 0;
   1211     int8_t n = 0;
   1212     int8_t minDig = 0;
   1213     int8_t maxDig = 0;
   1214     int8_t bitsPerDigit = 4;
   1215     int8_t dig;
   1216     int32_t i;
   1217     UBool braces = FALSE;
   1218 
   1219     /* Check that offset is in range */
   1220     if (*offset < 0 || *offset >= length) {
   1221         goto err;
   1222     }
   1223 
   1224     /* Fetch first UChar after '\\' */
   1225     c = charAt((*offset)++, context);
   1226 
   1227     /* Convert hexadecimal and octal escapes */
   1228     switch (c) {
   1229     case 0x0075 /*'u'*/:
   1230         minDig = maxDig = 4;
   1231         break;
   1232     case 0x0055 /*'U'*/:
   1233         minDig = maxDig = 8;
   1234         break;
   1235     case 0x0078 /*'x'*/:
   1236         minDig = 1;
   1237         if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
   1238             ++(*offset);
   1239             braces = TRUE;
   1240             maxDig = 8;
   1241         } else {
   1242             maxDig = 2;
   1243         }
   1244         break;
   1245     default:
   1246         dig = _digit8(c);
   1247         if (dig >= 0) {
   1248             minDig = 1;
   1249             maxDig = 3;
   1250             n = 1; /* Already have first octal digit */
   1251             bitsPerDigit = 3;
   1252             result = dig;
   1253         }
   1254         break;
   1255     }
   1256     if (minDig != 0) {
   1257         while (*offset < length && n < maxDig) {
   1258             c = charAt(*offset, context);
   1259             dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
   1260             if (dig < 0) {
   1261                 break;
   1262             }
   1263             result = (result << bitsPerDigit) | dig;
   1264             ++(*offset);
   1265             ++n;
   1266         }
   1267         if (n < minDig) {
   1268             goto err;
   1269         }
   1270         if (braces) {
   1271             if (c != 0x7D /*}*/) {
   1272                 goto err;
   1273             }
   1274             ++(*offset);
   1275         }
   1276         if (result < 0 || result >= 0x110000) {
   1277             goto err;
   1278         }
   1279         /* If an escape sequence specifies a lead surrogate, see if
   1280          * there is a trail surrogate after it, either as an escape or
   1281          * as a literal.  If so, join them up into a supplementary.
   1282          */
   1283         if (*offset < length && U16_IS_LEAD(result)) {
   1284             int32_t ahead = *offset + 1;
   1285             c = charAt(*offset, context);
   1286             if (c == 0x5C /*'\\'*/ && ahead < length) {
   1287                 c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
   1288             }
   1289             if (U16_IS_TRAIL(c)) {
   1290                 *offset = ahead;
   1291                 result = U16_GET_SUPPLEMENTARY(result, c);
   1292             }
   1293         }
   1294         return result;
   1295     }
   1296 
   1297     /* Convert C-style escapes in table */
   1298     for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
   1299         if (c == UNESCAPE_MAP[i]) {
   1300             return UNESCAPE_MAP[i+1];
   1301         } else if (c < UNESCAPE_MAP[i]) {
   1302             break;
   1303         }
   1304     }
   1305 
   1306     /* Map \cX to control-X: X & 0x1F */
   1307     if (c == 0x0063 /*'c'*/ && *offset < length) {
   1308         c = charAt((*offset)++, context);
   1309         if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
   1310             UChar c2 = charAt(*offset, context);
   1311             if (UTF_IS_SECOND_SURROGATE(c2)) {
   1312                 ++(*offset);
   1313                 c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* [sic] */
   1314             }
   1315         }
   1316         return 0x1F & c;
   1317     }
   1318 
   1319     /* If no special forms are recognized, then consider
   1320      * the backslash to generically escape the next character.
   1321      * Deal with surrogate pairs. */
   1322     if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
   1323         UChar c2 = charAt(*offset, context);
   1324         if (UTF_IS_SECOND_SURROGATE(c2)) {
   1325             ++(*offset);
   1326             return UTF16_GET_PAIR_VALUE(c, c2);
   1327         }
   1328     }
   1329     return c;
   1330 
   1331  err:
   1332     /* Invalid escape sequence */
   1333     *offset = start; /* Reset to initial value */
   1334     return (UChar32)0xFFFFFFFF;
   1335 }
   1336 
   1337 /* u_unescapeAt() callback to return a UChar from a char* */
   1338 static UChar U_CALLCONV
   1339 _charPtr_charAt(int32_t offset, void *context) {
   1340     UChar c16;
   1341     /* It would be more efficient to access the invariant tables
   1342      * directly but there is no API for that. */
   1343     u_charsToUChars(((char*) context) + offset, &c16, 1);
   1344     return c16;
   1345 }
   1346 
   1347 /* Append an escape-free segment of the text; used by u_unescape() */
   1348 static void _appendUChars(UChar *dest, int32_t destCapacity,
   1349                           const char *src, int32_t srcLen) {
   1350     if (destCapacity < 0) {
   1351         destCapacity = 0;
   1352     }
   1353     if (srcLen > destCapacity) {
   1354         srcLen = destCapacity;
   1355     }
   1356     u_charsToUChars(src, dest, srcLen);
   1357 }
   1358 
   1359 /* Do an invariant conversion of char* -> UChar*, with escape parsing */
   1360 U_CAPI int32_t U_EXPORT2
   1361 u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
   1362     const char *segment = src;
   1363     int32_t i = 0;
   1364     char c;
   1365 
   1366     while ((c=*src) != 0) {
   1367         /* '\\' intentionally written as compiler-specific
   1368          * character constant to correspond to compiler-specific
   1369          * char* constants. */
   1370         if (c == '\\') {
   1371             int32_t lenParsed = 0;
   1372             UChar32 c32;
   1373             if (src != segment) {
   1374                 if (dest != NULL) {
   1375                     _appendUChars(dest + i, destCapacity - i,
   1376                                   segment, (int32_t)(src - segment));
   1377                 }
   1378                 i += (int32_t)(src - segment);
   1379             }
   1380             ++src; /* advance past '\\' */
   1381             c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
   1382             if (lenParsed == 0) {
   1383                 goto err;
   1384             }
   1385             src += lenParsed; /* advance past escape seq. */
   1386             if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
   1387                 UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
   1388             } else {
   1389                 i += UTF_CHAR_LENGTH(c32);
   1390             }
   1391             segment = src;
   1392         } else {
   1393             ++src;
   1394         }
   1395     }
   1396     if (src != segment) {
   1397         if (dest != NULL) {
   1398             _appendUChars(dest + i, destCapacity - i,
   1399                           segment, (int32_t)(src - segment));
   1400         }
   1401         i += (int32_t)(src - segment);
   1402     }
   1403     if (dest != NULL && i < destCapacity) {
   1404         dest[i] = 0;
   1405     }
   1406     return i;
   1407 
   1408  err:
   1409     if (dest != NULL && destCapacity > 0) {
   1410         *dest = 0;
   1411     }
   1412     return 0;
   1413 }
   1414 
   1415 /* NUL-termination of strings ----------------------------------------------- */
   1416 
   1417 /**
   1418  * NUL-terminate a string no matter what its type.
   1419  * Set warning and error codes accordingly.
   1420  */
   1421 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
   1422     if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
   1423         /* not a public function, so no complete argument checking */   \
   1424                                                                         \
   1425         if(length<0) {                                                  \
   1426             /* assume that the caller handles this */                   \
   1427         } else if(length<destCapacity) {                                \
   1428             /* NUL-terminate the string, the NUL fits */                \
   1429             dest[length]=0;                                             \
   1430             /* unset the not-terminated warning but leave all others */ \
   1431             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
   1432                 *pErrorCode=U_ZERO_ERROR;                               \
   1433             }                                                           \
   1434         } else if(length==destCapacity) {                               \
   1435             /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
   1436             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
   1437         } else /* length>destCapacity */ {                              \
   1438             /* even the string itself did not fit - set an error code */ \
   1439             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
   1440         }                                                               \
   1441     }
   1442 
   1443 U_CAPI int32_t U_EXPORT2
   1444 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1445     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1446     return length;
   1447 }
   1448 
   1449 U_CAPI int32_t U_EXPORT2
   1450 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1451     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1452     return length;
   1453 }
   1454 
   1455 U_CAPI int32_t U_EXPORT2
   1456 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1457     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1458     return length;
   1459 }
   1460 
   1461 U_CAPI int32_t U_EXPORT2
   1462 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1463     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1464     return length;
   1465 }
   1466