Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 * File ustring.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   12/07/98    bertrand    Creation.
     17 ******************************************************************************
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/putil.h"
     22 #include "unicode/ustring.h"
     23 #include "unicode/utf16.h"
     24 #include "cstring.h"
     25 #include "cwchar.h"
     26 #include "cmemory.h"
     27 #include "ustr_imp.h"
     28 
     29 /* ANSI string.h - style functions ------------------------------------------ */
     30 
     31 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
     32 #define U_BMP_MAX 0xffff
     33 
     34 /* Forward binary string search functions ----------------------------------- */
     35 
     36 /*
     37  * Test if a substring match inside a string is at code point boundaries.
     38  * All pointers refer to the same buffer.
     39  * The limit pointer may be NULL, all others must be real pointers.
     40  */
     41 static inline UBool
     42 isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
     43     if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
     44         /* the leading edge of the match is in the middle of a surrogate pair */
     45         return FALSE;
     46     }
     47     if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
     48         /* the trailing edge of the match is in the middle of a surrogate pair */
     49         return FALSE;
     50     }
     51     return TRUE;
     52 }
     53 
     54 U_CAPI UChar * U_EXPORT2
     55 u_strFindFirst(const UChar *s, int32_t length,
     56                const UChar *sub, int32_t subLength) {
     57     const UChar *start, *p, *q, *subLimit;
     58     UChar c, cs, cq;
     59 
     60     if(sub==NULL || subLength<-1) {
     61         return (UChar *)s;
     62     }
     63     if(s==NULL || length<-1) {
     64         return NULL;
     65     }
     66 
     67     start=s;
     68 
     69     if(length<0 && subLength<0) {
     70         /* both strings are NUL-terminated */
     71         if((cs=*sub++)==0) {
     72             return (UChar *)s;
     73         }
     74         if(*sub==0 && !U16_IS_SURROGATE(cs)) {
     75             /* the substring consists of a single, non-surrogate BMP code point */
     76             return u_strchr(s, cs);
     77         }
     78 
     79         while((c=*s++)!=0) {
     80             if(c==cs) {
     81                 /* found first substring UChar, compare rest */
     82                 p=s;
     83                 q=sub;
     84                 for(;;) {
     85                     if((cq=*q)==0) {
     86                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
     87                             return (UChar *)(s-1); /* well-formed match */
     88                         } else {
     89                             break; /* no match because surrogate pair is split */
     90                         }
     91                     }
     92                     if((c=*p)==0) {
     93                         return NULL; /* no match, and none possible after s */
     94                     }
     95                     if(c!=cq) {
     96                         break; /* no match */
     97                     }
     98                     ++p;
     99                     ++q;
    100                 }
    101             }
    102         }
    103 
    104         /* not found */
    105         return NULL;
    106     }
    107 
    108     if(subLength<0) {
    109         subLength=u_strlen(sub);
    110     }
    111     if(subLength==0) {
    112         return (UChar *)s;
    113     }
    114 
    115     /* get sub[0] to search for it fast */
    116     cs=*sub++;
    117     --subLength;
    118     subLimit=sub+subLength;
    119 
    120     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
    121         /* the substring consists of a single, non-surrogate BMP code point */
    122         return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
    123     }
    124 
    125     if(length<0) {
    126         /* s is NUL-terminated */
    127         while((c=*s++)!=0) {
    128             if(c==cs) {
    129                 /* found first substring UChar, compare rest */
    130                 p=s;
    131                 q=sub;
    132                 for(;;) {
    133                     if(q==subLimit) {
    134                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
    135                             return (UChar *)(s-1); /* well-formed match */
    136                         } else {
    137                             break; /* no match because surrogate pair is split */
    138                         }
    139                     }
    140                     if((c=*p)==0) {
    141                         return NULL; /* no match, and none possible after s */
    142                     }
    143                     if(c!=*q) {
    144                         break; /* no match */
    145                     }
    146                     ++p;
    147                     ++q;
    148                 }
    149             }
    150         }
    151     } else {
    152         const UChar *limit, *preLimit;
    153 
    154         /* subLength was decremented above */
    155         if(length<=subLength) {
    156             return NULL; /* s is shorter than sub */
    157         }
    158 
    159         limit=s+length;
    160 
    161         /* the substring must start before preLimit */
    162         preLimit=limit-subLength;
    163 
    164         while(s!=preLimit) {
    165             c=*s++;
    166             if(c==cs) {
    167                 /* found first substring UChar, compare rest */
    168                 p=s;
    169                 q=sub;
    170                 for(;;) {
    171                     if(q==subLimit) {
    172                         if(isMatchAtCPBoundary(start, s-1, p, limit)) {
    173                             return (UChar *)(s-1); /* well-formed match */
    174                         } else {
    175                             break; /* no match because surrogate pair is split */
    176                         }
    177                     }
    178                     if(*p!=*q) {
    179                         break; /* no match */
    180                     }
    181                     ++p;
    182                     ++q;
    183                 }
    184             }
    185         }
    186     }
    187 
    188     /* not found */
    189     return NULL;
    190 }
    191 
    192 U_CAPI UChar * U_EXPORT2
    193 u_strstr(const UChar *s, const UChar *substring) {
    194     return u_strFindFirst(s, -1, substring, -1);
    195 }
    196 
    197 U_CAPI UChar * U_EXPORT2
    198 u_strchr(const UChar *s, UChar c) {
    199     if(U16_IS_SURROGATE(c)) {
    200         /* make sure to not find half of a surrogate pair */
    201         return u_strFindFirst(s, -1, &c, 1);
    202     } else {
    203         UChar cs;
    204 
    205         /* trivial search for a BMP code point */
    206         for(;;) {
    207             if((cs=*s)==c) {
    208                 return (UChar *)s;
    209             }
    210             if(cs==0) {
    211                 return NULL;
    212             }
    213             ++s;
    214         }
    215     }
    216 }
    217 
    218 U_CAPI UChar * U_EXPORT2
    219 u_strchr32(const UChar *s, UChar32 c) {
    220     if((uint32_t)c<=U_BMP_MAX) {
    221         /* find BMP code point */
    222         return u_strchr(s, (UChar)c);
    223     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    224         /* find supplementary code point as surrogate pair */
    225         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
    226 
    227         while((cs=*s++)!=0) {
    228             if(cs==lead && *s==trail) {
    229                 return (UChar *)(s-1);
    230             }
    231         }
    232         return NULL;
    233     } else {
    234         /* not a Unicode code point, not findable */
    235         return NULL;
    236     }
    237 }
    238 
    239 U_CAPI UChar * U_EXPORT2
    240 u_memchr(const UChar *s, UChar c, int32_t count) {
    241     if(count<=0) {
    242         return NULL; /* no string */
    243     } else if(U16_IS_SURROGATE(c)) {
    244         /* make sure to not find half of a surrogate pair */
    245         return u_strFindFirst(s, count, &c, 1);
    246     } else {
    247         /* trivial search for a BMP code point */
    248         const UChar *limit=s+count;
    249         do {
    250             if(*s==c) {
    251                 return (UChar *)s;
    252             }
    253         } while(++s!=limit);
    254         return NULL;
    255     }
    256 }
    257 
    258 U_CAPI UChar * U_EXPORT2
    259 u_memchr32(const UChar *s, UChar32 c, int32_t count) {
    260     if((uint32_t)c<=U_BMP_MAX) {
    261         /* find BMP code point */
    262         return u_memchr(s, (UChar)c, count);
    263     } else if(count<2) {
    264         /* too short for a surrogate pair */
    265         return NULL;
    266     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    267         /* find supplementary code point as surrogate pair */
    268         const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
    269         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
    270 
    271         do {
    272             if(*s==lead && *(s+1)==trail) {
    273                 return (UChar *)s;
    274             }
    275         } while(++s!=limit);
    276         return NULL;
    277     } else {
    278         /* not a Unicode code point, not findable */
    279         return NULL;
    280     }
    281 }
    282 
    283 /* Backward binary string search functions ---------------------------------- */
    284 
    285 U_CAPI UChar * U_EXPORT2
    286 u_strFindLast(const UChar *s, int32_t length,
    287               const UChar *sub, int32_t subLength) {
    288     const UChar *start, *limit, *p, *q, *subLimit;
    289     UChar c, cs;
    290 
    291     if(sub==NULL || subLength<-1) {
    292         return (UChar *)s;
    293     }
    294     if(s==NULL || length<-1) {
    295         return NULL;
    296     }
    297 
    298     /*
    299      * This implementation is more lazy than the one for u_strFindFirst():
    300      * There is no special search code for NUL-terminated strings.
    301      * It does not seem to be worth it for searching substrings to
    302      * search forward and find all matches like in u_strrchr() and similar.
    303      * Therefore, we simply get both string lengths and search backward.
    304      *
    305      * markus 2002oct23
    306      */
    307 
    308     if(subLength<0) {
    309         subLength=u_strlen(sub);
    310     }
    311     if(subLength==0) {
    312         return (UChar *)s;
    313     }
    314 
    315     /* get sub[subLength-1] to search for it fast */
    316     subLimit=sub+subLength;
    317     cs=*(--subLimit);
    318     --subLength;
    319 
    320     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
    321         /* the substring consists of a single, non-surrogate BMP code point */
    322         return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
    323     }
    324 
    325     if(length<0) {
    326         length=u_strlen(s);
    327     }
    328 
    329     /* subLength was decremented above */
    330     if(length<=subLength) {
    331         return NULL; /* s is shorter than sub */
    332     }
    333 
    334     start=s;
    335     limit=s+length;
    336 
    337     /* the substring must start no later than s+subLength */
    338     s+=subLength;
    339 
    340     while(s!=limit) {
    341         c=*(--limit);
    342         if(c==cs) {
    343             /* found last substring UChar, compare rest */
    344             p=limit;
    345             q=subLimit;
    346             for(;;) {
    347                 if(q==sub) {
    348                     if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
    349                         return (UChar *)p; /* well-formed match */
    350                     } else {
    351                         break; /* no match because surrogate pair is split */
    352                     }
    353                 }
    354                 if(*(--p)!=*(--q)) {
    355                     break; /* no match */
    356                 }
    357             }
    358         }
    359     }
    360 
    361     /* not found */
    362     return NULL;
    363 }
    364 
    365 U_CAPI UChar * U_EXPORT2
    366 u_strrstr(const UChar *s, const UChar *substring) {
    367     return u_strFindLast(s, -1, substring, -1);
    368 }
    369 
    370 U_CAPI UChar * U_EXPORT2
    371 u_strrchr(const UChar *s, UChar c) {
    372     if(U16_IS_SURROGATE(c)) {
    373         /* make sure to not find half of a surrogate pair */
    374         return u_strFindLast(s, -1, &c, 1);
    375     } else {
    376         const UChar *result=NULL;
    377         UChar cs;
    378 
    379         /* trivial search for a BMP code point */
    380         for(;;) {
    381             if((cs=*s)==c) {
    382                 result=s;
    383             }
    384             if(cs==0) {
    385                 return (UChar *)result;
    386             }
    387             ++s;
    388         }
    389     }
    390 }
    391 
    392 U_CAPI UChar * U_EXPORT2
    393 u_strrchr32(const UChar *s, UChar32 c) {
    394     if((uint32_t)c<=U_BMP_MAX) {
    395         /* find BMP code point */
    396         return u_strrchr(s, (UChar)c);
    397     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    398         /* find supplementary code point as surrogate pair */
    399         const UChar *result=NULL;
    400         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
    401 
    402         while((cs=*s++)!=0) {
    403             if(cs==lead && *s==trail) {
    404                 result=s-1;
    405             }
    406         }
    407         return (UChar *)result;
    408     } else {
    409         /* not a Unicode code point, not findable */
    410         return NULL;
    411     }
    412 }
    413 
    414 U_CAPI UChar * U_EXPORT2
    415 u_memrchr(const UChar *s, UChar c, int32_t count) {
    416     if(count<=0) {
    417         return NULL; /* no string */
    418     } else if(U16_IS_SURROGATE(c)) {
    419         /* make sure to not find half of a surrogate pair */
    420         return u_strFindLast(s, count, &c, 1);
    421     } else {
    422         /* trivial search for a BMP code point */
    423         const UChar *limit=s+count;
    424         do {
    425             if(*(--limit)==c) {
    426                 return (UChar *)limit;
    427             }
    428         } while(s!=limit);
    429         return NULL;
    430     }
    431 }
    432 
    433 U_CAPI UChar * U_EXPORT2
    434 u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
    435     if((uint32_t)c<=U_BMP_MAX) {
    436         /* find BMP code point */
    437         return u_memrchr(s, (UChar)c, count);
    438     } else if(count<2) {
    439         /* too short for a surrogate pair */
    440         return NULL;
    441     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    442         /* find supplementary code point as surrogate pair */
    443         const UChar *limit=s+count-1;
    444         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
    445 
    446         do {
    447             if(*limit==trail && *(limit-1)==lead) {
    448                 return (UChar *)(limit-1);
    449             }
    450         } while(s!=--limit);
    451         return NULL;
    452     } else {
    453         /* not a Unicode code point, not findable */
    454         return NULL;
    455     }
    456 }
    457 
    458 /* Tokenization functions --------------------------------------------------- */
    459 
    460 /*
    461  * Match each code point in a string against each code point in the matchSet.
    462  * Return the index of the first string code point that
    463  * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
    464  * Return -(string length)-1 if there is no such code point.
    465  */
    466 static int32_t
    467 _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
    468     int32_t matchLen, matchBMPLen, strItr, matchItr;
    469     UChar32 stringCh, matchCh;
    470     UChar c, c2;
    471 
    472     /* first part of matchSet contains only BMP code points */
    473     matchBMPLen = 0;
    474     while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
    475         ++matchBMPLen;
    476     }
    477 
    478     /* second part of matchSet contains BMP and supplementary code points */
    479     matchLen = matchBMPLen;
    480     while(matchSet[matchLen] != 0) {
    481         ++matchLen;
    482     }
    483 
    484     for(strItr = 0; (c = string[strItr]) != 0;) {
    485         ++strItr;
    486         if(U16_IS_SINGLE(c)) {
    487             if(polarity) {
    488                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
    489                     if(c == matchSet[matchItr]) {
    490                         return strItr - 1; /* one matches */
    491                     }
    492                 }
    493             } else {
    494                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
    495                     if(c == matchSet[matchItr]) {
    496                         goto endloop;
    497                     }
    498                 }
    499                 return strItr - 1; /* none matches */
    500             }
    501         } else {
    502             /*
    503              * No need to check for string length before U16_IS_TRAIL
    504              * because c2 could at worst be the terminating NUL.
    505              */
    506             if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
    507                 ++strItr;
    508                 stringCh = U16_GET_SUPPLEMENTARY(c, c2);
    509             } else {
    510                 stringCh = c; /* unpaired trail surrogate */
    511             }
    512 
    513             if(polarity) {
    514                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
    515                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
    516                     if(stringCh == matchCh) {
    517                         return strItr - U16_LENGTH(stringCh); /* one matches */
    518                     }
    519                 }
    520             } else {
    521                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
    522                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
    523                     if(stringCh == matchCh) {
    524                         goto endloop;
    525                     }
    526                 }
    527                 return strItr - U16_LENGTH(stringCh); /* none matches */
    528             }
    529         }
    530 endloop:
    531         /* wish C had continue with labels like Java... */;
    532     }
    533 
    534     /* Didn't find it. */
    535     return -strItr-1;
    536 }
    537 
    538 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
    539 U_CAPI UChar * U_EXPORT2
    540 u_strpbrk(const UChar *string, const UChar *matchSet)
    541 {
    542     int32_t idx = _matchFromSet(string, matchSet, TRUE);
    543     if(idx >= 0) {
    544         return (UChar *)string + idx;
    545     } else {
    546         return NULL;
    547     }
    548 }
    549 
    550 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
    551 U_CAPI int32_t U_EXPORT2
    552 u_strcspn(const UChar *string, const UChar *matchSet)
    553 {
    554     int32_t idx = _matchFromSet(string, matchSet, TRUE);
    555     if(idx >= 0) {
    556         return idx;
    557     } else {
    558         return -idx - 1; /* == u_strlen(string) */
    559     }
    560 }
    561 
    562 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
    563 U_CAPI int32_t U_EXPORT2
    564 u_strspn(const UChar *string, const UChar *matchSet)
    565 {
    566     int32_t idx = _matchFromSet(string, matchSet, FALSE);
    567     if(idx >= 0) {
    568         return idx;
    569     } else {
    570         return -idx - 1; /* == u_strlen(string) */
    571     }
    572 }
    573 
    574 /* ----- Text manipulation functions --- */
    575 
    576 U_CAPI UChar* U_EXPORT2
    577 u_strtok_r(UChar    *src,
    578      const UChar    *delim,
    579            UChar   **saveState)
    580 {
    581     UChar *tokSource;
    582     UChar *nextToken;
    583     uint32_t nonDelimIdx;
    584 
    585     /* If saveState is NULL, the user messed up. */
    586     if (src != NULL) {
    587         tokSource = src;
    588         *saveState = src; /* Set to "src" in case there are no delimiters */
    589     }
    590     else if (*saveState) {
    591         tokSource = *saveState;
    592     }
    593     else {
    594         /* src == NULL && *saveState == NULL */
    595         /* This shouldn't happen. We already finished tokenizing. */
    596         return NULL;
    597     }
    598 
    599     /* Skip initial delimiters */
    600     nonDelimIdx = u_strspn(tokSource, delim);
    601     tokSource = &tokSource[nonDelimIdx];
    602 
    603     if (*tokSource) {
    604         nextToken = u_strpbrk(tokSource, delim);
    605         if (nextToken != NULL) {
    606             /* Create a token */
    607             *(nextToken++) = 0;
    608             *saveState = nextToken;
    609             return tokSource;
    610         }
    611         else if (*saveState) {
    612             /* Return the last token */
    613             *saveState = NULL;
    614             return tokSource;
    615         }
    616     }
    617     else {
    618         /* No tokens were found. Only delimiters were left. */
    619         *saveState = NULL;
    620     }
    621     return NULL;
    622 }
    623 
    624 /* Miscellaneous functions -------------------------------------------------- */
    625 
    626 U_CAPI UChar* U_EXPORT2
    627 u_strcat(UChar     *dst,
    628     const UChar     *src)
    629 {
    630     UChar *anchor = dst;            /* save a pointer to start of dst */
    631 
    632     while(*dst != 0) {              /* To end of first string          */
    633         ++dst;
    634     }
    635     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    636     }
    637 
    638     return anchor;
    639 }
    640 
    641 U_CAPI UChar*  U_EXPORT2
    642 u_strncat(UChar     *dst,
    643      const UChar     *src,
    644      int32_t     n )
    645 {
    646     if(n > 0) {
    647         UChar *anchor = dst;            /* save a pointer to start of dst */
    648 
    649         while(*dst != 0) {              /* To end of first string          */
    650             ++dst;
    651         }
    652         while((*dst = *src) != 0) {     /* copy string 2 over              */
    653             ++dst;
    654             if(--n == 0) {
    655                 *dst = 0;
    656                 break;
    657             }
    658             ++src;
    659         }
    660 
    661         return anchor;
    662     } else {
    663         return dst;
    664     }
    665 }
    666 
    667 /* ----- Text property functions --- */
    668 
    669 U_CAPI int32_t   U_EXPORT2
    670 u_strcmp(const UChar *s1,
    671     const UChar *s2)
    672 {
    673     UChar  c1, c2;
    674 
    675     for(;;) {
    676         c1=*s1++;
    677         c2=*s2++;
    678         if (c1 != c2 || c1 == 0) {
    679             break;
    680         }
    681     }
    682     return (int32_t)c1 - (int32_t)c2;
    683 }
    684 
    685 U_CFUNC int32_t U_EXPORT2
    686 uprv_strCompare(const UChar *s1, int32_t length1,
    687                 const UChar *s2, int32_t length2,
    688                 UBool strncmpStyle, UBool codePointOrder) {
    689     const UChar *start1, *start2, *limit1, *limit2;
    690     UChar c1, c2;
    691 
    692     /* setup for fix-up */
    693     start1=s1;
    694     start2=s2;
    695 
    696     /* compare identical prefixes - they do not need to be fixed up */
    697     if(length1<0 && length2<0) {
    698         /* strcmp style, both NUL-terminated */
    699         if(s1==s2) {
    700             return 0;
    701         }
    702 
    703         for(;;) {
    704             c1=*s1;
    705             c2=*s2;
    706             if(c1!=c2) {
    707                 break;
    708             }
    709             if(c1==0) {
    710                 return 0;
    711             }
    712             ++s1;
    713             ++s2;
    714         }
    715 
    716         /* setup for fix-up */
    717         limit1=limit2=NULL;
    718     } else if(strncmpStyle) {
    719         /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
    720         if(s1==s2) {
    721             return 0;
    722         }
    723 
    724         limit1=start1+length1;
    725 
    726         for(;;) {
    727             /* both lengths are same, check only one limit */
    728             if(s1==limit1) {
    729                 return 0;
    730             }
    731 
    732             c1=*s1;
    733             c2=*s2;
    734             if(c1!=c2) {
    735                 break;
    736             }
    737             if(c1==0) {
    738                 return 0;
    739             }
    740             ++s1;
    741             ++s2;
    742         }
    743 
    744         /* setup for fix-up */
    745         limit2=start2+length1; /* use length1 here, too, to enforce assumption */
    746     } else {
    747         /* memcmp/UnicodeString style, both length-specified */
    748         int32_t lengthResult;
    749 
    750         if(length1<0) {
    751             length1=u_strlen(s1);
    752         }
    753         if(length2<0) {
    754             length2=u_strlen(s2);
    755         }
    756 
    757         /* limit1=start1+min(lenght1, length2) */
    758         if(length1<length2) {
    759             lengthResult=-1;
    760             limit1=start1+length1;
    761         } else if(length1==length2) {
    762             lengthResult=0;
    763             limit1=start1+length1;
    764         } else /* length1>length2 */ {
    765             lengthResult=1;
    766             limit1=start1+length2;
    767         }
    768 
    769         if(s1==s2) {
    770             return lengthResult;
    771         }
    772 
    773         for(;;) {
    774             /* check pseudo-limit */
    775             if(s1==limit1) {
    776                 return lengthResult;
    777             }
    778 
    779             c1=*s1;
    780             c2=*s2;
    781             if(c1!=c2) {
    782                 break;
    783             }
    784             ++s1;
    785             ++s2;
    786         }
    787 
    788         /* setup for fix-up */
    789         limit1=start1+length1;
    790         limit2=start2+length2;
    791     }
    792 
    793     /* if both values are in or above the surrogate range, fix them up */
    794     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
    795         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    796         if(
    797             (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
    798             (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
    799         ) {
    800             /* part of a surrogate pair, leave >=d800 */
    801         } else {
    802             /* BMP code point - may be surrogate code point - make <d800 */
    803             c1-=0x2800;
    804         }
    805 
    806         if(
    807             (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
    808             (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
    809         ) {
    810             /* part of a surrogate pair, leave >=d800 */
    811         } else {
    812             /* BMP code point - may be surrogate code point - make <d800 */
    813             c2-=0x2800;
    814         }
    815     }
    816 
    817     /* now c1 and c2 are in the requested (code unit or code point) order */
    818     return (int32_t)c1-(int32_t)c2;
    819 }
    820 
    821 /*
    822  * Compare two strings as presented by UCharIterators.
    823  * Use code unit or code point order.
    824  * When the function returns, it is undefined where the iterators
    825  * have stopped.
    826  */
    827 U_CAPI int32_t U_EXPORT2
    828 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    829     UChar32 c1, c2;
    830 
    831     /* argument checking */
    832     if(iter1==NULL || iter2==NULL) {
    833         return 0; /* bad arguments */
    834     }
    835     if(iter1==iter2) {
    836         return 0; /* identical iterators */
    837     }
    838 
    839     /* reset iterators to start? */
    840     iter1->move(iter1, 0, UITER_START);
    841     iter2->move(iter2, 0, UITER_START);
    842 
    843     /* compare identical prefixes - they do not need to be fixed up */
    844     for(;;) {
    845         c1=iter1->next(iter1);
    846         c2=iter2->next(iter2);
    847         if(c1!=c2) {
    848             break;
    849         }
    850         if(c1==-1) {
    851             return 0;
    852         }
    853     }
    854 
    855     /* if both values are in or above the surrogate range, fix them up */
    856     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
    857         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    858         if(
    859             (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
    860             (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
    861         ) {
    862             /* part of a surrogate pair, leave >=d800 */
    863         } else {
    864             /* BMP code point - may be surrogate code point - make <d800 */
    865             c1-=0x2800;
    866         }
    867 
    868         if(
    869             (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
    870             (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
    871         ) {
    872             /* part of a surrogate pair, leave >=d800 */
    873         } else {
    874             /* BMP code point - may be surrogate code point - make <d800 */
    875             c2-=0x2800;
    876         }
    877     }
    878 
    879     /* now c1 and c2 are in the requested (code unit or code point) order */
    880     return (int32_t)c1-(int32_t)c2;
    881 }
    882 
    883 #if 0
    884 /*
    885  * u_strCompareIter() does not leave the iterators _on_ the different units.
    886  * This is possible but would cost a few extra indirect function calls to back
    887  * up if the last unit (c1 or c2 respectively) was >=0.
    888  *
    889  * Consistently leaving them _behind_ the different units is not an option
    890  * because the current "unit" is the end of the string if that is reached,
    891  * and in such a case the iterator does not move.
    892  * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
    893  * of their strings. Calling previous() on each does not move them to where
    894  * the comparison fails.
    895  *
    896  * So the simplest semantics is to not define where the iterators end up.
    897  *
    898  * The following fragment is part of what would need to be done for backing up.
    899  */
    900 void fragment {
    901         /* iff a surrogate is part of a surrogate pair, leave >=d800 */
    902         if(c1<=0xdbff) {
    903             if(!U16_IS_TRAIL(iter1->current(iter1))) {
    904                 /* lead surrogate code point - make <d800 */
    905                 c1-=0x2800;
    906             }
    907         } else if(c1<=0xdfff) {
    908             int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
    909             iter1->previous(iter1); /* ==c1 */
    910             if(!U16_IS_LEAD(iter1->previous(iter1))) {
    911                 /* trail surrogate code point - make <d800 */
    912                 c1-=0x2800;
    913             }
    914             /* go back to behind where the difference is */
    915             iter1->move(iter1, idx, UITER_ZERO);
    916         } else /* 0xe000<=c1<=0xffff */ {
    917             /* BMP code point - make <d800 */
    918             c1-=0x2800;
    919         }
    920 }
    921 #endif
    922 
    923 U_CAPI int32_t U_EXPORT2
    924 u_strCompare(const UChar *s1, int32_t length1,
    925              const UChar *s2, int32_t length2,
    926              UBool codePointOrder) {
    927     /* argument checking */
    928     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
    929         return 0;
    930     }
    931     return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
    932 }
    933 
    934 /* String compare in code point order - u_strcmp() compares in code unit order. */
    935 U_CAPI int32_t U_EXPORT2
    936 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
    937     return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
    938 }
    939 
    940 U_CAPI int32_t   U_EXPORT2
    941 u_strncmp(const UChar     *s1,
    942      const UChar     *s2,
    943      int32_t     n)
    944 {
    945     if(n > 0) {
    946         int32_t rc;
    947         for(;;) {
    948             rc = (int32_t)*s1 - (int32_t)*s2;
    949             if(rc != 0 || *s1 == 0 || --n == 0) {
    950                 return rc;
    951             }
    952             ++s1;
    953             ++s2;
    954         }
    955     } else {
    956         return 0;
    957     }
    958 }
    959 
    960 U_CAPI int32_t U_EXPORT2
    961 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
    962     return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
    963 }
    964 
    965 U_CAPI UChar* U_EXPORT2
    966 u_strcpy(UChar     *dst,
    967     const UChar     *src)
    968 {
    969     UChar *anchor = dst;            /* save a pointer to start of dst */
    970 
    971     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    972     }
    973 
    974     return anchor;
    975 }
    976 
    977 U_CAPI UChar*  U_EXPORT2
    978 u_strncpy(UChar     *dst,
    979      const UChar     *src,
    980      int32_t     n)
    981 {
    982     UChar *anchor = dst;            /* save a pointer to start of dst */
    983 
    984     /* copy string 2 over */
    985     while(n > 0 && (*(dst++) = *(src++)) != 0) {
    986         --n;
    987     }
    988 
    989     return anchor;
    990 }
    991 
    992 U_CAPI int32_t   U_EXPORT2
    993 u_strlen(const UChar *s)
    994 {
    995 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
    996     return (int32_t)uprv_wcslen((const wchar_t *)s);
    997 #else
    998     const UChar *t = s;
    999     while(*t != 0) {
   1000       ++t;
   1001     }
   1002     return t - s;
   1003 #endif
   1004 }
   1005 
   1006 U_CAPI int32_t U_EXPORT2
   1007 u_countChar32(const UChar *s, int32_t length) {
   1008     int32_t count;
   1009 
   1010     if(s==NULL || length<-1) {
   1011         return 0;
   1012     }
   1013 
   1014     count=0;
   1015     if(length>=0) {
   1016         while(length>0) {
   1017             ++count;
   1018             if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
   1019                 s+=2;
   1020                 length-=2;
   1021             } else {
   1022                 ++s;
   1023                 --length;
   1024             }
   1025         }
   1026     } else /* length==-1 */ {
   1027         UChar c;
   1028 
   1029         for(;;) {
   1030             if((c=*s++)==0) {
   1031                 break;
   1032             }
   1033             ++count;
   1034 
   1035             /*
   1036              * sufficient to look ahead one because of UTF-16;
   1037              * safe to look ahead one because at worst that would be the terminating NUL
   1038              */
   1039             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
   1040                 ++s;
   1041             }
   1042         }
   1043     }
   1044     return count;
   1045 }
   1046 
   1047 U_CAPI UBool U_EXPORT2
   1048 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
   1049 
   1050     if(number<0) {
   1051         return TRUE;
   1052     }
   1053     if(s==NULL || length<-1) {
   1054         return FALSE;
   1055     }
   1056 
   1057     if(length==-1) {
   1058         /* s is NUL-terminated */
   1059         UChar c;
   1060 
   1061         /* count code points until they exceed */
   1062         for(;;) {
   1063             if((c=*s++)==0) {
   1064                 return FALSE;
   1065             }
   1066             if(number==0) {
   1067                 return TRUE;
   1068             }
   1069             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
   1070                 ++s;
   1071             }
   1072             --number;
   1073         }
   1074     } else {
   1075         /* length>=0 known */
   1076         const UChar *limit;
   1077         int32_t maxSupplementary;
   1078 
   1079         /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
   1080         if(((length+1)/2)>number) {
   1081             return TRUE;
   1082         }
   1083 
   1084         /* check if s does not even contain enough UChars */
   1085         maxSupplementary=length-number;
   1086         if(maxSupplementary<=0) {
   1087             return FALSE;
   1088         }
   1089         /* there are maxSupplementary=length-number more UChars than asked-for code points */
   1090 
   1091         /*
   1092          * count code points until they exceed and also check that there are
   1093          * no more than maxSupplementary supplementary code points (UChar pairs)
   1094          */
   1095         limit=s+length;
   1096         for(;;) {
   1097             if(s==limit) {
   1098                 return FALSE;
   1099             }
   1100             if(number==0) {
   1101                 return TRUE;
   1102             }
   1103             if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
   1104                 ++s;
   1105                 if(--maxSupplementary<=0) {
   1106                     /* too many pairs - too few code points */
   1107                     return FALSE;
   1108                 }
   1109             }
   1110             --number;
   1111         }
   1112     }
   1113 }
   1114 
   1115 U_CAPI UChar * U_EXPORT2
   1116 u_memcpy(UChar *dest, const UChar *src, int32_t count) {
   1117     if(count > 0) {
   1118         uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
   1119     }
   1120     return dest;
   1121 }
   1122 
   1123 U_CAPI UChar * U_EXPORT2
   1124 u_memmove(UChar *dest, const UChar *src, int32_t count) {
   1125     if(count > 0) {
   1126         uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
   1127     }
   1128     return dest;
   1129 }
   1130 
   1131 U_CAPI UChar * U_EXPORT2
   1132 u_memset(UChar *dest, UChar c, int32_t count) {
   1133     if(count > 0) {
   1134         UChar *ptr = dest;
   1135         UChar *limit = dest + count;
   1136 
   1137         while (ptr < limit) {
   1138             *(ptr++) = c;
   1139         }
   1140     }
   1141     return dest;
   1142 }
   1143 
   1144 U_CAPI int32_t U_EXPORT2
   1145 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
   1146     if(count > 0) {
   1147         const UChar *limit = buf1 + count;
   1148         int32_t result;
   1149 
   1150         while (buf1 < limit) {
   1151             result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
   1152             if (result != 0) {
   1153                 return result;
   1154             }
   1155             buf1++;
   1156             buf2++;
   1157         }
   1158     }
   1159     return 0;
   1160 }
   1161 
   1162 U_CAPI int32_t U_EXPORT2
   1163 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
   1164     return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
   1165 }
   1166 
   1167 /* u_unescape & support fns ------------------------------------------------- */
   1168 
   1169 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
   1170 static const UChar UNESCAPE_MAP[] = {
   1171     /*"   0x22, 0x22 */
   1172     /*'   0x27, 0x27 */
   1173     /*?   0x3F, 0x3F */
   1174     /*\   0x5C, 0x5C */
   1175     /*a*/ 0x61, 0x07,
   1176     /*b*/ 0x62, 0x08,
   1177     /*e*/ 0x65, 0x1b,
   1178     /*f*/ 0x66, 0x0c,
   1179     /*n*/ 0x6E, 0x0a,
   1180     /*r*/ 0x72, 0x0d,
   1181     /*t*/ 0x74, 0x09,
   1182     /*v*/ 0x76, 0x0b
   1183 };
   1184 enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
   1185 
   1186 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */
   1187 static int8_t _digit8(UChar c) {
   1188     if (c >= 0x0030 && c <= 0x0037) {
   1189         return (int8_t)(c - 0x0030);
   1190     }
   1191     return -1;
   1192 }
   1193 
   1194 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */
   1195 static int8_t _digit16(UChar c) {
   1196     if (c >= 0x0030 && c <= 0x0039) {
   1197         return (int8_t)(c - 0x0030);
   1198     }
   1199     if (c >= 0x0041 && c <= 0x0046) {
   1200         return (int8_t)(c - (0x0041 - 10));
   1201     }
   1202     if (c >= 0x0061 && c <= 0x0066) {
   1203         return (int8_t)(c - (0x0061 - 10));
   1204     }
   1205     return -1;
   1206 }
   1207 
   1208 /* Parse a single escape sequence.  Although this method deals in
   1209  * UChars, it does not use C++ or UnicodeString.  This allows it to
   1210  * be used from C contexts. */
   1211 U_CAPI UChar32 U_EXPORT2
   1212 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
   1213              int32_t *offset,
   1214              int32_t length,
   1215              void *context) {
   1216 
   1217     int32_t start = *offset;
   1218     UChar c;
   1219     UChar32 result = 0;
   1220     int8_t n = 0;
   1221     int8_t minDig = 0;
   1222     int8_t maxDig = 0;
   1223     int8_t bitsPerDigit = 4;
   1224     int8_t dig;
   1225     int32_t i;
   1226     UBool braces = FALSE;
   1227 
   1228     /* Check that offset is in range */
   1229     if (*offset < 0 || *offset >= length) {
   1230         goto err;
   1231     }
   1232 
   1233     /* Fetch first UChar after '\\' */
   1234     c = charAt((*offset)++, context);
   1235 
   1236     /* Convert hexadecimal and octal escapes */
   1237     switch (c) {
   1238     case 0x0075 /*'u'*/:
   1239         minDig = maxDig = 4;
   1240         break;
   1241     case 0x0055 /*'U'*/:
   1242         minDig = maxDig = 8;
   1243         break;
   1244     case 0x0078 /*'x'*/:
   1245         minDig = 1;
   1246         if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
   1247             ++(*offset);
   1248             braces = TRUE;
   1249             maxDig = 8;
   1250         } else {
   1251             maxDig = 2;
   1252         }
   1253         break;
   1254     default:
   1255         dig = _digit8(c);
   1256         if (dig >= 0) {
   1257             minDig = 1;
   1258             maxDig = 3;
   1259             n = 1; /* Already have first octal digit */
   1260             bitsPerDigit = 3;
   1261             result = dig;
   1262         }
   1263         break;
   1264     }
   1265     if (minDig != 0) {
   1266         while (*offset < length && n < maxDig) {
   1267             c = charAt(*offset, context);
   1268             dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
   1269             if (dig < 0) {
   1270                 break;
   1271             }
   1272             result = (result << bitsPerDigit) | dig;
   1273             ++(*offset);
   1274             ++n;
   1275         }
   1276         if (n < minDig) {
   1277             goto err;
   1278         }
   1279         if (braces) {
   1280             if (c != 0x7D /*}*/) {
   1281                 goto err;
   1282             }
   1283             ++(*offset);
   1284         }
   1285         if (result < 0 || result >= 0x110000) {
   1286             goto err;
   1287         }
   1288         /* If an escape sequence specifies a lead surrogate, see if
   1289          * there is a trail surrogate after it, either as an escape or
   1290          * as a literal.  If so, join them up into a supplementary.
   1291          */
   1292         if (*offset < length && U16_IS_LEAD(result)) {
   1293             int32_t ahead = *offset + 1;
   1294             c = charAt(*offset, context);
   1295             if (c == 0x5C /*'\\'*/ && ahead < length) {
   1296                 c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
   1297             }
   1298             if (U16_IS_TRAIL(c)) {
   1299                 *offset = ahead;
   1300                 result = U16_GET_SUPPLEMENTARY(result, c);
   1301             }
   1302         }
   1303         return result;
   1304     }
   1305 
   1306     /* Convert C-style escapes in table */
   1307     for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
   1308         if (c == UNESCAPE_MAP[i]) {
   1309             return UNESCAPE_MAP[i+1];
   1310         } else if (c < UNESCAPE_MAP[i]) {
   1311             break;
   1312         }
   1313     }
   1314 
   1315     /* Map \cX to control-X: X & 0x1F */
   1316     if (c == 0x0063 /*'c'*/ && *offset < length) {
   1317         c = charAt((*offset)++, context);
   1318         if (U16_IS_LEAD(c) && *offset < length) {
   1319             UChar c2 = charAt(*offset, context);
   1320             if (U16_IS_TRAIL(c2)) {
   1321                 ++(*offset);
   1322                 c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
   1323             }
   1324         }
   1325         return 0x1F & c;
   1326     }
   1327 
   1328     /* If no special forms are recognized, then consider
   1329      * the backslash to generically escape the next character.
   1330      * Deal with surrogate pairs. */
   1331     if (U16_IS_LEAD(c) && *offset < length) {
   1332         UChar c2 = charAt(*offset, context);
   1333         if (U16_IS_TRAIL(c2)) {
   1334             ++(*offset);
   1335             return U16_GET_SUPPLEMENTARY(c, c2);
   1336         }
   1337     }
   1338     return c;
   1339 
   1340  err:
   1341     /* Invalid escape sequence */
   1342     *offset = start; /* Reset to initial value */
   1343     return (UChar32)0xFFFFFFFF;
   1344 }
   1345 
   1346 /* u_unescapeAt() callback to return a UChar from a char* */
   1347 static UChar U_CALLCONV
   1348 _charPtr_charAt(int32_t offset, void *context) {
   1349     UChar c16;
   1350     /* It would be more efficient to access the invariant tables
   1351      * directly but there is no API for that. */
   1352     u_charsToUChars(((char*) context) + offset, &c16, 1);
   1353     return c16;
   1354 }
   1355 
   1356 /* Append an escape-free segment of the text; used by u_unescape() */
   1357 static void _appendUChars(UChar *dest, int32_t destCapacity,
   1358                           const char *src, int32_t srcLen) {
   1359     if (destCapacity < 0) {
   1360         destCapacity = 0;
   1361     }
   1362     if (srcLen > destCapacity) {
   1363         srcLen = destCapacity;
   1364     }
   1365     u_charsToUChars(src, dest, srcLen);
   1366 }
   1367 
   1368 /* Do an invariant conversion of char* -> UChar*, with escape parsing */
   1369 U_CAPI int32_t U_EXPORT2
   1370 u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
   1371     const char *segment = src;
   1372     int32_t i = 0;
   1373     char c;
   1374 
   1375     while ((c=*src) != 0) {
   1376         /* '\\' intentionally written as compiler-specific
   1377          * character constant to correspond to compiler-specific
   1378          * char* constants. */
   1379         if (c == '\\') {
   1380             int32_t lenParsed = 0;
   1381             UChar32 c32;
   1382             if (src != segment) {
   1383                 if (dest != NULL) {
   1384                     _appendUChars(dest + i, destCapacity - i,
   1385                                   segment, (int32_t)(src - segment));
   1386                 }
   1387                 i += (int32_t)(src - segment);
   1388             }
   1389             ++src; /* advance past '\\' */
   1390             c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
   1391             if (lenParsed == 0) {
   1392                 goto err;
   1393             }
   1394             src += lenParsed; /* advance past escape seq. */
   1395             if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
   1396                 U16_APPEND_UNSAFE(dest, i, c32);
   1397             } else {
   1398                 i += U16_LENGTH(c32);
   1399             }
   1400             segment = src;
   1401         } else {
   1402             ++src;
   1403         }
   1404     }
   1405     if (src != segment) {
   1406         if (dest != NULL) {
   1407             _appendUChars(dest + i, destCapacity - i,
   1408                           segment, (int32_t)(src - segment));
   1409         }
   1410         i += (int32_t)(src - segment);
   1411     }
   1412     if (dest != NULL && i < destCapacity) {
   1413         dest[i] = 0;
   1414     }
   1415     return i;
   1416 
   1417  err:
   1418     if (dest != NULL && destCapacity > 0) {
   1419         *dest = 0;
   1420     }
   1421     return 0;
   1422 }
   1423 
   1424 /* NUL-termination of strings ----------------------------------------------- */
   1425 
   1426 /**
   1427  * NUL-terminate a string no matter what its type.
   1428  * Set warning and error codes accordingly.
   1429  */
   1430 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
   1431     if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
   1432         /* not a public function, so no complete argument checking */   \
   1433                                                                         \
   1434         if(length<0) {                                                  \
   1435             /* assume that the caller handles this */                   \
   1436         } else if(length<destCapacity) {                                \
   1437             /* NUL-terminate the string, the NUL fits */                \
   1438             dest[length]=0;                                             \
   1439             /* unset the not-terminated warning but leave all others */ \
   1440             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
   1441                 *pErrorCode=U_ZERO_ERROR;                               \
   1442             }                                                           \
   1443         } else if(length==destCapacity) {                               \
   1444             /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
   1445             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
   1446         } else /* length>destCapacity */ {                              \
   1447             /* even the string itself did not fit - set an error code */ \
   1448             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
   1449         }                                                               \
   1450     }
   1451 
/*
 * NUL-terminate a UChar string via __TERMINATE_STRING.
 * If pErrorCode is non-NULL and holds a success code:
 *   0<=length<destCapacity  stores dest[length]=0 and resets a
 *                           U_STRING_NOT_TERMINATED_WARNING to U_ZERO_ERROR
 *   length==destCapacity    sets U_STRING_NOT_TERMINATED_WARNING
 *   length>destCapacity     sets U_BUFFER_OVERFLOW_ERROR
 * Returns length unchanged in every case.
 */
U_CAPI int32_t U_EXPORT2
u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
   1457 
/*
 * NUL-terminate a char string via __TERMINATE_STRING.
 * If pErrorCode is non-NULL and holds a success code:
 *   0<=length<destCapacity  stores dest[length]=0 and resets a
 *                           U_STRING_NOT_TERMINATED_WARNING to U_ZERO_ERROR
 *   length==destCapacity    sets U_STRING_NOT_TERMINATED_WARNING
 *   length>destCapacity     sets U_BUFFER_OVERFLOW_ERROR
 * Returns length unchanged in every case.
 */
U_CAPI int32_t U_EXPORT2
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
   1463 
/*
 * NUL-terminate a UChar32 string via __TERMINATE_STRING.
 * If pErrorCode is non-NULL and holds a success code:
 *   0<=length<destCapacity  stores dest[length]=0 and resets a
 *                           U_STRING_NOT_TERMINATED_WARNING to U_ZERO_ERROR
 *   length==destCapacity    sets U_STRING_NOT_TERMINATED_WARNING
 *   length>destCapacity     sets U_BUFFER_OVERFLOW_ERROR
 * Returns length unchanged in every case.
 */
U_CAPI int32_t U_EXPORT2
u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
   1469 
/*
 * NUL-terminate a wchar_t string via __TERMINATE_STRING.
 * If pErrorCode is non-NULL and holds a success code:
 *   0<=length<destCapacity  stores dest[length]=0 and resets a
 *                           U_STRING_NOT_TERMINATED_WARNING to U_ZERO_ERROR
 *   length==destCapacity    sets U_STRING_NOT_TERMINATED_WARNING
 *   length>destCapacity     sets U_BUFFER_OVERFLOW_ERROR
 * Returns length unchanged in every case.
 */
U_CAPI int32_t U_EXPORT2
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
   1475 
   1476 // Compute the hash code for a string -------------------------------------- ***
   1477 
   1478 // Moved here from uhash.c so that UnicodeString::hashCode() does not depend
   1479 // on UHashtable code.
   1480 
   1481 /*
   1482   Compute the hash by iterating sparsely over about 32 (up to 63)
   1483   characters spaced evenly through the string.  For each character,
   1484   multiply the previous hash value by a prime number and add the new
   1485   character in, like a linear congruential random number generator,
   1486   producing a pseudorandom deterministic value well distributed over
   1487   the output range. [LIU]
   1488 */
   1489 
   1490 #define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
   1491     uint32_t hash = 0;                        \
   1492     const TYPE *p = (const TYPE*) STR;        \
   1493     if (p != NULL) {                          \
   1494         int32_t len = (int32_t)(STRLEN);      \
   1495         int32_t inc = ((len - 32) / 32) + 1;  \
   1496         const TYPE *limit = p + len;          \
   1497         while (p<limit) {                     \
   1498             hash = (hash * 37) + DEREF;       \
   1499             p += inc;                         \
   1500         }                                     \
   1501     }                                         \
   1502     return static_cast<int32_t>(hash)
   1503 
/*
 * Used by UnicodeString to compute its hashcode - Not public API.
 * Hashes `length` UChars starting at str with STRING_HASH, each sampled
 * UChar contributing its own value. Returns 0 for str==NULL.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashUCharsN(const UChar *str, int32_t length) {
    STRING_HASH(UChar, str, length, *p);
}
   1509 
/*
 * Hashes `length` chars starting at str with STRING_HASH.
 * The string is read through a const uint8_t pointer, so each sampled char
 * contributes its value as an unsigned byte. Returns 0 for str==NULL.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashCharsN(const char *str, int32_t length) {
    STRING_HASH(uint8_t, str, length, *p);
}
   1514 
/*
 * Hashes `length` chars starting at str with STRING_HASH.
 * Each sampled char is passed through uprv_tolower() and the result cast to
 * uint8_t before it is added to the hash. Returns 0 for str==NULL.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashICharsN(const char *str, int32_t length) {
    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
}
   1519