Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 * File ustring.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   12/07/98    bertrand    Creation.
     17 ******************************************************************************
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/putil.h"
     22 #include "unicode/uchar.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/utf16.h"
     25 #include "cstring.h"
     26 #include "cwchar.h"
     27 #include "cmemory.h"
     28 #include "ustr_imp.h"
     29 
     30 /* ANSI string.h - style functions ------------------------------------------ */
     31 
     32 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
     33 #define U_BMP_MAX 0xffff
     34 
     35 /* Forward binary string search functions ----------------------------------- */
     36 
     37 /*
     38  * Test if a substring match inside a string is at code point boundaries.
     39  * All pointers refer to the same buffer.
     40  * The limit pointer may be NULL, all others must be real pointers.
     41  */
     42 static inline UBool
     43 isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
     44     if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
     45         /* the leading edge of the match is in the middle of a surrogate pair */
     46         return FALSE;
     47     }
     48     if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
     49         /* the trailing edge of the match is in the middle of a surrogate pair */
     50         return FALSE;
     51     }
     52     return TRUE;
     53 }
     54 
     55 U_CAPI UChar * U_EXPORT2
     56 u_strFindFirst(const UChar *s, int32_t length,
     57                const UChar *sub, int32_t subLength) {
     58     const UChar *start, *p, *q, *subLimit;
     59     UChar c, cs, cq;
     60 
     61     if(sub==NULL || subLength<-1) {
     62         return (UChar *)s;
     63     }
     64     if(s==NULL || length<-1) {
     65         return NULL;
     66     }
     67 
     68     start=s;
     69 
     70     if(length<0 && subLength<0) {
     71         /* both strings are NUL-terminated */
     72         if((cs=*sub++)==0) {
     73             return (UChar *)s;
     74         }
     75         if(*sub==0 && !U16_IS_SURROGATE(cs)) {
     76             /* the substring consists of a single, non-surrogate BMP code point */
     77             return u_strchr(s, cs);
     78         }
     79 
     80         while((c=*s++)!=0) {
     81             if(c==cs) {
     82                 /* found first substring UChar, compare rest */
     83                 p=s;
     84                 q=sub;
     85                 for(;;) {
     86                     if((cq=*q)==0) {
     87                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
     88                             return (UChar *)(s-1); /* well-formed match */
     89                         } else {
     90                             break; /* no match because surrogate pair is split */
     91                         }
     92                     }
     93                     if((c=*p)==0) {
     94                         return NULL; /* no match, and none possible after s */
     95                     }
     96                     if(c!=cq) {
     97                         break; /* no match */
     98                     }
     99                     ++p;
    100                     ++q;
    101                 }
    102             }
    103         }
    104 
    105         /* not found */
    106         return NULL;
    107     }
    108 
    109     if(subLength<0) {
    110         subLength=u_strlen(sub);
    111     }
    112     if(subLength==0) {
    113         return (UChar *)s;
    114     }
    115 
    116     /* get sub[0] to search for it fast */
    117     cs=*sub++;
    118     --subLength;
    119     subLimit=sub+subLength;
    120 
    121     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
    122         /* the substring consists of a single, non-surrogate BMP code point */
    123         return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
    124     }
    125 
    126     if(length<0) {
    127         /* s is NUL-terminated */
    128         while((c=*s++)!=0) {
    129             if(c==cs) {
    130                 /* found first substring UChar, compare rest */
    131                 p=s;
    132                 q=sub;
    133                 for(;;) {
    134                     if(q==subLimit) {
    135                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
    136                             return (UChar *)(s-1); /* well-formed match */
    137                         } else {
    138                             break; /* no match because surrogate pair is split */
    139                         }
    140                     }
    141                     if((c=*p)==0) {
    142                         return NULL; /* no match, and none possible after s */
    143                     }
    144                     if(c!=*q) {
    145                         break; /* no match */
    146                     }
    147                     ++p;
    148                     ++q;
    149                 }
    150             }
    151         }
    152     } else {
    153         const UChar *limit, *preLimit;
    154 
    155         /* subLength was decremented above */
    156         if(length<=subLength) {
    157             return NULL; /* s is shorter than sub */
    158         }
    159 
    160         limit=s+length;
    161 
    162         /* the substring must start before preLimit */
    163         preLimit=limit-subLength;
    164 
    165         while(s!=preLimit) {
    166             c=*s++;
    167             if(c==cs) {
    168                 /* found first substring UChar, compare rest */
    169                 p=s;
    170                 q=sub;
    171                 for(;;) {
    172                     if(q==subLimit) {
    173                         if(isMatchAtCPBoundary(start, s-1, p, limit)) {
    174                             return (UChar *)(s-1); /* well-formed match */
    175                         } else {
    176                             break; /* no match because surrogate pair is split */
    177                         }
    178                     }
    179                     if(*p!=*q) {
    180                         break; /* no match */
    181                     }
    182                     ++p;
    183                     ++q;
    184                 }
    185             }
    186         }
    187     }
    188 
    189     /* not found */
    190     return NULL;
    191 }
    192 
    193 U_CAPI UChar * U_EXPORT2
    194 u_strstr(const UChar *s, const UChar *substring) {
    195     return u_strFindFirst(s, -1, substring, -1);
    196 }
    197 
    198 U_CAPI UChar * U_EXPORT2
    199 u_strchr(const UChar *s, UChar c) {
    200     if(U16_IS_SURROGATE(c)) {
    201         /* make sure to not find half of a surrogate pair */
    202         return u_strFindFirst(s, -1, &c, 1);
    203     } else {
    204         UChar cs;
    205 
    206         /* trivial search for a BMP code point */
    207         for(;;) {
    208             if((cs=*s)==c) {
    209                 return (UChar *)s;
    210             }
    211             if(cs==0) {
    212                 return NULL;
    213             }
    214             ++s;
    215         }
    216     }
    217 }
    218 
    219 U_CAPI UChar * U_EXPORT2
    220 u_strchr32(const UChar *s, UChar32 c) {
    221     if((uint32_t)c<=U_BMP_MAX) {
    222         /* find BMP code point */
    223         return u_strchr(s, (UChar)c);
    224     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    225         /* find supplementary code point as surrogate pair */
    226         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
    227 
    228         while((cs=*s++)!=0) {
    229             if(cs==lead && *s==trail) {
    230                 return (UChar *)(s-1);
    231             }
    232         }
    233         return NULL;
    234     } else {
    235         /* not a Unicode code point, not findable */
    236         return NULL;
    237     }
    238 }
    239 
    240 U_CAPI UChar * U_EXPORT2
    241 u_memchr(const UChar *s, UChar c, int32_t count) {
    242     if(count<=0) {
    243         return NULL; /* no string */
    244     } else if(U16_IS_SURROGATE(c)) {
    245         /* make sure to not find half of a surrogate pair */
    246         return u_strFindFirst(s, count, &c, 1);
    247     } else {
    248         /* trivial search for a BMP code point */
    249         const UChar *limit=s+count;
    250         do {
    251             if(*s==c) {
    252                 return (UChar *)s;
    253             }
    254         } while(++s!=limit);
    255         return NULL;
    256     }
    257 }
    258 
    259 U_CAPI UChar * U_EXPORT2
    260 u_memchr32(const UChar *s, UChar32 c, int32_t count) {
    261     if((uint32_t)c<=U_BMP_MAX) {
    262         /* find BMP code point */
    263         return u_memchr(s, (UChar)c, count);
    264     } else if(count<2) {
    265         /* too short for a surrogate pair */
    266         return NULL;
    267     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    268         /* find supplementary code point as surrogate pair */
    269         const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
    270         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
    271 
    272         do {
    273             if(*s==lead && *(s+1)==trail) {
    274                 return (UChar *)s;
    275             }
    276         } while(++s!=limit);
    277         return NULL;
    278     } else {
    279         /* not a Unicode code point, not findable */
    280         return NULL;
    281     }
    282 }
    283 
    284 /* Backward binary string search functions ---------------------------------- */
    285 
    286 U_CAPI UChar * U_EXPORT2
    287 u_strFindLast(const UChar *s, int32_t length,
    288               const UChar *sub, int32_t subLength) {
    289     const UChar *start, *limit, *p, *q, *subLimit;
    290     UChar c, cs;
    291 
    292     if(sub==NULL || subLength<-1) {
    293         return (UChar *)s;
    294     }
    295     if(s==NULL || length<-1) {
    296         return NULL;
    297     }
    298 
    299     /*
    300      * This implementation is more lazy than the one for u_strFindFirst():
    301      * There is no special search code for NUL-terminated strings.
    302      * It does not seem to be worth it for searching substrings to
    303      * search forward and find all matches like in u_strrchr() and similar.
    304      * Therefore, we simply get both string lengths and search backward.
    305      *
    306      * markus 2002oct23
    307      */
    308 
    309     if(subLength<0) {
    310         subLength=u_strlen(sub);
    311     }
    312     if(subLength==0) {
    313         return (UChar *)s;
    314     }
    315 
    316     /* get sub[subLength-1] to search for it fast */
    317     subLimit=sub+subLength;
    318     cs=*(--subLimit);
    319     --subLength;
    320 
    321     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
    322         /* the substring consists of a single, non-surrogate BMP code point */
    323         return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
    324     }
    325 
    326     if(length<0) {
    327         length=u_strlen(s);
    328     }
    329 
    330     /* subLength was decremented above */
    331     if(length<=subLength) {
    332         return NULL; /* s is shorter than sub */
    333     }
    334 
    335     start=s;
    336     limit=s+length;
    337 
    338     /* the substring must start no later than s+subLength */
    339     s+=subLength;
    340 
    341     while(s!=limit) {
    342         c=*(--limit);
    343         if(c==cs) {
    344             /* found last substring UChar, compare rest */
    345             p=limit;
    346             q=subLimit;
    347             for(;;) {
    348                 if(q==sub) {
    349                     if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
    350                         return (UChar *)p; /* well-formed match */
    351                     } else {
    352                         break; /* no match because surrogate pair is split */
    353                     }
    354                 }
    355                 if(*(--p)!=*(--q)) {
    356                     break; /* no match */
    357                 }
    358             }
    359         }
    360     }
    361 
    362     /* not found */
    363     return NULL;
    364 }
    365 
    366 U_CAPI UChar * U_EXPORT2
    367 u_strrstr(const UChar *s, const UChar *substring) {
    368     return u_strFindLast(s, -1, substring, -1);
    369 }
    370 
    371 U_CAPI UChar * U_EXPORT2
    372 u_strrchr(const UChar *s, UChar c) {
    373     if(U16_IS_SURROGATE(c)) {
    374         /* make sure to not find half of a surrogate pair */
    375         return u_strFindLast(s, -1, &c, 1);
    376     } else {
    377         const UChar *result=NULL;
    378         UChar cs;
    379 
    380         /* trivial search for a BMP code point */
    381         for(;;) {
    382             if((cs=*s)==c) {
    383                 result=s;
    384             }
    385             if(cs==0) {
    386                 return (UChar *)result;
    387             }
    388             ++s;
    389         }
    390     }
    391 }
    392 
    393 U_CAPI UChar * U_EXPORT2
    394 u_strrchr32(const UChar *s, UChar32 c) {
    395     if((uint32_t)c<=U_BMP_MAX) {
    396         /* find BMP code point */
    397         return u_strrchr(s, (UChar)c);
    398     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    399         /* find supplementary code point as surrogate pair */
    400         const UChar *result=NULL;
    401         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
    402 
    403         while((cs=*s++)!=0) {
    404             if(cs==lead && *s==trail) {
    405                 result=s-1;
    406             }
    407         }
    408         return (UChar *)result;
    409     } else {
    410         /* not a Unicode code point, not findable */
    411         return NULL;
    412     }
    413 }
    414 
    415 U_CAPI UChar * U_EXPORT2
    416 u_memrchr(const UChar *s, UChar c, int32_t count) {
    417     if(count<=0) {
    418         return NULL; /* no string */
    419     } else if(U16_IS_SURROGATE(c)) {
    420         /* make sure to not find half of a surrogate pair */
    421         return u_strFindLast(s, count, &c, 1);
    422     } else {
    423         /* trivial search for a BMP code point */
    424         const UChar *limit=s+count;
    425         do {
    426             if(*(--limit)==c) {
    427                 return (UChar *)limit;
    428             }
    429         } while(s!=limit);
    430         return NULL;
    431     }
    432 }
    433 
    434 U_CAPI UChar * U_EXPORT2
    435 u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
    436     if((uint32_t)c<=U_BMP_MAX) {
    437         /* find BMP code point */
    438         return u_memrchr(s, (UChar)c, count);
    439     } else if(count<2) {
    440         /* too short for a surrogate pair */
    441         return NULL;
    442     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
    443         /* find supplementary code point as surrogate pair */
    444         const UChar *limit=s+count-1;
    445         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
    446 
    447         do {
    448             if(*limit==trail && *(limit-1)==lead) {
    449                 return (UChar *)(limit-1);
    450             }
    451         } while(s!=--limit);
    452         return NULL;
    453     } else {
    454         /* not a Unicode code point, not findable */
    455         return NULL;
    456     }
    457 }
    458 
    459 /* Tokenization functions --------------------------------------------------- */
    460 
    461 /*
    462  * Match each code point in a string against each code point in the matchSet.
    463  * Return the index of the first string code point that
    464  * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
    465  * Return -(string length)-1 if there is no such code point.
    466  */
    467 static int32_t
    468 _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
    469     int32_t matchLen, matchBMPLen, strItr, matchItr;
    470     UChar32 stringCh, matchCh;
    471     UChar c, c2;
    472 
    473     /* first part of matchSet contains only BMP code points */
    474     matchBMPLen = 0;
    475     while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
    476         ++matchBMPLen;
    477     }
    478 
    479     /* second part of matchSet contains BMP and supplementary code points */
    480     matchLen = matchBMPLen;
    481     while(matchSet[matchLen] != 0) {
    482         ++matchLen;
    483     }
    484 
    485     for(strItr = 0; (c = string[strItr]) != 0;) {
    486         ++strItr;
    487         if(U16_IS_SINGLE(c)) {
    488             if(polarity) {
    489                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
    490                     if(c == matchSet[matchItr]) {
    491                         return strItr - 1; /* one matches */
    492                     }
    493                 }
    494             } else {
    495                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
    496                     if(c == matchSet[matchItr]) {
    497                         goto endloop;
    498                     }
    499                 }
    500                 return strItr - 1; /* none matches */
    501             }
    502         } else {
    503             /*
    504              * No need to check for string length before U16_IS_TRAIL
    505              * because c2 could at worst be the terminating NUL.
    506              */
    507             if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
    508                 ++strItr;
    509                 stringCh = U16_GET_SUPPLEMENTARY(c, c2);
    510             } else {
    511                 stringCh = c; /* unpaired trail surrogate */
    512             }
    513 
    514             if(polarity) {
    515                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
    516                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
    517                     if(stringCh == matchCh) {
    518                         return strItr - U16_LENGTH(stringCh); /* one matches */
    519                     }
    520                 }
    521             } else {
    522                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
    523                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
    524                     if(stringCh == matchCh) {
    525                         goto endloop;
    526                     }
    527                 }
    528                 return strItr - U16_LENGTH(stringCh); /* none matches */
    529             }
    530         }
    531 endloop:
    532         /* wish C had continue with labels like Java... */;
    533     }
    534 
    535     /* Didn't find it. */
    536     return -strItr-1;
    537 }
    538 
    539 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
    540 U_CAPI UChar * U_EXPORT2
    541 u_strpbrk(const UChar *string, const UChar *matchSet)
    542 {
    543     int32_t idx = _matchFromSet(string, matchSet, TRUE);
    544     if(idx >= 0) {
    545         return (UChar *)string + idx;
    546     } else {
    547         return NULL;
    548     }
    549 }
    550 
    551 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
    552 U_CAPI int32_t U_EXPORT2
    553 u_strcspn(const UChar *string, const UChar *matchSet)
    554 {
    555     int32_t idx = _matchFromSet(string, matchSet, TRUE);
    556     if(idx >= 0) {
    557         return idx;
    558     } else {
    559         return -idx - 1; /* == u_strlen(string) */
    560     }
    561 }
    562 
    563 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
    564 U_CAPI int32_t U_EXPORT2
    565 u_strspn(const UChar *string, const UChar *matchSet)
    566 {
    567     int32_t idx = _matchFromSet(string, matchSet, FALSE);
    568     if(idx >= 0) {
    569         return idx;
    570     } else {
    571         return -idx - 1; /* == u_strlen(string) */
    572     }
    573 }
    574 
    575 /* ----- Text manipulation functions --- */
    576 
    577 U_CAPI UChar* U_EXPORT2
    578 u_strtok_r(UChar    *src,
    579      const UChar    *delim,
    580            UChar   **saveState)
    581 {
    582     UChar *tokSource;
    583     UChar *nextToken;
    584     uint32_t nonDelimIdx;
    585 
    586     /* If saveState is NULL, the user messed up. */
    587     if (src != NULL) {
    588         tokSource = src;
    589         *saveState = src; /* Set to "src" in case there are no delimiters */
    590     }
    591     else if (*saveState) {
    592         tokSource = *saveState;
    593     }
    594     else {
    595         /* src == NULL && *saveState == NULL */
    596         /* This shouldn't happen. We already finished tokenizing. */
    597         return NULL;
    598     }
    599 
    600     /* Skip initial delimiters */
    601     nonDelimIdx = u_strspn(tokSource, delim);
    602     tokSource = &tokSource[nonDelimIdx];
    603 
    604     if (*tokSource) {
    605         nextToken = u_strpbrk(tokSource, delim);
    606         if (nextToken != NULL) {
    607             /* Create a token */
    608             *(nextToken++) = 0;
    609             *saveState = nextToken;
    610             return tokSource;
    611         }
    612         else if (*saveState) {
    613             /* Return the last token */
    614             *saveState = NULL;
    615             return tokSource;
    616         }
    617     }
    618     else {
    619         /* No tokens were found. Only delimiters were left. */
    620         *saveState = NULL;
    621     }
    622     return NULL;
    623 }
    624 
    625 /* Miscellaneous functions -------------------------------------------------- */
    626 
    627 U_CAPI UChar* U_EXPORT2
    628 u_strcat(UChar     *dst,
    629     const UChar     *src)
    630 {
    631     UChar *anchor = dst;            /* save a pointer to start of dst */
    632 
    633     while(*dst != 0) {              /* To end of first string          */
    634         ++dst;
    635     }
    636     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    637     }
    638 
    639     return anchor;
    640 }
    641 
    642 U_CAPI UChar*  U_EXPORT2
    643 u_strncat(UChar     *dst,
    644      const UChar     *src,
    645      int32_t     n )
    646 {
    647     if(n > 0) {
    648         UChar *anchor = dst;            /* save a pointer to start of dst */
    649 
    650         while(*dst != 0) {              /* To end of first string          */
    651             ++dst;
    652         }
    653         while((*dst = *src) != 0) {     /* copy string 2 over              */
    654             ++dst;
    655             if(--n == 0) {
    656                 *dst = 0;
    657                 break;
    658             }
    659             ++src;
    660         }
    661 
    662         return anchor;
    663     } else {
    664         return dst;
    665     }
    666 }
    667 
    668 /* ----- Text property functions --- */
    669 
    670 U_CAPI int32_t   U_EXPORT2
    671 u_strcmp(const UChar *s1,
    672     const UChar *s2)
    673 {
    674     UChar  c1, c2;
    675 
    676     for(;;) {
    677         c1=*s1++;
    678         c2=*s2++;
    679         if (c1 != c2 || c1 == 0) {
    680             break;
    681         }
    682     }
    683     return (int32_t)c1 - (int32_t)c2;
    684 }
    685 
    686 U_CFUNC int32_t U_EXPORT2
    687 uprv_strCompare(const UChar *s1, int32_t length1,
    688                 const UChar *s2, int32_t length2,
    689                 UBool strncmpStyle, UBool codePointOrder) {
    690     const UChar *start1, *start2, *limit1, *limit2;
    691     UChar c1, c2;
    692 
    693     /* setup for fix-up */
    694     start1=s1;
    695     start2=s2;
    696 
    697     /* compare identical prefixes - they do not need to be fixed up */
    698     if(length1<0 && length2<0) {
    699         /* strcmp style, both NUL-terminated */
    700         if(s1==s2) {
    701             return 0;
    702         }
    703 
    704         for(;;) {
    705             c1=*s1;
    706             c2=*s2;
    707             if(c1!=c2) {
    708                 break;
    709             }
    710             if(c1==0) {
    711                 return 0;
    712             }
    713             ++s1;
    714             ++s2;
    715         }
    716 
    717         /* setup for fix-up */
    718         limit1=limit2=NULL;
    719     } else if(strncmpStyle) {
    720         /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
    721         if(s1==s2) {
    722             return 0;
    723         }
    724 
    725         limit1=start1+length1;
    726 
    727         for(;;) {
    728             /* both lengths are same, check only one limit */
    729             if(s1==limit1) {
    730                 return 0;
    731             }
    732 
    733             c1=*s1;
    734             c2=*s2;
    735             if(c1!=c2) {
    736                 break;
    737             }
    738             if(c1==0) {
    739                 return 0;
    740             }
    741             ++s1;
    742             ++s2;
    743         }
    744 
    745         /* setup for fix-up */
    746         limit2=start2+length1; /* use length1 here, too, to enforce assumption */
    747     } else {
    748         /* memcmp/UnicodeString style, both length-specified */
    749         int32_t lengthResult;
    750 
    751         if(length1<0) {
    752             length1=u_strlen(s1);
    753         }
    754         if(length2<0) {
    755             length2=u_strlen(s2);
    756         }
    757 
    758         /* limit1=start1+min(lenght1, length2) */
    759         if(length1<length2) {
    760             lengthResult=-1;
    761             limit1=start1+length1;
    762         } else if(length1==length2) {
    763             lengthResult=0;
    764             limit1=start1+length1;
    765         } else /* length1>length2 */ {
    766             lengthResult=1;
    767             limit1=start1+length2;
    768         }
    769 
    770         if(s1==s2) {
    771             return lengthResult;
    772         }
    773 
    774         for(;;) {
    775             /* check pseudo-limit */
    776             if(s1==limit1) {
    777                 return lengthResult;
    778             }
    779 
    780             c1=*s1;
    781             c2=*s2;
    782             if(c1!=c2) {
    783                 break;
    784             }
    785             ++s1;
    786             ++s2;
    787         }
    788 
    789         /* setup for fix-up */
    790         limit1=start1+length1;
    791         limit2=start2+length2;
    792     }
    793 
    794     /* if both values are in or above the surrogate range, fix them up */
    795     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
    796         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    797         if(
    798             (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
    799             (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
    800         ) {
    801             /* part of a surrogate pair, leave >=d800 */
    802         } else {
    803             /* BMP code point - may be surrogate code point - make <d800 */
    804             c1-=0x2800;
    805         }
    806 
    807         if(
    808             (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
    809             (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
    810         ) {
    811             /* part of a surrogate pair, leave >=d800 */
    812         } else {
    813             /* BMP code point - may be surrogate code point - make <d800 */
    814             c2-=0x2800;
    815         }
    816     }
    817 
    818     /* now c1 and c2 are in the requested (code unit or code point) order */
    819     return (int32_t)c1-(int32_t)c2;
    820 }
    821 
    822 /*
    823  * Compare two strings as presented by UCharIterators.
    824  * Use code unit or code point order.
    825  * When the function returns, it is undefined where the iterators
    826  * have stopped.
    827  */
    828 U_CAPI int32_t U_EXPORT2
    829 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    830     UChar32 c1, c2;
    831 
    832     /* argument checking */
    833     if(iter1==NULL || iter2==NULL) {
    834         return 0; /* bad arguments */
    835     }
    836     if(iter1==iter2) {
    837         return 0; /* identical iterators */
    838     }
    839 
    840     /* reset iterators to start? */
    841     iter1->move(iter1, 0, UITER_START);
    842     iter2->move(iter2, 0, UITER_START);
    843 
    844     /* compare identical prefixes - they do not need to be fixed up */
    845     for(;;) {
    846         c1=iter1->next(iter1);
    847         c2=iter2->next(iter2);
    848         if(c1!=c2) {
    849             break;
    850         }
    851         if(c1==-1) {
    852             return 0;
    853         }
    854     }
    855 
    856     /* if both values are in or above the surrogate range, fix them up */
    857     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
    858         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    859         if(
    860             (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
    861             (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
    862         ) {
    863             /* part of a surrogate pair, leave >=d800 */
    864         } else {
    865             /* BMP code point - may be surrogate code point - make <d800 */
    866             c1-=0x2800;
    867         }
    868 
    869         if(
    870             (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
    871             (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
    872         ) {
    873             /* part of a surrogate pair, leave >=d800 */
    874         } else {
    875             /* BMP code point - may be surrogate code point - make <d800 */
    876             c2-=0x2800;
    877         }
    878     }
    879 
    880     /* now c1 and c2 are in the requested (code unit or code point) order */
    881     return (int32_t)c1-(int32_t)c2;
    882 }
    883 
    884 #if 0
    885 /*
    886  * u_strCompareIter() does not leave the iterators _on_ the different units.
    887  * This is possible but would cost a few extra indirect function calls to back
    888  * up if the last unit (c1 or c2 respectively) was >=0.
    889  *
    890  * Consistently leaving them _behind_ the different units is not an option
    891  * because the current "unit" is the end of the string if that is reached,
    892  * and in such a case the iterator does not move.
    893  * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
    894  * of their strings. Calling previous() on each does not move them to where
    895  * the comparison fails.
    896  *
    897  * So the simplest semantics is to not define where the iterators end up.
    898  *
    899  * The following fragment is part of what would need to be done for backing up.
    900  */
    901 void fragment {
    902         /* iff a surrogate is part of a surrogate pair, leave >=d800 */
    903         if(c1<=0xdbff) {
    904             if(!U16_IS_TRAIL(iter1->current(iter1))) {
    905                 /* lead surrogate code point - make <d800 */
    906                 c1-=0x2800;
    907             }
    908         } else if(c1<=0xdfff) {
    909             int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
    910             iter1->previous(iter1); /* ==c1 */
    911             if(!U16_IS_LEAD(iter1->previous(iter1))) {
    912                 /* trail surrogate code point - make <d800 */
    913                 c1-=0x2800;
    914             }
    915             /* go back to behind where the difference is */
    916             iter1->move(iter1, idx, UITER_ZERO);
    917         } else /* 0xe000<=c1<=0xffff */ {
    918             /* BMP code point - make <d800 */
    919             c1-=0x2800;
    920         }
    921 }
    922 #endif
    923 
    924 U_CAPI int32_t U_EXPORT2
    925 u_strCompare(const UChar *s1, int32_t length1,
    926              const UChar *s2, int32_t length2,
    927              UBool codePointOrder) {
    928     /* argument checking */
    929     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
    930         return 0;
    931     }
    932     return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
    933 }
    934 
    935 /* String compare in code point order - u_strcmp() compares in code unit order. */
    936 U_CAPI int32_t U_EXPORT2
    937 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
    938     return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
    939 }
    940 
    941 U_CAPI int32_t   U_EXPORT2
    942 u_strncmp(const UChar     *s1,
    943      const UChar     *s2,
    944      int32_t     n)
    945 {
    946     if(n > 0) {
    947         int32_t rc;
    948         for(;;) {
    949             rc = (int32_t)*s1 - (int32_t)*s2;
    950             if(rc != 0 || *s1 == 0 || --n == 0) {
    951                 return rc;
    952             }
    953             ++s1;
    954             ++s2;
    955         }
    956     } else {
    957         return 0;
    958     }
    959 }
    960 
    961 U_CAPI int32_t U_EXPORT2
    962 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
    963     return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
    964 }
    965 
    966 U_CAPI UChar* U_EXPORT2
    967 u_strcpy(UChar     *dst,
    968     const UChar     *src)
    969 {
    970     UChar *anchor = dst;            /* save a pointer to start of dst */
    971 
    972     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    973     }
    974 
    975     return anchor;
    976 }
    977 
    978 U_CAPI UChar*  U_EXPORT2
    979 u_strncpy(UChar     *dst,
    980      const UChar     *src,
    981      int32_t     n)
    982 {
    983     UChar *anchor = dst;            /* save a pointer to start of dst */
    984 
    985     /* copy string 2 over */
    986     while(n > 0 && (*(dst++) = *(src++)) != 0) {
    987         --n;
    988     }
    989 
    990     return anchor;
    991 }
    992 
    993 U_CAPI int32_t   U_EXPORT2
    994 u_strlen(const UChar *s)
    995 {
    996 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
    997     return (int32_t)uprv_wcslen((const wchar_t *)s);
    998 #else
    999     const UChar *t = s;
   1000     while(*t != 0) {
   1001       ++t;
   1002     }
   1003     return t - s;
   1004 #endif
   1005 }
   1006 
   1007 U_CAPI int32_t U_EXPORT2
   1008 u_countChar32(const UChar *s, int32_t length) {
   1009     int32_t count;
   1010 
   1011     if(s==NULL || length<-1) {
   1012         return 0;
   1013     }
   1014 
   1015     count=0;
   1016     if(length>=0) {
   1017         while(length>0) {
   1018             ++count;
   1019             if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
   1020                 s+=2;
   1021                 length-=2;
   1022             } else {
   1023                 ++s;
   1024                 --length;
   1025             }
   1026         }
   1027     } else /* length==-1 */ {
   1028         UChar c;
   1029 
   1030         for(;;) {
   1031             if((c=*s++)==0) {
   1032                 break;
   1033             }
   1034             ++count;
   1035 
   1036             /*
   1037              * sufficient to look ahead one because of UTF-16;
   1038              * safe to look ahead one because at worst that would be the terminating NUL
   1039              */
   1040             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
   1041                 ++s;
   1042             }
   1043         }
   1044     }
   1045     return count;
   1046 }
   1047 
   1048 U_CAPI UBool U_EXPORT2
   1049 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
   1050 
   1051     if(number<0) {
   1052         return TRUE;
   1053     }
   1054     if(s==NULL || length<-1) {
   1055         return FALSE;
   1056     }
   1057 
   1058     if(length==-1) {
   1059         /* s is NUL-terminated */
   1060         UChar c;
   1061 
   1062         /* count code points until they exceed */
   1063         for(;;) {
   1064             if((c=*s++)==0) {
   1065                 return FALSE;
   1066             }
   1067             if(number==0) {
   1068                 return TRUE;
   1069             }
   1070             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
   1071                 ++s;
   1072             }
   1073             --number;
   1074         }
   1075     } else {
   1076         /* length>=0 known */
   1077         const UChar *limit;
   1078         int32_t maxSupplementary;
   1079 
   1080         /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
   1081         if(((length+1)/2)>number) {
   1082             return TRUE;
   1083         }
   1084 
   1085         /* check if s does not even contain enough UChars */
   1086         maxSupplementary=length-number;
   1087         if(maxSupplementary<=0) {
   1088             return FALSE;
   1089         }
   1090         /* there are maxSupplementary=length-number more UChars than asked-for code points */
   1091 
   1092         /*
   1093          * count code points until they exceed and also check that there are
   1094          * no more than maxSupplementary supplementary code points (UChar pairs)
   1095          */
   1096         limit=s+length;
   1097         for(;;) {
   1098             if(s==limit) {
   1099                 return FALSE;
   1100             }
   1101             if(number==0) {
   1102                 return TRUE;
   1103             }
   1104             if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
   1105                 ++s;
   1106                 if(--maxSupplementary<=0) {
   1107                     /* too many pairs - too few code points */
   1108                     return FALSE;
   1109                 }
   1110             }
   1111             --number;
   1112         }
   1113     }
   1114 }
   1115 
   1116 U_CAPI UChar * U_EXPORT2
   1117 u_memcpy(UChar *dest, const UChar *src, int32_t count) {
   1118     if(count > 0) {
   1119         uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
   1120     }
   1121     return dest;
   1122 }
   1123 
   1124 U_CAPI UChar * U_EXPORT2
   1125 u_memmove(UChar *dest, const UChar *src, int32_t count) {
   1126     if(count > 0) {
   1127         uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
   1128     }
   1129     return dest;
   1130 }
   1131 
   1132 U_CAPI UChar * U_EXPORT2
   1133 u_memset(UChar *dest, UChar c, int32_t count) {
   1134     if(count > 0) {
   1135         UChar *ptr = dest;
   1136         UChar *limit = dest + count;
   1137 
   1138         while (ptr < limit) {
   1139             *(ptr++) = c;
   1140         }
   1141     }
   1142     return dest;
   1143 }
   1144 
   1145 U_CAPI int32_t U_EXPORT2
   1146 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
   1147     if(count > 0) {
   1148         const UChar *limit = buf1 + count;
   1149         int32_t result;
   1150 
   1151         while (buf1 < limit) {
   1152             result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
   1153             if (result != 0) {
   1154                 return result;
   1155             }
   1156             buf1++;
   1157             buf2++;
   1158         }
   1159     }
   1160     return 0;
   1161 }
   1162 
   1163 U_CAPI int32_t U_EXPORT2
   1164 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
   1165     return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
   1166 }
   1167 
   1168 /* u_unescape & support fns ------------------------------------------------- */
   1169 
   1170 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
   1171 static const UChar UNESCAPE_MAP[] = {
   1172     /*"   0x22, 0x22 */
   1173     /*'   0x27, 0x27 */
   1174     /*?   0x3F, 0x3F */
   1175     /*\   0x5C, 0x5C */
   1176     /*a*/ 0x61, 0x07,
   1177     /*b*/ 0x62, 0x08,
   1178     /*e*/ 0x65, 0x1b,
   1179     /*f*/ 0x66, 0x0c,
   1180     /*n*/ 0x6E, 0x0a,
   1181     /*r*/ 0x72, 0x0d,
   1182     /*t*/ 0x74, 0x09,
   1183     /*v*/ 0x76, 0x0b
   1184 };
   1185 enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
   1186 
   1187 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */
   1188 static int8_t _digit8(UChar c) {
   1189     if (c >= 0x0030 && c <= 0x0037) {
   1190         return (int8_t)(c - 0x0030);
   1191     }
   1192     return -1;
   1193 }
   1194 
   1195 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */
   1196 static int8_t _digit16(UChar c) {
   1197     if (c >= 0x0030 && c <= 0x0039) {
   1198         return (int8_t)(c - 0x0030);
   1199     }
   1200     if (c >= 0x0041 && c <= 0x0046) {
   1201         return (int8_t)(c - (0x0041 - 10));
   1202     }
   1203     if (c >= 0x0061 && c <= 0x0066) {
   1204         return (int8_t)(c - (0x0061 - 10));
   1205     }
   1206     return -1;
   1207 }
   1208 
   1209 /* Parse a single escape sequence.  Although this method deals in
   1210  * UChars, it does not use C++ or UnicodeString.  This allows it to
   1211  * be used from C contexts. */
   1212 U_CAPI UChar32 U_EXPORT2
   1213 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
   1214              int32_t *offset,
   1215              int32_t length,
   1216              void *context) {
   1217 
   1218     int32_t start = *offset;
   1219     UChar c;
   1220     UChar32 result = 0;
   1221     int8_t n = 0;
   1222     int8_t minDig = 0;
   1223     int8_t maxDig = 0;
   1224     int8_t bitsPerDigit = 4;
   1225     int8_t dig;
   1226     int32_t i;
   1227     UBool braces = FALSE;
   1228 
   1229     /* Check that offset is in range */
   1230     if (*offset < 0 || *offset >= length) {
   1231         goto err;
   1232     }
   1233 
   1234     /* Fetch first UChar after '\\' */
   1235     c = charAt((*offset)++, context);
   1236 
   1237     /* Convert hexadecimal and octal escapes */
   1238     switch (c) {
   1239     case 0x0075 /*'u'*/:
   1240         minDig = maxDig = 4;
   1241         break;
   1242     case 0x0055 /*'U'*/:
   1243         minDig = maxDig = 8;
   1244         break;
   1245     case 0x0078 /*'x'*/:
   1246         minDig = 1;
   1247         if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
   1248             ++(*offset);
   1249             braces = TRUE;
   1250             maxDig = 8;
   1251         } else {
   1252             maxDig = 2;
   1253         }
   1254         break;
   1255     default:
   1256         dig = _digit8(c);
   1257         if (dig >= 0) {
   1258             minDig = 1;
   1259             maxDig = 3;
   1260             n = 1; /* Already have first octal digit */
   1261             bitsPerDigit = 3;
   1262             result = dig;
   1263         }
   1264         break;
   1265     }
   1266     if (minDig != 0) {
   1267         while (*offset < length && n < maxDig) {
   1268             c = charAt(*offset, context);
   1269             dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
   1270             if (dig < 0) {
   1271                 break;
   1272             }
   1273             result = (result << bitsPerDigit) | dig;
   1274             ++(*offset);
   1275             ++n;
   1276         }
   1277         if (n < minDig) {
   1278             goto err;
   1279         }
   1280         if (braces) {
   1281             if (c != 0x7D /*}*/) {
   1282                 goto err;
   1283             }
   1284             ++(*offset);
   1285         }
   1286         if (result < 0 || result >= 0x110000) {
   1287             goto err;
   1288         }
   1289         /* If an escape sequence specifies a lead surrogate, see if
   1290          * there is a trail surrogate after it, either as an escape or
   1291          * as a literal.  If so, join them up into a supplementary.
   1292          */
   1293         if (*offset < length && U16_IS_LEAD(result)) {
   1294             int32_t ahead = *offset + 1;
   1295             c = charAt(*offset, context);
   1296             if (c == 0x5C /*'\\'*/ && ahead < length) {
   1297                 c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
   1298             }
   1299             if (U16_IS_TRAIL(c)) {
   1300                 *offset = ahead;
   1301                 result = U16_GET_SUPPLEMENTARY(result, c);
   1302             }
   1303         }
   1304         return result;
   1305     }
   1306 
   1307     /* Convert C-style escapes in table */
   1308     for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
   1309         if (c == UNESCAPE_MAP[i]) {
   1310             return UNESCAPE_MAP[i+1];
   1311         } else if (c < UNESCAPE_MAP[i]) {
   1312             break;
   1313         }
   1314     }
   1315 
   1316     /* Map \cX to control-X: X & 0x1F */
   1317     if (c == 0x0063 /*'c'*/ && *offset < length) {
   1318         c = charAt((*offset)++, context);
   1319         if (U16_IS_LEAD(c) && *offset < length) {
   1320             UChar c2 = charAt(*offset, context);
   1321             if (U16_IS_TRAIL(c2)) {
   1322                 ++(*offset);
   1323                 c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
   1324             }
   1325         }
   1326         return 0x1F & c;
   1327     }
   1328 
   1329     /* If no special forms are recognized, then consider
   1330      * the backslash to generically escape the next character.
   1331      * Deal with surrogate pairs. */
   1332     if (U16_IS_LEAD(c) && *offset < length) {
   1333         UChar c2 = charAt(*offset, context);
   1334         if (U16_IS_TRAIL(c2)) {
   1335             ++(*offset);
   1336             return U16_GET_SUPPLEMENTARY(c, c2);
   1337         }
   1338     }
   1339     return c;
   1340 
   1341  err:
   1342     /* Invalid escape sequence */
   1343     *offset = start; /* Reset to initial value */
   1344     return (UChar32)0xFFFFFFFF;
   1345 }
   1346 
   1347 /* u_unescapeAt() callback to return a UChar from a char* */
   1348 static UChar U_CALLCONV
   1349 _charPtr_charAt(int32_t offset, void *context) {
   1350     UChar c16;
   1351     /* It would be more efficient to access the invariant tables
   1352      * directly but there is no API for that. */
   1353     u_charsToUChars(((char*) context) + offset, &c16, 1);
   1354     return c16;
   1355 }
   1356 
   1357 /* Append an escape-free segment of the text; used by u_unescape() */
   1358 static void _appendUChars(UChar *dest, int32_t destCapacity,
   1359                           const char *src, int32_t srcLen) {
   1360     if (destCapacity < 0) {
   1361         destCapacity = 0;
   1362     }
   1363     if (srcLen > destCapacity) {
   1364         srcLen = destCapacity;
   1365     }
   1366     u_charsToUChars(src, dest, srcLen);
   1367 }
   1368 
   1369 /* Do an invariant conversion of char* -> UChar*, with escape parsing */
   1370 U_CAPI int32_t U_EXPORT2
   1371 u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
   1372     const char *segment = src;
   1373     int32_t i = 0;
   1374     char c;
   1375 
   1376     while ((c=*src) != 0) {
   1377         /* '\\' intentionally written as compiler-specific
   1378          * character constant to correspond to compiler-specific
   1379          * char* constants. */
   1380         if (c == '\\') {
   1381             int32_t lenParsed = 0;
   1382             UChar32 c32;
   1383             if (src != segment) {
   1384                 if (dest != NULL) {
   1385                     _appendUChars(dest + i, destCapacity - i,
   1386                                   segment, (int32_t)(src - segment));
   1387                 }
   1388                 i += (int32_t)(src - segment);
   1389             }
   1390             ++src; /* advance past '\\' */
   1391             c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
   1392             if (lenParsed == 0) {
   1393                 goto err;
   1394             }
   1395             src += lenParsed; /* advance past escape seq. */
   1396             if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
   1397                 U16_APPEND_UNSAFE(dest, i, c32);
   1398             } else {
   1399                 i += U16_LENGTH(c32);
   1400             }
   1401             segment = src;
   1402         } else {
   1403             ++src;
   1404         }
   1405     }
   1406     if (src != segment) {
   1407         if (dest != NULL) {
   1408             _appendUChars(dest + i, destCapacity - i,
   1409                           segment, (int32_t)(src - segment));
   1410         }
   1411         i += (int32_t)(src - segment);
   1412     }
   1413     if (dest != NULL && i < destCapacity) {
   1414         dest[i] = 0;
   1415     }
   1416     return i;
   1417 
   1418  err:
   1419     if (dest != NULL && destCapacity > 0) {
   1420         *dest = 0;
   1421     }
   1422     return 0;
   1423 }
   1424 
   1425 /* NUL-termination of strings ----------------------------------------------- */
   1426 
   1427 /**
   1428  * NUL-terminate a string no matter what its type.
   1429  * Set warning and error codes accordingly.
   1430  */
   1431 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
   1432     if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
   1433         /* not a public function, so no complete argument checking */   \
   1434                                                                         \
   1435         if(length<0) {                                                  \
   1436             /* assume that the caller handles this */                   \
   1437         } else if(length<destCapacity) {                                \
   1438             /* NUL-terminate the string, the NUL fits */                \
   1439             dest[length]=0;                                             \
   1440             /* unset the not-terminated warning but leave all others */ \
   1441             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
   1442                 *pErrorCode=U_ZERO_ERROR;                               \
   1443             }                                                           \
   1444         } else if(length==destCapacity) {                               \
   1445             /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
   1446             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
   1447         } else /* length>destCapacity */ {                              \
   1448             /* even the string itself did not fit - set an error code */ \
   1449             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
   1450         }                                                               \
   1451     }
   1452 
   1453 U_CAPI int32_t U_EXPORT2
   1454 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1455     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1456     return length;
   1457 }
   1458 
   1459 U_CAPI int32_t U_EXPORT2
   1460 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1461     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1462     return length;
   1463 }
   1464 
   1465 U_CAPI int32_t U_EXPORT2
   1466 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1467     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1468     return length;
   1469 }
   1470 
   1471 U_CAPI int32_t U_EXPORT2
   1472 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
   1473     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
   1474     return length;
   1475 }
   1476 
   1477 // Compute the hash code for a string -------------------------------------- ***
   1478 
   1479 // Moved here from uhash.c so that UnicodeString::hashCode() does not depend
   1480 // on UHashtable code.
   1481 
   1482 /*
   1483   Compute the hash by iterating sparsely over about 32 (up to 63)
   1484   characters spaced evenly through the string.  For each character,
   1485   multiply the previous hash value by a prime number and add the new
   1486   character in, like a linear congruential random number generator,
   1487   producing a pseudorandom deterministic value well distributed over
   1488   the output range. [LIU]
   1489 */
   1490 
   1491 #define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
   1492     uint32_t hash = 0;                        \
   1493     const TYPE *p = (const TYPE*) STR;        \
   1494     if (p != NULL) {                          \
   1495         int32_t len = (int32_t)(STRLEN);      \
   1496         int32_t inc = ((len - 32) / 32) + 1;  \
   1497         const TYPE *limit = p + len;          \
   1498         while (p<limit) {                     \
   1499             hash = (hash * 37) + DEREF;       \
   1500             p += inc;                         \
   1501         }                                     \
   1502     }                                         \
   1503     return static_cast<int32_t>(hash)
   1504 
   1505 /* Used by UnicodeString to compute its hashcode - Not public API. */
   1506 U_CAPI int32_t U_EXPORT2
   1507 ustr_hashUCharsN(const UChar *str, int32_t length) {
   1508     STRING_HASH(UChar, str, length, *p);
   1509 }
   1510 
   1511 U_CAPI int32_t U_EXPORT2
   1512 ustr_hashCharsN(const char *str, int32_t length) {
   1513     STRING_HASH(uint8_t, str, length, *p);
   1514 }
   1515 
   1516 U_CAPI int32_t U_EXPORT2
   1517 ustr_hashICharsN(const char *str, int32_t length) {
   1518     STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
   1519 }
   1520