1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * File ustring.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 12/07/98 bertrand Creation. 17 ****************************************************************************** 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/ustring.h" 23 #include "unicode/utf16.h" 24 #include "cstring.h" 25 #include "cwchar.h" 26 #include "cmemory.h" 27 #include "ustr_imp.h" 28 29 /* ANSI string.h - style functions ------------------------------------------ */ 30 31 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */ 32 #define U_BMP_MAX 0xffff 33 34 /* Forward binary string search functions ----------------------------------- */ 35 36 /* 37 * Test if a substring match inside a string is at code point boundaries. 38 * All pointers refer to the same buffer. 39 * The limit pointer may be NULL, all others must be real pointers. 
40 */ 41 static inline UBool 42 isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) { 43 if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) { 44 /* the leading edge of the match is in the middle of a surrogate pair */ 45 return FALSE; 46 } 47 if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) { 48 /* the trailing edge of the match is in the middle of a surrogate pair */ 49 return FALSE; 50 } 51 return TRUE; 52 } 53 54 U_CAPI UChar * U_EXPORT2 55 u_strFindFirst(const UChar *s, int32_t length, 56 const UChar *sub, int32_t subLength) { 57 const UChar *start, *p, *q, *subLimit; 58 UChar c, cs, cq; 59 60 if(sub==NULL || subLength<-1) { 61 return (UChar *)s; 62 } 63 if(s==NULL || length<-1) { 64 return NULL; 65 } 66 67 start=s; 68 69 if(length<0 && subLength<0) { 70 /* both strings are NUL-terminated */ 71 if((cs=*sub++)==0) { 72 return (UChar *)s; 73 } 74 if(*sub==0 && !U16_IS_SURROGATE(cs)) { 75 /* the substring consists of a single, non-surrogate BMP code point */ 76 return u_strchr(s, cs); 77 } 78 79 while((c=*s++)!=0) { 80 if(c==cs) { 81 /* found first substring UChar, compare rest */ 82 p=s; 83 q=sub; 84 for(;;) { 85 if((cq=*q)==0) { 86 if(isMatchAtCPBoundary(start, s-1, p, NULL)) { 87 return (UChar *)(s-1); /* well-formed match */ 88 } else { 89 break; /* no match because surrogate pair is split */ 90 } 91 } 92 if((c=*p)==0) { 93 return NULL; /* no match, and none possible after s */ 94 } 95 if(c!=cq) { 96 break; /* no match */ 97 } 98 ++p; 99 ++q; 100 } 101 } 102 } 103 104 /* not found */ 105 return NULL; 106 } 107 108 if(subLength<0) { 109 subLength=u_strlen(sub); 110 } 111 if(subLength==0) { 112 return (UChar *)s; 113 } 114 115 /* get sub[0] to search for it fast */ 116 cs=*sub++; 117 --subLength; 118 subLimit=sub+subLength; 119 120 if(subLength==0 && !U16_IS_SURROGATE(cs)) { 121 /* the substring consists of a single, non-surrogate BMP code point */ 122 return 
length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length); 123 } 124 125 if(length<0) { 126 /* s is NUL-terminated */ 127 while((c=*s++)!=0) { 128 if(c==cs) { 129 /* found first substring UChar, compare rest */ 130 p=s; 131 q=sub; 132 for(;;) { 133 if(q==subLimit) { 134 if(isMatchAtCPBoundary(start, s-1, p, NULL)) { 135 return (UChar *)(s-1); /* well-formed match */ 136 } else { 137 break; /* no match because surrogate pair is split */ 138 } 139 } 140 if((c=*p)==0) { 141 return NULL; /* no match, and none possible after s */ 142 } 143 if(c!=*q) { 144 break; /* no match */ 145 } 146 ++p; 147 ++q; 148 } 149 } 150 } 151 } else { 152 const UChar *limit, *preLimit; 153 154 /* subLength was decremented above */ 155 if(length<=subLength) { 156 return NULL; /* s is shorter than sub */ 157 } 158 159 limit=s+length; 160 161 /* the substring must start before preLimit */ 162 preLimit=limit-subLength; 163 164 while(s!=preLimit) { 165 c=*s++; 166 if(c==cs) { 167 /* found first substring UChar, compare rest */ 168 p=s; 169 q=sub; 170 for(;;) { 171 if(q==subLimit) { 172 if(isMatchAtCPBoundary(start, s-1, p, limit)) { 173 return (UChar *)(s-1); /* well-formed match */ 174 } else { 175 break; /* no match because surrogate pair is split */ 176 } 177 } 178 if(*p!=*q) { 179 break; /* no match */ 180 } 181 ++p; 182 ++q; 183 } 184 } 185 } 186 } 187 188 /* not found */ 189 return NULL; 190 } 191 192 U_CAPI UChar * U_EXPORT2 193 u_strstr(const UChar *s, const UChar *substring) { 194 return u_strFindFirst(s, -1, substring, -1); 195 } 196 197 U_CAPI UChar * U_EXPORT2 198 u_strchr(const UChar *s, UChar c) { 199 if(U16_IS_SURROGATE(c)) { 200 /* make sure to not find half of a surrogate pair */ 201 return u_strFindFirst(s, -1, &c, 1); 202 } else { 203 UChar cs; 204 205 /* trivial search for a BMP code point */ 206 for(;;) { 207 if((cs=*s)==c) { 208 return (UChar *)s; 209 } 210 if(cs==0) { 211 return NULL; 212 } 213 ++s; 214 } 215 } 216 } 217 218 U_CAPI UChar * U_EXPORT2 219 u_strchr32(const UChar 
*s, UChar32 c) { 220 if((uint32_t)c<=U_BMP_MAX) { 221 /* find BMP code point */ 222 return u_strchr(s, (UChar)c); 223 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 224 /* find supplementary code point as surrogate pair */ 225 UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); 226 227 while((cs=*s++)!=0) { 228 if(cs==lead && *s==trail) { 229 return (UChar *)(s-1); 230 } 231 } 232 return NULL; 233 } else { 234 /* not a Unicode code point, not findable */ 235 return NULL; 236 } 237 } 238 239 U_CAPI UChar * U_EXPORT2 240 u_memchr(const UChar *s, UChar c, int32_t count) { 241 if(count<=0) { 242 return NULL; /* no string */ 243 } else if(U16_IS_SURROGATE(c)) { 244 /* make sure to not find half of a surrogate pair */ 245 return u_strFindFirst(s, count, &c, 1); 246 } else { 247 /* trivial search for a BMP code point */ 248 const UChar *limit=s+count; 249 do { 250 if(*s==c) { 251 return (UChar *)s; 252 } 253 } while(++s!=limit); 254 return NULL; 255 } 256 } 257 258 U_CAPI UChar * U_EXPORT2 259 u_memchr32(const UChar *s, UChar32 c, int32_t count) { 260 if((uint32_t)c<=U_BMP_MAX) { 261 /* find BMP code point */ 262 return u_memchr(s, (UChar)c, count); 263 } else if(count<2) { 264 /* too short for a surrogate pair */ 265 return NULL; 266 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 267 /* find supplementary code point as surrogate pair */ 268 const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */ 269 UChar lead=U16_LEAD(c), trail=U16_TRAIL(c); 270 271 do { 272 if(*s==lead && *(s+1)==trail) { 273 return (UChar *)s; 274 } 275 } while(++s!=limit); 276 return NULL; 277 } else { 278 /* not a Unicode code point, not findable */ 279 return NULL; 280 } 281 } 282 283 /* Backward binary string search functions ---------------------------------- */ 284 285 U_CAPI UChar * U_EXPORT2 286 u_strFindLast(const UChar *s, int32_t length, 287 const UChar *sub, int32_t subLength) { 288 const UChar *start, *limit, *p, *q, *subLimit; 289 UChar c, cs; 290 291 
if(sub==NULL || subLength<-1) { 292 return (UChar *)s; 293 } 294 if(s==NULL || length<-1) { 295 return NULL; 296 } 297 298 /* 299 * This implementation is more lazy than the one for u_strFindFirst(): 300 * There is no special search code for NUL-terminated strings. 301 * It does not seem to be worth it for searching substrings to 302 * search forward and find all matches like in u_strrchr() and similar. 303 * Therefore, we simply get both string lengths and search backward. 304 * 305 * markus 2002oct23 306 */ 307 308 if(subLength<0) { 309 subLength=u_strlen(sub); 310 } 311 if(subLength==0) { 312 return (UChar *)s; 313 } 314 315 /* get sub[subLength-1] to search for it fast */ 316 subLimit=sub+subLength; 317 cs=*(--subLimit); 318 --subLength; 319 320 if(subLength==0 && !U16_IS_SURROGATE(cs)) { 321 /* the substring consists of a single, non-surrogate BMP code point */ 322 return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length); 323 } 324 325 if(length<0) { 326 length=u_strlen(s); 327 } 328 329 /* subLength was decremented above */ 330 if(length<=subLength) { 331 return NULL; /* s is shorter than sub */ 332 } 333 334 start=s; 335 limit=s+length; 336 337 /* the substring must start no later than s+subLength */ 338 s+=subLength; 339 340 while(s!=limit) { 341 c=*(--limit); 342 if(c==cs) { 343 /* found last substring UChar, compare rest */ 344 p=limit; 345 q=subLimit; 346 for(;;) { 347 if(q==sub) { 348 if(isMatchAtCPBoundary(start, p, limit+1, start+length)) { 349 return (UChar *)p; /* well-formed match */ 350 } else { 351 break; /* no match because surrogate pair is split */ 352 } 353 } 354 if(*(--p)!=*(--q)) { 355 break; /* no match */ 356 } 357 } 358 } 359 } 360 361 /* not found */ 362 return NULL; 363 } 364 365 U_CAPI UChar * U_EXPORT2 366 u_strrstr(const UChar *s, const UChar *substring) { 367 return u_strFindLast(s, -1, substring, -1); 368 } 369 370 U_CAPI UChar * U_EXPORT2 371 u_strrchr(const UChar *s, UChar c) { 372 if(U16_IS_SURROGATE(c)) { 373 /* make sure 
to not find half of a surrogate pair */ 374 return u_strFindLast(s, -1, &c, 1); 375 } else { 376 const UChar *result=NULL; 377 UChar cs; 378 379 /* trivial search for a BMP code point */ 380 for(;;) { 381 if((cs=*s)==c) { 382 result=s; 383 } 384 if(cs==0) { 385 return (UChar *)result; 386 } 387 ++s; 388 } 389 } 390 } 391 392 U_CAPI UChar * U_EXPORT2 393 u_strrchr32(const UChar *s, UChar32 c) { 394 if((uint32_t)c<=U_BMP_MAX) { 395 /* find BMP code point */ 396 return u_strrchr(s, (UChar)c); 397 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 398 /* find supplementary code point as surrogate pair */ 399 const UChar *result=NULL; 400 UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); 401 402 while((cs=*s++)!=0) { 403 if(cs==lead && *s==trail) { 404 result=s-1; 405 } 406 } 407 return (UChar *)result; 408 } else { 409 /* not a Unicode code point, not findable */ 410 return NULL; 411 } 412 } 413 414 U_CAPI UChar * U_EXPORT2 415 u_memrchr(const UChar *s, UChar c, int32_t count) { 416 if(count<=0) { 417 return NULL; /* no string */ 418 } else if(U16_IS_SURROGATE(c)) { 419 /* make sure to not find half of a surrogate pair */ 420 return u_strFindLast(s, count, &c, 1); 421 } else { 422 /* trivial search for a BMP code point */ 423 const UChar *limit=s+count; 424 do { 425 if(*(--limit)==c) { 426 return (UChar *)limit; 427 } 428 } while(s!=limit); 429 return NULL; 430 } 431 } 432 433 U_CAPI UChar * U_EXPORT2 434 u_memrchr32(const UChar *s, UChar32 c, int32_t count) { 435 if((uint32_t)c<=U_BMP_MAX) { 436 /* find BMP code point */ 437 return u_memrchr(s, (UChar)c, count); 438 } else if(count<2) { 439 /* too short for a surrogate pair */ 440 return NULL; 441 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 442 /* find supplementary code point as surrogate pair */ 443 const UChar *limit=s+count-1; 444 UChar lead=U16_LEAD(c), trail=U16_TRAIL(c); 445 446 do { 447 if(*limit==trail && *(limit-1)==lead) { 448 return (UChar *)(limit-1); 449 } 450 } while(s!=--limit); 451 return NULL; 452 } else { 453 /* 
not a Unicode code point, not findable */ 454 return NULL; 455 } 456 } 457 458 /* Tokenization functions --------------------------------------------------- */ 459 460 /* 461 * Match each code point in a string against each code point in the matchSet. 462 * Return the index of the first string code point that 463 * is (polarity==TRUE) or is not (FALSE) contained in the matchSet. 464 * Return -(string length)-1 if there is no such code point. 465 */ 466 static int32_t 467 _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) { 468 int32_t matchLen, matchBMPLen, strItr, matchItr; 469 UChar32 stringCh, matchCh; 470 UChar c, c2; 471 472 /* first part of matchSet contains only BMP code points */ 473 matchBMPLen = 0; 474 while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) { 475 ++matchBMPLen; 476 } 477 478 /* second part of matchSet contains BMP and supplementary code points */ 479 matchLen = matchBMPLen; 480 while(matchSet[matchLen] != 0) { 481 ++matchLen; 482 } 483 484 for(strItr = 0; (c = string[strItr]) != 0;) { 485 ++strItr; 486 if(U16_IS_SINGLE(c)) { 487 if(polarity) { 488 for(matchItr = 0; matchItr < matchLen; ++matchItr) { 489 if(c == matchSet[matchItr]) { 490 return strItr - 1; /* one matches */ 491 } 492 } 493 } else { 494 for(matchItr = 0; matchItr < matchLen; ++matchItr) { 495 if(c == matchSet[matchItr]) { 496 goto endloop; 497 } 498 } 499 return strItr - 1; /* none matches */ 500 } 501 } else { 502 /* 503 * No need to check for string length before U16_IS_TRAIL 504 * because c2 could at worst be the terminating NUL. 
505 */ 506 if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) { 507 ++strItr; 508 stringCh = U16_GET_SUPPLEMENTARY(c, c2); 509 } else { 510 stringCh = c; /* unpaired trail surrogate */ 511 } 512 513 if(polarity) { 514 for(matchItr = matchBMPLen; matchItr < matchLen;) { 515 U16_NEXT(matchSet, matchItr, matchLen, matchCh); 516 if(stringCh == matchCh) { 517 return strItr - U16_LENGTH(stringCh); /* one matches */ 518 } 519 } 520 } else { 521 for(matchItr = matchBMPLen; matchItr < matchLen;) { 522 U16_NEXT(matchSet, matchItr, matchLen, matchCh); 523 if(stringCh == matchCh) { 524 goto endloop; 525 } 526 } 527 return strItr - U16_LENGTH(stringCh); /* none matches */ 528 } 529 } 530 endloop: 531 /* wish C had continue with labels like Java... */; 532 } 533 534 /* Didn't find it. */ 535 return -strItr-1; 536 } 537 538 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */ 539 U_CAPI UChar * U_EXPORT2 540 u_strpbrk(const UChar *string, const UChar *matchSet) 541 { 542 int32_t idx = _matchFromSet(string, matchSet, TRUE); 543 if(idx >= 0) { 544 return (UChar *)string + idx; 545 } else { 546 return NULL; 547 } 548 } 549 550 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */ 551 U_CAPI int32_t U_EXPORT2 552 u_strcspn(const UChar *string, const UChar *matchSet) 553 { 554 int32_t idx = _matchFromSet(string, matchSet, TRUE); 555 if(idx >= 0) { 556 return idx; 557 } else { 558 return -idx - 1; /* == u_strlen(string) */ 559 } 560 } 561 562 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. 
*/ 563 U_CAPI int32_t U_EXPORT2 564 u_strspn(const UChar *string, const UChar *matchSet) 565 { 566 int32_t idx = _matchFromSet(string, matchSet, FALSE); 567 if(idx >= 0) { 568 return idx; 569 } else { 570 return -idx - 1; /* == u_strlen(string) */ 571 } 572 } 573 574 /* ----- Text manipulation functions --- */ 575 576 U_CAPI UChar* U_EXPORT2 577 u_strtok_r(UChar *src, 578 const UChar *delim, 579 UChar **saveState) 580 { 581 UChar *tokSource; 582 UChar *nextToken; 583 uint32_t nonDelimIdx; 584 585 /* If saveState is NULL, the user messed up. */ 586 if (src != NULL) { 587 tokSource = src; 588 *saveState = src; /* Set to "src" in case there are no delimiters */ 589 } 590 else if (*saveState) { 591 tokSource = *saveState; 592 } 593 else { 594 /* src == NULL && *saveState == NULL */ 595 /* This shouldn't happen. We already finished tokenizing. */ 596 return NULL; 597 } 598 599 /* Skip initial delimiters */ 600 nonDelimIdx = u_strspn(tokSource, delim); 601 tokSource = &tokSource[nonDelimIdx]; 602 603 if (*tokSource) { 604 nextToken = u_strpbrk(tokSource, delim); 605 if (nextToken != NULL) { 606 /* Create a token */ 607 *(nextToken++) = 0; 608 *saveState = nextToken; 609 return tokSource; 610 } 611 else if (*saveState) { 612 /* Return the last token */ 613 *saveState = NULL; 614 return tokSource; 615 } 616 } 617 else { 618 /* No tokens were found. Only delimiters were left. 
*/ 619 *saveState = NULL; 620 } 621 return NULL; 622 } 623 624 /* Miscellaneous functions -------------------------------------------------- */ 625 626 U_CAPI UChar* U_EXPORT2 627 u_strcat(UChar *dst, 628 const UChar *src) 629 { 630 UChar *anchor = dst; /* save a pointer to start of dst */ 631 632 while(*dst != 0) { /* To end of first string */ 633 ++dst; 634 } 635 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ 636 } 637 638 return anchor; 639 } 640 641 U_CAPI UChar* U_EXPORT2 642 u_strncat(UChar *dst, 643 const UChar *src, 644 int32_t n ) 645 { 646 if(n > 0) { 647 UChar *anchor = dst; /* save a pointer to start of dst */ 648 649 while(*dst != 0) { /* To end of first string */ 650 ++dst; 651 } 652 while((*dst = *src) != 0) { /* copy string 2 over */ 653 ++dst; 654 if(--n == 0) { 655 *dst = 0; 656 break; 657 } 658 ++src; 659 } 660 661 return anchor; 662 } else { 663 return dst; 664 } 665 } 666 667 /* ----- Text property functions --- */ 668 669 U_CAPI int32_t U_EXPORT2 670 u_strcmp(const UChar *s1, 671 const UChar *s2) 672 { 673 UChar c1, c2; 674 675 for(;;) { 676 c1=*s1++; 677 c2=*s2++; 678 if (c1 != c2 || c1 == 0) { 679 break; 680 } 681 } 682 return (int32_t)c1 - (int32_t)c2; 683 } 684 685 U_CFUNC int32_t U_EXPORT2 686 uprv_strCompare(const UChar *s1, int32_t length1, 687 const UChar *s2, int32_t length2, 688 UBool strncmpStyle, UBool codePointOrder) { 689 const UChar *start1, *start2, *limit1, *limit2; 690 UChar c1, c2; 691 692 /* setup for fix-up */ 693 start1=s1; 694 start2=s2; 695 696 /* compare identical prefixes - they do not need to be fixed up */ 697 if(length1<0 && length2<0) { 698 /* strcmp style, both NUL-terminated */ 699 if(s1==s2) { 700 return 0; 701 } 702 703 for(;;) { 704 c1=*s1; 705 c2=*s2; 706 if(c1!=c2) { 707 break; 708 } 709 if(c1==0) { 710 return 0; 711 } 712 ++s1; 713 ++s2; 714 } 715 716 /* setup for fix-up */ 717 limit1=limit2=NULL; 718 } else if(strncmpStyle) { 719 /* special handling for strncmp, assume length1==length2>=0 
but also check for NUL */ 720 if(s1==s2) { 721 return 0; 722 } 723 724 limit1=start1+length1; 725 726 for(;;) { 727 /* both lengths are same, check only one limit */ 728 if(s1==limit1) { 729 return 0; 730 } 731 732 c1=*s1; 733 c2=*s2; 734 if(c1!=c2) { 735 break; 736 } 737 if(c1==0) { 738 return 0; 739 } 740 ++s1; 741 ++s2; 742 } 743 744 /* setup for fix-up */ 745 limit2=start2+length1; /* use length1 here, too, to enforce assumption */ 746 } else { 747 /* memcmp/UnicodeString style, both length-specified */ 748 int32_t lengthResult; 749 750 if(length1<0) { 751 length1=u_strlen(s1); 752 } 753 if(length2<0) { 754 length2=u_strlen(s2); 755 } 756 757 /* limit1=start1+min(lenght1, length2) */ 758 if(length1<length2) { 759 lengthResult=-1; 760 limit1=start1+length1; 761 } else if(length1==length2) { 762 lengthResult=0; 763 limit1=start1+length1; 764 } else /* length1>length2 */ { 765 lengthResult=1; 766 limit1=start1+length2; 767 } 768 769 if(s1==s2) { 770 return lengthResult; 771 } 772 773 for(;;) { 774 /* check pseudo-limit */ 775 if(s1==limit1) { 776 return lengthResult; 777 } 778 779 c1=*s1; 780 c2=*s2; 781 if(c1!=c2) { 782 break; 783 } 784 ++s1; 785 ++s2; 786 } 787 788 /* setup for fix-up */ 789 limit1=start1+length1; 790 limit2=start2+length2; 791 } 792 793 /* if both values are in or above the surrogate range, fix them up */ 794 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 795 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 796 if( 797 (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) || 798 (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1))) 799 ) { 800 /* part of a surrogate pair, leave >=d800 */ 801 } else { 802 /* BMP code point - may be surrogate code point - make <d800 */ 803 c1-=0x2800; 804 } 805 806 if( 807 (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) || 808 (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1))) 809 ) { 810 /* part of a surrogate pair, leave >=d800 */ 811 } else { 812 /* 
BMP code point - may be surrogate code point - make <d800 */ 813 c2-=0x2800; 814 } 815 } 816 817 /* now c1 and c2 are in the requested (code unit or code point) order */ 818 return (int32_t)c1-(int32_t)c2; 819 } 820 821 /* 822 * Compare two strings as presented by UCharIterators. 823 * Use code unit or code point order. 824 * When the function returns, it is undefined where the iterators 825 * have stopped. 826 */ 827 U_CAPI int32_t U_EXPORT2 828 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) { 829 UChar32 c1, c2; 830 831 /* argument checking */ 832 if(iter1==NULL || iter2==NULL) { 833 return 0; /* bad arguments */ 834 } 835 if(iter1==iter2) { 836 return 0; /* identical iterators */ 837 } 838 839 /* reset iterators to start? */ 840 iter1->move(iter1, 0, UITER_START); 841 iter2->move(iter2, 0, UITER_START); 842 843 /* compare identical prefixes - they do not need to be fixed up */ 844 for(;;) { 845 c1=iter1->next(iter1); 846 c2=iter2->next(iter2); 847 if(c1!=c2) { 848 break; 849 } 850 if(c1==-1) { 851 return 0; 852 } 853 } 854 855 /* if both values are in or above the surrogate range, fix them up */ 856 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 857 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 858 if( 859 (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) || 860 (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1)))) 861 ) { 862 /* part of a surrogate pair, leave >=d800 */ 863 } else { 864 /* BMP code point - may be surrogate code point - make <d800 */ 865 c1-=0x2800; 866 } 867 868 if( 869 (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) || 870 (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2)))) 871 ) { 872 /* part of a surrogate pair, leave >=d800 */ 873 } else { 874 /* BMP code point - may be surrogate code point - make <d800 */ 875 c2-=0x2800; 876 } 877 } 878 879 /* now c1 and c2 are in the requested (code unit or 
code point) order */ 880 return (int32_t)c1-(int32_t)c2; 881 } 882 883 #if 0 884 /* 885 * u_strCompareIter() does not leave the iterators _on_ the different units. 886 * This is possible but would cost a few extra indirect function calls to back 887 * up if the last unit (c1 or c2 respectively) was >=0. 888 * 889 * Consistently leaving them _behind_ the different units is not an option 890 * because the current "unit" is the end of the string if that is reached, 891 * and in such a case the iterator does not move. 892 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end 893 * of their strings. Calling previous() on each does not move them to where 894 * the comparison fails. 895 * 896 * So the simplest semantics is to not define where the iterators end up. 897 * 898 * The following fragment is part of what would need to be done for backing up. 899 */ 900 void fragment { 901 /* iff a surrogate is part of a surrogate pair, leave >=d800 */ 902 if(c1<=0xdbff) { 903 if(!U16_IS_TRAIL(iter1->current(iter1))) { 904 /* lead surrogate code point - make <d800 */ 905 c1-=0x2800; 906 } 907 } else if(c1<=0xdfff) { 908 int32_t idx=iter1->getIndex(iter1, UITER_CURRENT); 909 iter1->previous(iter1); /* ==c1 */ 910 if(!U16_IS_LEAD(iter1->previous(iter1))) { 911 /* trail surrogate code point - make <d800 */ 912 c1-=0x2800; 913 } 914 /* go back to behind where the difference is */ 915 iter1->move(iter1, idx, UITER_ZERO); 916 } else /* 0xe000<=c1<=0xffff */ { 917 /* BMP code point - make <d800 */ 918 c1-=0x2800; 919 } 920 } 921 #endif 922 923 U_CAPI int32_t U_EXPORT2 924 u_strCompare(const UChar *s1, int32_t length1, 925 const UChar *s2, int32_t length2, 926 UBool codePointOrder) { 927 /* argument checking */ 928 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 929 return 0; 930 } 931 return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder); 932 } 933 934 /* String compare in code point order - u_strcmp() compares in code unit order. 
*/ 935 U_CAPI int32_t U_EXPORT2 936 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) { 937 return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE); 938 } 939 940 U_CAPI int32_t U_EXPORT2 941 u_strncmp(const UChar *s1, 942 const UChar *s2, 943 int32_t n) 944 { 945 if(n > 0) { 946 int32_t rc; 947 for(;;) { 948 rc = (int32_t)*s1 - (int32_t)*s2; 949 if(rc != 0 || *s1 == 0 || --n == 0) { 950 return rc; 951 } 952 ++s1; 953 ++s2; 954 } 955 } else { 956 return 0; 957 } 958 } 959 960 U_CAPI int32_t U_EXPORT2 961 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) { 962 return uprv_strCompare(s1, n, s2, n, TRUE, TRUE); 963 } 964 965 U_CAPI UChar* U_EXPORT2 966 u_strcpy(UChar *dst, 967 const UChar *src) 968 { 969 UChar *anchor = dst; /* save a pointer to start of dst */ 970 971 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ 972 } 973 974 return anchor; 975 } 976 977 U_CAPI UChar* U_EXPORT2 978 u_strncpy(UChar *dst, 979 const UChar *src, 980 int32_t n) 981 { 982 UChar *anchor = dst; /* save a pointer to start of dst */ 983 984 /* copy string 2 over */ 985 while(n > 0 && (*(dst++) = *(src++)) != 0) { 986 --n; 987 } 988 989 return anchor; 990 } 991 992 U_CAPI int32_t U_EXPORT2 993 u_strlen(const UChar *s) 994 { 995 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR 996 return (int32_t)uprv_wcslen((const wchar_t *)s); 997 #else 998 const UChar *t = s; 999 while(*t != 0) { 1000 ++t; 1001 } 1002 return t - s; 1003 #endif 1004 } 1005 1006 U_CAPI int32_t U_EXPORT2 1007 u_countChar32(const UChar *s, int32_t length) { 1008 int32_t count; 1009 1010 if(s==NULL || length<-1) { 1011 return 0; 1012 } 1013 1014 count=0; 1015 if(length>=0) { 1016 while(length>0) { 1017 ++count; 1018 if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) { 1019 s+=2; 1020 length-=2; 1021 } else { 1022 ++s; 1023 --length; 1024 } 1025 } 1026 } else /* length==-1 */ { 1027 UChar c; 1028 1029 for(;;) { 1030 if((c=*s++)==0) { 1031 break; 1032 } 1033 ++count; 1034 1035 /* 1036 * sufficient 
to look ahead one because of UTF-16; 1037 * safe to look ahead one because at worst that would be the terminating NUL 1038 */ 1039 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { 1040 ++s; 1041 } 1042 } 1043 } 1044 return count; 1045 } 1046 1047 U_CAPI UBool U_EXPORT2 1048 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { 1049 1050 if(number<0) { 1051 return TRUE; 1052 } 1053 if(s==NULL || length<-1) { 1054 return FALSE; 1055 } 1056 1057 if(length==-1) { 1058 /* s is NUL-terminated */ 1059 UChar c; 1060 1061 /* count code points until they exceed */ 1062 for(;;) { 1063 if((c=*s++)==0) { 1064 return FALSE; 1065 } 1066 if(number==0) { 1067 return TRUE; 1068 } 1069 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { 1070 ++s; 1071 } 1072 --number; 1073 } 1074 } else { 1075 /* length>=0 known */ 1076 const UChar *limit; 1077 int32_t maxSupplementary; 1078 1079 /* s contains at least (length+1)/2 code points: <=2 UChars per cp */ 1080 if(((length+1)/2)>number) { 1081 return TRUE; 1082 } 1083 1084 /* check if s does not even contain enough UChars */ 1085 maxSupplementary=length-number; 1086 if(maxSupplementary<=0) { 1087 return FALSE; 1088 } 1089 /* there are maxSupplementary=length-number more UChars than asked-for code points */ 1090 1091 /* 1092 * count code points until they exceed and also check that there are 1093 * no more than maxSupplementary supplementary code points (UChar pairs) 1094 */ 1095 limit=s+length; 1096 for(;;) { 1097 if(s==limit) { 1098 return FALSE; 1099 } 1100 if(number==0) { 1101 return TRUE; 1102 } 1103 if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) { 1104 ++s; 1105 if(--maxSupplementary<=0) { 1106 /* too many pairs - too few code points */ 1107 return FALSE; 1108 } 1109 } 1110 --number; 1111 } 1112 } 1113 } 1114 1115 U_CAPI UChar * U_EXPORT2 1116 u_memcpy(UChar *dest, const UChar *src, int32_t count) { 1117 if(count > 0) { 1118 uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR); 1119 } 1120 return dest; 1121 } 1122 1123 U_CAPI 
UChar * U_EXPORT2 1124 u_memmove(UChar *dest, const UChar *src, int32_t count) { 1125 if(count > 0) { 1126 uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR); 1127 } 1128 return dest; 1129 } 1130 1131 U_CAPI UChar * U_EXPORT2 1132 u_memset(UChar *dest, UChar c, int32_t count) { 1133 if(count > 0) { 1134 UChar *ptr = dest; 1135 UChar *limit = dest + count; 1136 1137 while (ptr < limit) { 1138 *(ptr++) = c; 1139 } 1140 } 1141 return dest; 1142 } 1143 1144 U_CAPI int32_t U_EXPORT2 1145 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) { 1146 if(count > 0) { 1147 const UChar *limit = buf1 + count; 1148 int32_t result; 1149 1150 while (buf1 < limit) { 1151 result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2; 1152 if (result != 0) { 1153 return result; 1154 } 1155 buf1++; 1156 buf2++; 1157 } 1158 } 1159 return 0; 1160 } 1161 1162 U_CAPI int32_t U_EXPORT2 1163 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) { 1164 return uprv_strCompare(s1, count, s2, count, FALSE, TRUE); 1165 } 1166 1167 /* u_unescape & support fns ------------------------------------------------- */ 1168 1169 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ 1170 static const UChar UNESCAPE_MAP[] = { 1171 /*" 0x22, 0x22 */ 1172 /*' 0x27, 0x27 */ 1173 /*? 
0x3F, 0x3F */ 1174 /*\ 0x5C, 0x5C */ 1175 /*a*/ 0x61, 0x07, 1176 /*b*/ 0x62, 0x08, 1177 /*e*/ 0x65, 0x1b, 1178 /*f*/ 0x66, 0x0c, 1179 /*n*/ 0x6E, 0x0a, 1180 /*r*/ 0x72, 0x0d, 1181 /*t*/ 0x74, 0x09, 1182 /*v*/ 0x76, 0x0b 1183 }; 1184 enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) }; 1185 1186 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */ 1187 static int8_t _digit8(UChar c) { 1188 if (c >= 0x0030 && c <= 0x0037) { 1189 return (int8_t)(c - 0x0030); 1190 } 1191 return -1; 1192 } 1193 1194 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */ 1195 static int8_t _digit16(UChar c) { 1196 if (c >= 0x0030 && c <= 0x0039) { 1197 return (int8_t)(c - 0x0030); 1198 } 1199 if (c >= 0x0041 && c <= 0x0046) { 1200 return (int8_t)(c - (0x0041 - 10)); 1201 } 1202 if (c >= 0x0061 && c <= 0x0066) { 1203 return (int8_t)(c - (0x0061 - 10)); 1204 } 1205 return -1; 1206 } 1207 1208 /* Parse a single escape sequence. Although this method deals in 1209 * UChars, it does not use C++ or UnicodeString. This allows it to 1210 * be used from C contexts. 
*/ 1211 U_CAPI UChar32 U_EXPORT2 1212 u_unescapeAt(UNESCAPE_CHAR_AT charAt, 1213 int32_t *offset, 1214 int32_t length, 1215 void *context) { 1216 1217 int32_t start = *offset; 1218 UChar c; 1219 UChar32 result = 0; 1220 int8_t n = 0; 1221 int8_t minDig = 0; 1222 int8_t maxDig = 0; 1223 int8_t bitsPerDigit = 4; 1224 int8_t dig; 1225 int32_t i; 1226 UBool braces = FALSE; 1227 1228 /* Check that offset is in range */ 1229 if (*offset < 0 || *offset >= length) { 1230 goto err; 1231 } 1232 1233 /* Fetch first UChar after '\\' */ 1234 c = charAt((*offset)++, context); 1235 1236 /* Convert hexadecimal and octal escapes */ 1237 switch (c) { 1238 case 0x0075 /*'u'*/: 1239 minDig = maxDig = 4; 1240 break; 1241 case 0x0055 /*'U'*/: 1242 minDig = maxDig = 8; 1243 break; 1244 case 0x0078 /*'x'*/: 1245 minDig = 1; 1246 if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) { 1247 ++(*offset); 1248 braces = TRUE; 1249 maxDig = 8; 1250 } else { 1251 maxDig = 2; 1252 } 1253 break; 1254 default: 1255 dig = _digit8(c); 1256 if (dig >= 0) { 1257 minDig = 1; 1258 maxDig = 3; 1259 n = 1; /* Already have first octal digit */ 1260 bitsPerDigit = 3; 1261 result = dig; 1262 } 1263 break; 1264 } 1265 if (minDig != 0) { 1266 while (*offset < length && n < maxDig) { 1267 c = charAt(*offset, context); 1268 dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c)); 1269 if (dig < 0) { 1270 break; 1271 } 1272 result = (result << bitsPerDigit) | dig; 1273 ++(*offset); 1274 ++n; 1275 } 1276 if (n < minDig) { 1277 goto err; 1278 } 1279 if (braces) { 1280 if (c != 0x7D /*}*/) { 1281 goto err; 1282 } 1283 ++(*offset); 1284 } 1285 if (result < 0 || result >= 0x110000) { 1286 goto err; 1287 } 1288 /* If an escape sequence specifies a lead surrogate, see if 1289 * there is a trail surrogate after it, either as an escape or 1290 * as a literal. If so, join them up into a supplementary. 
1291 */ 1292 if (*offset < length && U16_IS_LEAD(result)) { 1293 int32_t ahead = *offset + 1; 1294 c = charAt(*offset, context); 1295 if (c == 0x5C /*'\\'*/ && ahead < length) { 1296 c = (UChar) u_unescapeAt(charAt, &ahead, length, context); 1297 } 1298 if (U16_IS_TRAIL(c)) { 1299 *offset = ahead; 1300 result = U16_GET_SUPPLEMENTARY(result, c); 1301 } 1302 } 1303 return result; 1304 } 1305 1306 /* Convert C-style escapes in table */ 1307 for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) { 1308 if (c == UNESCAPE_MAP[i]) { 1309 return UNESCAPE_MAP[i+1]; 1310 } else if (c < UNESCAPE_MAP[i]) { 1311 break; 1312 } 1313 } 1314 1315 /* Map \cX to control-X: X & 0x1F */ 1316 if (c == 0x0063 /*'c'*/ && *offset < length) { 1317 c = charAt((*offset)++, context); 1318 if (U16_IS_LEAD(c) && *offset < length) { 1319 UChar c2 = charAt(*offset, context); 1320 if (U16_IS_TRAIL(c2)) { 1321 ++(*offset); 1322 c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */ 1323 } 1324 } 1325 return 0x1F & c; 1326 } 1327 1328 /* If no special forms are recognized, then consider 1329 * the backslash to generically escape the next character. 1330 * Deal with surrogate pairs. */ 1331 if (U16_IS_LEAD(c) && *offset < length) { 1332 UChar c2 = charAt(*offset, context); 1333 if (U16_IS_TRAIL(c2)) { 1334 ++(*offset); 1335 return U16_GET_SUPPLEMENTARY(c, c2); 1336 } 1337 } 1338 return c; 1339 1340 err: 1341 /* Invalid escape sequence */ 1342 *offset = start; /* Reset to initial value */ 1343 return (UChar32)0xFFFFFFFF; 1344 } 1345 1346 /* u_unescapeAt() callback to return a UChar from a char* */ 1347 static UChar U_CALLCONV 1348 _charPtr_charAt(int32_t offset, void *context) { 1349 UChar c16; 1350 /* It would be more efficient to access the invariant tables 1351 * directly but there is no API for that. 
*/ 1352 u_charsToUChars(((char*) context) + offset, &c16, 1); 1353 return c16; 1354 } 1355 1356 /* Append an escape-free segment of the text; used by u_unescape() */ 1357 static void _appendUChars(UChar *dest, int32_t destCapacity, 1358 const char *src, int32_t srcLen) { 1359 if (destCapacity < 0) { 1360 destCapacity = 0; 1361 } 1362 if (srcLen > destCapacity) { 1363 srcLen = destCapacity; 1364 } 1365 u_charsToUChars(src, dest, srcLen); 1366 } 1367 1368 /* Do an invariant conversion of char* -> UChar*, with escape parsing */ 1369 U_CAPI int32_t U_EXPORT2 1370 u_unescape(const char *src, UChar *dest, int32_t destCapacity) { 1371 const char *segment = src; 1372 int32_t i = 0; 1373 char c; 1374 1375 while ((c=*src) != 0) { 1376 /* '\\' intentionally written as compiler-specific 1377 * character constant to correspond to compiler-specific 1378 * char* constants. */ 1379 if (c == '\\') { 1380 int32_t lenParsed = 0; 1381 UChar32 c32; 1382 if (src != segment) { 1383 if (dest != NULL) { 1384 _appendUChars(dest + i, destCapacity - i, 1385 segment, (int32_t)(src - segment)); 1386 } 1387 i += (int32_t)(src - segment); 1388 } 1389 ++src; /* advance past '\\' */ 1390 c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src); 1391 if (lenParsed == 0) { 1392 goto err; 1393 } 1394 src += lenParsed; /* advance past escape seq. 
*/ 1395 if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) { 1396 U16_APPEND_UNSAFE(dest, i, c32); 1397 } else { 1398 i += U16_LENGTH(c32); 1399 } 1400 segment = src; 1401 } else { 1402 ++src; 1403 } 1404 } 1405 if (src != segment) { 1406 if (dest != NULL) { 1407 _appendUChars(dest + i, destCapacity - i, 1408 segment, (int32_t)(src - segment)); 1409 } 1410 i += (int32_t)(src - segment); 1411 } 1412 if (dest != NULL && i < destCapacity) { 1413 dest[i] = 0; 1414 } 1415 return i; 1416 1417 err: 1418 if (dest != NULL && destCapacity > 0) { 1419 *dest = 0; 1420 } 1421 return 0; 1422 } 1423 1424 /* NUL-termination of strings ----------------------------------------------- */ 1425 1426 /** 1427 * NUL-terminate a string no matter what its type. 1428 * Set warning and error codes accordingly. 1429 */ 1430 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \ 1431 if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \ 1432 /* not a public function, so no complete argument checking */ \ 1433 \ 1434 if(length<0) { \ 1435 /* assume that the caller handles this */ \ 1436 } else if(length<destCapacity) { \ 1437 /* NUL-terminate the string, the NUL fits */ \ 1438 dest[length]=0; \ 1439 /* unset the not-terminated warning but leave all others */ \ 1440 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \ 1441 *pErrorCode=U_ZERO_ERROR; \ 1442 } \ 1443 } else if(length==destCapacity) { \ 1444 /* unable to NUL-terminate, but the string itself fit - set a warning code */ \ 1445 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \ 1446 } else /* length>destCapacity */ { \ 1447 /* even the string itself did not fit - set an error code */ \ 1448 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \ 1449 } \ 1450 } 1451 1452 U_CAPI int32_t U_EXPORT2 1453 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1454 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1455 return length; 1456 } 1457 1458 U_CAPI int32_t U_EXPORT2 1459 
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1460 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1461 return length; 1462 } 1463 1464 U_CAPI int32_t U_EXPORT2 1465 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1466 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1467 return length; 1468 } 1469 1470 U_CAPI int32_t U_EXPORT2 1471 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1472 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1473 return length; 1474 } 1475 1476 // Compute the hash code for a string -------------------------------------- *** 1477 1478 // Moved here from uhash.c so that UnicodeString::hashCode() does not depend 1479 // on UHashtable code. 1480 1481 /* 1482 Compute the hash by iterating sparsely over about 32 (up to 63) 1483 characters spaced evenly through the string. For each character, 1484 multiply the previous hash value by a prime number and add the new 1485 character in, like a linear congruential random number generator, 1486 producing a pseudorandom deterministic value well distributed over 1487 the output range. [LIU] 1488 */ 1489 1490 #define STRING_HASH(TYPE, STR, STRLEN, DEREF) \ 1491 uint32_t hash = 0; \ 1492 const TYPE *p = (const TYPE*) STR; \ 1493 if (p != NULL) { \ 1494 int32_t len = (int32_t)(STRLEN); \ 1495 int32_t inc = ((len - 32) / 32) + 1; \ 1496 const TYPE *limit = p + len; \ 1497 while (p<limit) { \ 1498 hash = (hash * 37) + DEREF; \ 1499 p += inc; \ 1500 } \ 1501 } \ 1502 return static_cast<int32_t>(hash) 1503 1504 /* Used by UnicodeString to compute its hashcode - Not public API. 
*/
/* Hash `length` UChars starting at str via STRING_HASH; each sampled
 * UChar is added to the hash as-is.  A NULL str hashes to 0. */
U_CAPI int32_t U_EXPORT2
ustr_hashUCharsN(const UChar *str, int32_t length) {
    STRING_HASH(UChar, str, length, *p);
}

/* Hash `length` chars starting at str via STRING_HASH; the chars are
 * read as uint8_t values.  A NULL str hashes to 0. */
U_CAPI int32_t U_EXPORT2
ustr_hashCharsN(const char *str, int32_t length) {
    STRING_HASH(uint8_t, str, length, *p);
}

/* Hash `length` chars starting at str via STRING_HASH; each sampled char
 * is passed through uprv_tolower() and cast to uint8_t before it is
 * added.  A NULL str hashes to 0. */
U_CAPI int32_t U_EXPORT2
ustr_hashICharsN(const char *str, int32_t length) {
    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
}