1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * File ustring.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 12/07/98 bertrand Creation. 17 ****************************************************************************** 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/uchar.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utf16.h" 25 #include "cstring.h" 26 #include "cwchar.h" 27 #include "cmemory.h" 28 #include "ustr_imp.h" 29 30 /* ANSI string.h - style functions ------------------------------------------ */ 31 32 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */ 33 #define U_BMP_MAX 0xffff 34 35 /* Forward binary string search functions ----------------------------------- */ 36 37 /* 38 * Test if a substring match inside a string is at code point boundaries. 39 * All pointers refer to the same buffer. 40 * The limit pointer may be NULL, all others must be real pointers. 41 */ 42 static inline UBool 43 isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) { 44 if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) { 45 /* the leading edge of the match is in the middle of a surrogate pair */ 46 return FALSE; 47 } 48 if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) { 49 /* the trailing edge of the match is in the middle of a surrogate pair */ 50 return FALSE; 51 } 52 return TRUE; 53 } 54 55 U_CAPI UChar * U_EXPORT2 56 u_strFindFirst(const UChar *s, int32_t length, 57 const UChar *sub, int32_t subLength) { 58 const UChar *start, *p, *q, *subLimit; 59 UChar c, cs, cq; 60 61 if(sub==NULL || subLength<-1) { 62 return (UChar *)s; 63 } 64 if(s==NULL || length<-1) { 65 return NULL; 66 } 67 68 start=s; 69 70 if(length<0 && subLength<0) { 71 /* both strings are NUL-terminated */ 72 if((cs=*sub++)==0) { 73 return (UChar *)s; 74 } 75 if(*sub==0 && !U16_IS_SURROGATE(cs)) { 76 /* the substring consists of a single, non-surrogate BMP code point */ 77 return u_strchr(s, cs); 78 } 79 80 while((c=*s++)!=0) { 81 if(c==cs) { 82 /* found first substring UChar, compare rest */ 83 p=s; 84 q=sub; 85 for(;;) { 86 if((cq=*q)==0) { 87 if(isMatchAtCPBoundary(start, s-1, p, NULL)) { 88 return (UChar *)(s-1); /* well-formed match */ 89 } else { 90 break; /* no match because surrogate pair is split */ 91 } 92 } 93 if((c=*p)==0) { 94 return NULL; /* no match, and none possible after s */ 95 } 96 if(c!=cq) { 97 break; /* no match */ 98 } 99 ++p; 100 ++q; 101 } 102 } 103 } 104 105 /* not found */ 106 return NULL; 107 } 108 109 if(subLength<0) { 110 subLength=u_strlen(sub); 111 } 112 if(subLength==0) { 113 return (UChar *)s; 114 } 115 116 /* get sub[0] to search for it fast */ 117 cs=*sub++; 118 --subLength; 119 subLimit=sub+subLength; 120 121 if(subLength==0 && !U16_IS_SURROGATE(cs)) { 122 /* the substring consists of a single, non-surrogate BMP code point */ 123 return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length); 124 } 125 126 if(length<0) { 127 /* s is NUL-terminated */ 128 while((c=*s++)!=0) { 129 if(c==cs) { 130 /* found first substring UChar, compare rest */ 131 p=s; 132 q=sub; 133 for(;;) { 134 if(q==subLimit) { 135 if(isMatchAtCPBoundary(start, s-1, p, NULL)) { 136 return (UChar *)(s-1); /* well-formed match */ 137 } else { 138 break; /* no match because surrogate pair is split */ 139 } 140 } 141 if((c=*p)==0) { 142 return NULL; /* no match, and none possible after s */ 143 } 144 if(c!=*q) { 145 break; /* no match */ 146 } 147 ++p; 148 ++q; 149 } 150 } 151 } 152 } else { 153 const UChar *limit, *preLimit; 154 155 /* subLength was decremented above */ 156 if(length<=subLength) { 157 return NULL; /* s is shorter than sub */ 158 } 159 160 limit=s+length; 161 162 /* the substring must start before preLimit */ 163 preLimit=limit-subLength; 164 165 while(s!=preLimit) { 166 c=*s++; 167 if(c==cs) { 168 /* found first substring UChar, compare rest */ 169 p=s; 170 q=sub; 171 for(;;) { 172 if(q==subLimit) { 173 if(isMatchAtCPBoundary(start, s-1, p, limit)) { 174 return (UChar *)(s-1); /* well-formed match */ 175 } else { 176 break; /* no match because surrogate pair is split */ 177 } 178 } 179 if(*p!=*q) { 180 break; /* no match */ 181 } 182 ++p; 183 ++q; 184 } 185 } 186 } 187 } 188 189 /* not found */ 190 return NULL; 191 } 192 193 U_CAPI UChar * U_EXPORT2 194 u_strstr(const UChar *s, const UChar *substring) { 195 return u_strFindFirst(s, -1, substring, -1); 196 } 197 198 U_CAPI UChar * U_EXPORT2 199 u_strchr(const UChar *s, UChar c) { 200 if(U16_IS_SURROGATE(c)) { 201 /* make sure to not find half of a surrogate pair */ 202 return u_strFindFirst(s, -1, &c, 1); 203 } else { 204 UChar cs; 205 206 /* trivial search for a BMP code point */ 207 for(;;) { 208 if((cs=*s)==c) { 209 return (UChar *)s; 210 } 211 if(cs==0) { 212 return NULL; 213 } 214 ++s; 215 } 216 } 217 } 218 219 U_CAPI UChar * U_EXPORT2 220 u_strchr32(const UChar *s, UChar32 c) { 221 if((uint32_t)c<=U_BMP_MAX) { 222 /* find BMP code point */ 223 return u_strchr(s, (UChar)c); 224 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 225 /* find supplementary code point as surrogate pair */ 226 UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); 227 228 while((cs=*s++)!=0) { 229 if(cs==lead && *s==trail) { 230 return (UChar *)(s-1); 231 } 232 } 233 return NULL; 234 } else { 235 /* not a Unicode code point, not findable */ 236 return NULL; 237 } 238 } 239 240 U_CAPI UChar * U_EXPORT2 241 u_memchr(const UChar *s, UChar c, int32_t count) { 242 if(count<=0) { 243 return NULL; /* no string */ 244 } else if(U16_IS_SURROGATE(c)) { 245 /* make sure to not find half of a surrogate pair */ 246 return u_strFindFirst(s, count, &c, 1); 247 } else { 248 /* trivial search for a BMP code point */ 249 const UChar *limit=s+count; 250 do { 251 if(*s==c) { 252 return (UChar *)s; 253 } 254 } while(++s!=limit); 255 return NULL; 256 } 257 } 258 259 U_CAPI UChar * U_EXPORT2 260 u_memchr32(const UChar *s, UChar32 c, int32_t count) { 261 if((uint32_t)c<=U_BMP_MAX) { 262 /* find BMP code point */ 263 return u_memchr(s, (UChar)c, count); 264 } else if(count<2) { 265 /* too short for a surrogate pair */ 266 return NULL; 267 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 268 /* find supplementary code point as surrogate pair */ 269 const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */ 270 UChar lead=U16_LEAD(c), trail=U16_TRAIL(c); 271 272 do { 273 if(*s==lead && *(s+1)==trail) { 274 return (UChar *)s; 275 } 276 } while(++s!=limit); 277 return NULL; 278 } else { 279 /* not a Unicode code point, not findable */ 280 return NULL; 281 } 282 } 283 284 /* Backward binary string search functions ---------------------------------- */ 285 286 U_CAPI UChar * U_EXPORT2 287 u_strFindLast(const UChar *s, int32_t length, 288 const UChar *sub, int32_t subLength) { 289 const UChar *start, *limit, *p, *q, *subLimit; 290 UChar c, cs; 291 292 if(sub==NULL || subLength<-1) { 293 return (UChar *)s; 294 } 295 if(s==NULL || length<-1) { 296 return NULL; 297 } 298 299 /* 300 * This implementation is more lazy than the one for u_strFindFirst(): 301 * There is no special search code for NUL-terminated strings. 302 * It does not seem to be worth it for searching substrings to 303 * search forward and find all matches like in u_strrchr() and similar. 304 * Therefore, we simply get both string lengths and search backward. 305 * 306 * markus 2002oct23 307 */ 308 309 if(subLength<0) { 310 subLength=u_strlen(sub); 311 } 312 if(subLength==0) { 313 return (UChar *)s; 314 } 315 316 /* get sub[subLength-1] to search for it fast */ 317 subLimit=sub+subLength; 318 cs=*(--subLimit); 319 --subLength; 320 321 if(subLength==0 && !U16_IS_SURROGATE(cs)) { 322 /* the substring consists of a single, non-surrogate BMP code point */ 323 return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length); 324 } 325 326 if(length<0) { 327 length=u_strlen(s); 328 } 329 330 /* subLength was decremented above */ 331 if(length<=subLength) { 332 return NULL; /* s is shorter than sub */ 333 } 334 335 start=s; 336 limit=s+length; 337 338 /* the substring must start no later than s+subLength */ 339 s+=subLength; 340 341 while(s!=limit) { 342 c=*(--limit); 343 if(c==cs) { 344 /* found last substring UChar, compare rest */ 345 p=limit; 346 q=subLimit; 347 for(;;) { 348 if(q==sub) { 349 if(isMatchAtCPBoundary(start, p, limit+1, start+length)) { 350 return (UChar *)p; /* well-formed match */ 351 } else { 352 break; /* no match because surrogate pair is split */ 353 } 354 } 355 if(*(--p)!=*(--q)) { 356 break; /* no match */ 357 } 358 } 359 } 360 } 361 362 /* not found */ 363 return NULL; 364 } 365 366 U_CAPI UChar * U_EXPORT2 367 u_strrstr(const UChar *s, const UChar *substring) { 368 return u_strFindLast(s, -1, substring, -1); 369 } 370 371 U_CAPI UChar * U_EXPORT2 372 u_strrchr(const UChar *s, UChar c) { 373 if(U16_IS_SURROGATE(c)) { 374 /* make sure to not find half of a surrogate pair */ 375 return u_strFindLast(s, -1, &c, 1); 376 } else { 377 const UChar *result=NULL; 378 UChar cs; 379 380 /* trivial search for a BMP code point */ 381 for(;;) { 382 if((cs=*s)==c) { 383 result=s; 384 } 385 if(cs==0) { 386 return (UChar *)result; 387 } 388 ++s; 389 } 390 } 391 } 392 393 U_CAPI UChar * U_EXPORT2 394 u_strrchr32(const UChar *s, UChar32 c) { 395 if((uint32_t)c<=U_BMP_MAX) { 396 /* find BMP code point */ 397 return u_strrchr(s, (UChar)c); 398 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 399 /* find supplementary code point as surrogate pair */ 400 const UChar *result=NULL; 401 UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); 402 403 while((cs=*s++)!=0) { 404 if(cs==lead && *s==trail) { 405 result=s-1; 406 } 407 } 408 return (UChar *)result; 409 } else { 410 /* not a Unicode code point, not findable */ 411 return NULL; 412 } 413 } 414 415 U_CAPI UChar * U_EXPORT2 416 u_memrchr(const UChar *s, UChar c, int32_t count) { 417 if(count<=0) { 418 return NULL; /* no string */ 419 } else if(U16_IS_SURROGATE(c)) { 420 /* make sure to not find half of a surrogate pair */ 421 return u_strFindLast(s, count, &c, 1); 422 } else { 423 /* trivial search for a BMP code point */ 424 const UChar *limit=s+count; 425 do { 426 if(*(--limit)==c) { 427 return (UChar *)limit; 428 } 429 } while(s!=limit); 430 return NULL; 431 } 432 } 433 434 U_CAPI UChar * U_EXPORT2 435 u_memrchr32(const UChar *s, UChar32 c, int32_t count) { 436 if((uint32_t)c<=U_BMP_MAX) { 437 /* find BMP code point */ 438 return u_memrchr(s, (UChar)c, count); 439 } else if(count<2) { 440 /* too short for a surrogate pair */ 441 return NULL; 442 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 443 /* find supplementary code point as surrogate pair */ 444 const UChar *limit=s+count-1; 445 UChar lead=U16_LEAD(c), trail=U16_TRAIL(c); 446 447 do { 448 if(*limit==trail && *(limit-1)==lead) { 449 return (UChar *)(limit-1); 450 } 451 } while(s!=--limit); 452 return NULL; 453 } else { 454 /* not a Unicode code point, not findable */ 455 return NULL; 456 } 457 } 458 459 /* Tokenization functions --------------------------------------------------- */ 460 461 /* 462 * Match each code point in a string against each code point in the matchSet. 463 * Return the index of the first string code point that 464 * is (polarity==TRUE) or is not (FALSE) contained in the matchSet. 465 * Return -(string length)-1 if there is no such code point. 466 */ 467 static int32_t 468 _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) { 469 int32_t matchLen, matchBMPLen, strItr, matchItr; 470 UChar32 stringCh, matchCh; 471 UChar c, c2; 472 473 /* first part of matchSet contains only BMP code points */ 474 matchBMPLen = 0; 475 while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) { 476 ++matchBMPLen; 477 } 478 479 /* second part of matchSet contains BMP and supplementary code points */ 480 matchLen = matchBMPLen; 481 while(matchSet[matchLen] != 0) { 482 ++matchLen; 483 } 484 485 for(strItr = 0; (c = string[strItr]) != 0;) { 486 ++strItr; 487 if(U16_IS_SINGLE(c)) { 488 if(polarity) { 489 for(matchItr = 0; matchItr < matchLen; ++matchItr) { 490 if(c == matchSet[matchItr]) { 491 return strItr - 1; /* one matches */ 492 } 493 } 494 } else { 495 for(matchItr = 0; matchItr < matchLen; ++matchItr) { 496 if(c == matchSet[matchItr]) { 497 goto endloop; 498 } 499 } 500 return strItr - 1; /* none matches */ 501 } 502 } else { 503 /* 504 * No need to check for string length before U16_IS_TRAIL 505 * because c2 could at worst be the terminating NUL. 506 */ 507 if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) { 508 ++strItr; 509 stringCh = U16_GET_SUPPLEMENTARY(c, c2); 510 } else { 511 stringCh = c; /* unpaired trail surrogate */ 512 } 513 514 if(polarity) { 515 for(matchItr = matchBMPLen; matchItr < matchLen;) { 516 U16_NEXT(matchSet, matchItr, matchLen, matchCh); 517 if(stringCh == matchCh) { 518 return strItr - U16_LENGTH(stringCh); /* one matches */ 519 } 520 } 521 } else { 522 for(matchItr = matchBMPLen; matchItr < matchLen;) { 523 U16_NEXT(matchSet, matchItr, matchLen, matchCh); 524 if(stringCh == matchCh) { 525 goto endloop; 526 } 527 } 528 return strItr - U16_LENGTH(stringCh); /* none matches */ 529 } 530 } 531 endloop: 532 /* wish C had continue with labels like Java... */; 533 } 534 535 /* Didn't find it. */ 536 return -strItr-1; 537 } 538 539 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */ 540 U_CAPI UChar * U_EXPORT2 541 u_strpbrk(const UChar *string, const UChar *matchSet) 542 { 543 int32_t idx = _matchFromSet(string, matchSet, TRUE); 544 if(idx >= 0) { 545 return (UChar *)string + idx; 546 } else { 547 return NULL; 548 } 549 } 550 551 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */ 552 U_CAPI int32_t U_EXPORT2 553 u_strcspn(const UChar *string, const UChar *matchSet) 554 { 555 int32_t idx = _matchFromSet(string, matchSet, TRUE); 556 if(idx >= 0) { 557 return idx; 558 } else { 559 return -idx - 1; /* == u_strlen(string) */ 560 } 561 } 562 563 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */ 564 U_CAPI int32_t U_EXPORT2 565 u_strspn(const UChar *string, const UChar *matchSet) 566 { 567 int32_t idx = _matchFromSet(string, matchSet, FALSE); 568 if(idx >= 0) { 569 return idx; 570 } else { 571 return -idx - 1; /* == u_strlen(string) */ 572 } 573 } 574 575 /* ----- Text manipulation functions --- */ 576 577 U_CAPI UChar* U_EXPORT2 578 u_strtok_r(UChar *src, 579 const UChar *delim, 580 UChar **saveState) 581 { 582 UChar *tokSource; 583 UChar *nextToken; 584 uint32_t nonDelimIdx; 585 586 /* If saveState is NULL, the user messed up. */ 587 if (src != NULL) { 588 tokSource = src; 589 *saveState = src; /* Set to "src" in case there are no delimiters */ 590 } 591 else if (*saveState) { 592 tokSource = *saveState; 593 } 594 else { 595 /* src == NULL && *saveState == NULL */ 596 /* This shouldn't happen. We already finished tokenizing. */ 597 return NULL; 598 } 599 600 /* Skip initial delimiters */ 601 nonDelimIdx = u_strspn(tokSource, delim); 602 tokSource = &tokSource[nonDelimIdx]; 603 604 if (*tokSource) { 605 nextToken = u_strpbrk(tokSource, delim); 606 if (nextToken != NULL) { 607 /* Create a token */ 608 *(nextToken++) = 0; 609 *saveState = nextToken; 610 return tokSource; 611 } 612 else if (*saveState) { 613 /* Return the last token */ 614 *saveState = NULL; 615 return tokSource; 616 } 617 } 618 else { 619 /* No tokens were found. Only delimiters were left. */ 620 *saveState = NULL; 621 } 622 return NULL; 623 } 624 625 /* Miscellaneous functions -------------------------------------------------- */ 626 627 U_CAPI UChar* U_EXPORT2 628 u_strcat(UChar *dst, 629 const UChar *src) 630 { 631 UChar *anchor = dst; /* save a pointer to start of dst */ 632 633 while(*dst != 0) { /* To end of first string */ 634 ++dst; 635 } 636 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ 637 } 638 639 return anchor; 640 } 641 642 U_CAPI UChar* U_EXPORT2 643 u_strncat(UChar *dst, 644 const UChar *src, 645 int32_t n ) 646 { 647 if(n > 0) { 648 UChar *anchor = dst; /* save a pointer to start of dst */ 649 650 while(*dst != 0) { /* To end of first string */ 651 ++dst; 652 } 653 while((*dst = *src) != 0) { /* copy string 2 over */ 654 ++dst; 655 if(--n == 0) { 656 *dst = 0; 657 break; 658 } 659 ++src; 660 } 661 662 return anchor; 663 } else { 664 return dst; 665 } 666 } 667 668 /* ----- Text property functions --- */ 669 670 U_CAPI int32_t U_EXPORT2 671 u_strcmp(const UChar *s1, 672 const UChar *s2) 673 { 674 UChar c1, c2; 675 676 for(;;) { 677 c1=*s1++; 678 c2=*s2++; 679 if (c1 != c2 || c1 == 0) { 680 break; 681 } 682 } 683 return (int32_t)c1 - (int32_t)c2; 684 } 685 686 U_CFUNC int32_t U_EXPORT2 687 uprv_strCompare(const UChar *s1, int32_t length1, 688 const UChar *s2, int32_t length2, 689 UBool strncmpStyle, UBool codePointOrder) { 690 const UChar *start1, *start2, *limit1, *limit2; 691 UChar c1, c2; 692 693 /* setup for fix-up */ 694 start1=s1; 695 start2=s2; 696 697 /* compare identical prefixes - they do not need to be fixed up */ 698 if(length1<0 && length2<0) { 699 /* strcmp style, both NUL-terminated */ 700 if(s1==s2) { 701 return 0; 702 } 703 704 for(;;) { 705 c1=*s1; 706 c2=*s2; 707 if(c1!=c2) { 708 break; 709 } 710 if(c1==0) { 711 return 0; 712 } 713 ++s1; 714 ++s2; 715 } 716 717 /* setup for fix-up */ 718 limit1=limit2=NULL; 719 } else if(strncmpStyle) { 720 /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */ 721 if(s1==s2) { 722 return 0; 723 } 724 725 limit1=start1+length1; 726 727 for(;;) { 728 /* both lengths are same, check only one limit */ 729 if(s1==limit1) { 730 return 0; 731 } 732 733 c1=*s1; 734 c2=*s2; 735 if(c1!=c2) { 736 break; 737 } 738 if(c1==0) { 739 return 0; 740 } 741 ++s1; 742 ++s2; 743 } 744 745 /* setup for fix-up */ 746 limit2=start2+length1; /* use length1 here, too, to enforce assumption */ 747 } else { 748 /* memcmp/UnicodeString style, both length-specified */ 749 int32_t lengthResult; 750 751 if(length1<0) { 752 length1=u_strlen(s1); 753 } 754 if(length2<0) { 755 length2=u_strlen(s2); 756 } 757 758 /* limit1=start1+min(lenght1, length2) */ 759 if(length1<length2) { 760 lengthResult=-1; 761 limit1=start1+length1; 762 } else if(length1==length2) { 763 lengthResult=0; 764 limit1=start1+length1; 765 } else /* length1>length2 */ { 766 lengthResult=1; 767 limit1=start1+length2; 768 } 769 770 if(s1==s2) { 771 return lengthResult; 772 } 773 774 for(;;) { 775 /* check pseudo-limit */ 776 if(s1==limit1) { 777 return lengthResult; 778 } 779 780 c1=*s1; 781 c2=*s2; 782 if(c1!=c2) { 783 break; 784 } 785 ++s1; 786 ++s2; 787 } 788 789 /* setup for fix-up */ 790 limit1=start1+length1; 791 limit2=start2+length2; 792 } 793 794 /* if both values are in or above the surrogate range, fix them up */ 795 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 796 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 797 if( 798 (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) || 799 (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1))) 800 ) { 801 /* part of a surrogate pair, leave >=d800 */ 802 } else { 803 /* BMP code point - may be surrogate code point - make <d800 */ 804 c1-=0x2800; 805 } 806 807 if( 808 (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) || 809 (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1))) 810 ) { 811 /* part of a surrogate pair, leave >=d800 */ 812 } else { 813 /* BMP code point - may be surrogate code point - make <d800 */ 814 c2-=0x2800; 815 } 816 } 817 818 /* now c1 and c2 are in the requested (code unit or code point) order */ 819 return (int32_t)c1-(int32_t)c2; 820 } 821 822 /* 823 * Compare two strings as presented by UCharIterators. 824 * Use code unit or code point order. 825 * When the function returns, it is undefined where the iterators 826 * have stopped. 827 */ 828 U_CAPI int32_t U_EXPORT2 829 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) { 830 UChar32 c1, c2; 831 832 /* argument checking */ 833 if(iter1==NULL || iter2==NULL) { 834 return 0; /* bad arguments */ 835 } 836 if(iter1==iter2) { 837 return 0; /* identical iterators */ 838 } 839 840 /* reset iterators to start? */ 841 iter1->move(iter1, 0, UITER_START); 842 iter2->move(iter2, 0, UITER_START); 843 844 /* compare identical prefixes - they do not need to be fixed up */ 845 for(;;) { 846 c1=iter1->next(iter1); 847 c2=iter2->next(iter2); 848 if(c1!=c2) { 849 break; 850 } 851 if(c1==-1) { 852 return 0; 853 } 854 } 855 856 /* if both values are in or above the surrogate range, fix them up */ 857 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 858 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 859 if( 860 (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) || 861 (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1)))) 862 ) { 863 /* part of a surrogate pair, leave >=d800 */ 864 } else { 865 /* BMP code point - may be surrogate code point - make <d800 */ 866 c1-=0x2800; 867 } 868 869 if( 870 (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) || 871 (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2)))) 872 ) { 873 /* part of a surrogate pair, leave >=d800 */ 874 } else { 875 /* BMP code point - may be surrogate code point - make <d800 */ 876 c2-=0x2800; 877 } 878 } 879 880 /* now c1 and c2 are in the requested (code unit or code point) order */ 881 return (int32_t)c1-(int32_t)c2; 882 } 883 884 #if 0 885 /* 886 * u_strCompareIter() does not leave the iterators _on_ the different units. 887 * This is possible but would cost a few extra indirect function calls to back 888 * up if the last unit (c1 or c2 respectively) was >=0. 889 * 890 * Consistently leaving them _behind_ the different units is not an option 891 * because the current "unit" is the end of the string if that is reached, 892 * and in such a case the iterator does not move. 893 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end 894 * of their strings. Calling previous() on each does not move them to where 895 * the comparison fails. 896 * 897 * So the simplest semantics is to not define where the iterators end up. 898 * 899 * The following fragment is part of what would need to be done for backing up. 900 */ 901 void fragment { 902 /* iff a surrogate is part of a surrogate pair, leave >=d800 */ 903 if(c1<=0xdbff) { 904 if(!U16_IS_TRAIL(iter1->current(iter1))) { 905 /* lead surrogate code point - make <d800 */ 906 c1-=0x2800; 907 } 908 } else if(c1<=0xdfff) { 909 int32_t idx=iter1->getIndex(iter1, UITER_CURRENT); 910 iter1->previous(iter1); /* ==c1 */ 911 if(!U16_IS_LEAD(iter1->previous(iter1))) { 912 /* trail surrogate code point - make <d800 */ 913 c1-=0x2800; 914 } 915 /* go back to behind where the difference is */ 916 iter1->move(iter1, idx, UITER_ZERO); 917 } else /* 0xe000<=c1<=0xffff */ { 918 /* BMP code point - make <d800 */ 919 c1-=0x2800; 920 } 921 } 922 #endif 923 924 U_CAPI int32_t U_EXPORT2 925 u_strCompare(const UChar *s1, int32_t length1, 926 const UChar *s2, int32_t length2, 927 UBool codePointOrder) { 928 /* argument checking */ 929 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 930 return 0; 931 } 932 return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder); 933 } 934 935 /* String compare in code point order - u_strcmp() compares in code unit order. */ 936 U_CAPI int32_t U_EXPORT2 937 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) { 938 return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE); 939 } 940 941 U_CAPI int32_t U_EXPORT2 942 u_strncmp(const UChar *s1, 943 const UChar *s2, 944 int32_t n) 945 { 946 if(n > 0) { 947 int32_t rc; 948 for(;;) { 949 rc = (int32_t)*s1 - (int32_t)*s2; 950 if(rc != 0 || *s1 == 0 || --n == 0) { 951 return rc; 952 } 953 ++s1; 954 ++s2; 955 } 956 } else { 957 return 0; 958 } 959 } 960 961 U_CAPI int32_t U_EXPORT2 962 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) { 963 return uprv_strCompare(s1, n, s2, n, TRUE, TRUE); 964 } 965 966 U_CAPI UChar* U_EXPORT2 967 u_strcpy(UChar *dst, 968 const UChar *src) 969 { 970 UChar *anchor = dst; /* save a pointer to start of dst */ 971 972 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ 973 } 974 975 return anchor; 976 } 977 978 U_CAPI UChar* U_EXPORT2 979 u_strncpy(UChar *dst, 980 const UChar *src, 981 int32_t n) 982 { 983 UChar *anchor = dst; /* save a pointer to start of dst */ 984 985 /* copy string 2 over */ 986 while(n > 0 && (*(dst++) = *(src++)) != 0) { 987 --n; 988 } 989 990 return anchor; 991 } 992 993 U_CAPI int32_t U_EXPORT2 994 u_strlen(const UChar *s) 995 { 996 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR 997 return (int32_t)uprv_wcslen((const wchar_t *)s); 998 #else 999 const UChar *t = s; 1000 while(*t != 0) { 1001 ++t; 1002 } 1003 return t - s; 1004 #endif 1005 } 1006 1007 U_CAPI int32_t U_EXPORT2 1008 u_countChar32(const UChar *s, int32_t length) { 1009 int32_t count; 1010 1011 if(s==NULL || length<-1) { 1012 return 0; 1013 } 1014 1015 count=0; 1016 if(length>=0) { 1017 while(length>0) { 1018 ++count; 1019 if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) { 1020 s+=2; 1021 length-=2; 1022 } else { 1023 ++s; 1024 --length; 1025 } 1026 } 1027 } else /* length==-1 */ { 1028 UChar c; 1029 1030 for(;;) { 1031 if((c=*s++)==0) { 1032 break; 1033 } 1034 ++count; 1035 1036 /* 1037 * sufficient to look ahead one because of UTF-16; 1038 * safe to look ahead one because at worst that would be the terminating NUL 1039 */ 1040 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { 1041 ++s; 1042 } 1043 } 1044 } 1045 return count; 1046 } 1047 1048 U_CAPI UBool U_EXPORT2 1049 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { 1050 1051 if(number<0) { 1052 return TRUE; 1053 } 1054 if(s==NULL || length<-1) { 1055 return FALSE; 1056 } 1057 1058 if(length==-1) { 1059 /* s is NUL-terminated */ 1060 UChar c; 1061 1062 /* count code points until they exceed */ 1063 for(;;) { 1064 if((c=*s++)==0) { 1065 return FALSE; 1066 } 1067 if(number==0) { 1068 return TRUE; 1069 } 1070 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { 1071 ++s; 1072 } 1073 --number; 1074 } 1075 } else { 1076 /* length>=0 known */ 1077 const UChar *limit; 1078 int32_t maxSupplementary; 1079 1080 /* s contains at least (length+1)/2 code points: <=2 UChars per cp */ 1081 if(((length+1)/2)>number) { 1082 return TRUE; 1083 } 1084 1085 /* check if s does not even contain enough UChars */ 1086 maxSupplementary=length-number; 1087 if(maxSupplementary<=0) { 1088 return FALSE; 1089 } 1090 /* there are maxSupplementary=length-number more UChars than asked-for code points */ 1091 1092 /* 1093 * count code points until they exceed and also check that there are 1094 * no more than maxSupplementary supplementary code points (UChar pairs) 1095 */ 1096 limit=s+length; 1097 for(;;) { 1098 if(s==limit) { 1099 return FALSE; 1100 } 1101 if(number==0) { 1102 return TRUE; 1103 } 1104 if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) { 1105 ++s; 1106 if(--maxSupplementary<=0) { 1107 /* too many pairs - too few code points */ 1108 return FALSE; 1109 } 1110 } 1111 --number; 1112 } 1113 } 1114 } 1115 1116 U_CAPI UChar * U_EXPORT2 1117 u_memcpy(UChar *dest, const UChar *src, int32_t count) { 1118 if(count > 0) { 1119 uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR); 1120 } 1121 return dest; 1122 } 1123 1124 U_CAPI UChar * U_EXPORT2 1125 u_memmove(UChar *dest, const UChar *src, int32_t count) { 1126 if(count > 0) { 1127 uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR); 1128 } 1129 return dest; 1130 } 1131 1132 U_CAPI UChar * U_EXPORT2 1133 u_memset(UChar *dest, UChar c, int32_t count) { 1134 if(count > 0) { 1135 UChar *ptr = dest; 1136 UChar *limit = dest + count; 1137 1138 while (ptr < limit) { 1139 *(ptr++) = c; 1140 } 1141 } 1142 return dest; 1143 } 1144 1145 U_CAPI int32_t U_EXPORT2 1146 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) { 1147 if(count > 0) { 1148 const UChar *limit = buf1 + count; 1149 int32_t result; 1150 1151 while (buf1 < limit) { 1152 result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2; 1153 if (result != 0) { 1154 return result; 1155 } 1156 buf1++; 1157 buf2++; 1158 } 1159 } 1160 return 0; 1161 } 1162 1163 U_CAPI int32_t U_EXPORT2 1164 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) { 1165 return uprv_strCompare(s1, count, s2, count, FALSE, TRUE); 1166 } 1167 1168 /* u_unescape & support fns ------------------------------------------------- */ 1169 1170 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ 1171 static const UChar UNESCAPE_MAP[] = { 1172 /*" 0x22, 0x22 */ 1173 /*' 0x27, 0x27 */ 1174 /*? 0x3F, 0x3F */ 1175 /*\ 0x5C, 0x5C */ 1176 /*a*/ 0x61, 0x07, 1177 /*b*/ 0x62, 0x08, 1178 /*e*/ 0x65, 0x1b, 1179 /*f*/ 0x66, 0x0c, 1180 /*n*/ 0x6E, 0x0a, 1181 /*r*/ 0x72, 0x0d, 1182 /*t*/ 0x74, 0x09, 1183 /*v*/ 0x76, 0x0b 1184 }; 1185 enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) }; 1186 1187 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */ 1188 static int8_t _digit8(UChar c) { 1189 if (c >= 0x0030 && c <= 0x0037) { 1190 return (int8_t)(c - 0x0030); 1191 } 1192 return -1; 1193 } 1194 1195 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */ 1196 static int8_t _digit16(UChar c) { 1197 if (c >= 0x0030 && c <= 0x0039) { 1198 return (int8_t)(c - 0x0030); 1199 } 1200 if (c >= 0x0041 && c <= 0x0046) { 1201 return (int8_t)(c - (0x0041 - 10)); 1202 } 1203 if (c >= 0x0061 && c <= 0x0066) { 1204 return (int8_t)(c - (0x0061 - 10)); 1205 } 1206 return -1; 1207 } 1208 1209 /* Parse a single escape sequence. Although this method deals in 1210 * UChars, it does not use C++ or UnicodeString. This allows it to 1211 * be used from C contexts. */ 1212 U_CAPI UChar32 U_EXPORT2 1213 u_unescapeAt(UNESCAPE_CHAR_AT charAt, 1214 int32_t *offset, 1215 int32_t length, 1216 void *context) { 1217 1218 int32_t start = *offset; 1219 UChar c; 1220 UChar32 result = 0; 1221 int8_t n = 0; 1222 int8_t minDig = 0; 1223 int8_t maxDig = 0; 1224 int8_t bitsPerDigit = 4; 1225 int8_t dig; 1226 int32_t i; 1227 UBool braces = FALSE; 1228 1229 /* Check that offset is in range */ 1230 if (*offset < 0 || *offset >= length) { 1231 goto err; 1232 } 1233 1234 /* Fetch first UChar after '\\' */ 1235 c = charAt((*offset)++, context); 1236 1237 /* Convert hexadecimal and octal escapes */ 1238 switch (c) { 1239 case 0x0075 /*'u'*/: 1240 minDig = maxDig = 4; 1241 break; 1242 case 0x0055 /*'U'*/: 1243 minDig = maxDig = 8; 1244 break; 1245 case 0x0078 /*'x'*/: 1246 minDig = 1; 1247 if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) { 1248 ++(*offset); 1249 braces = TRUE; 1250 maxDig = 8; 1251 } else { 1252 maxDig = 2; 1253 } 1254 break; 1255 default: 1256 dig = _digit8(c); 1257 if (dig >= 0) { 1258 minDig = 1; 1259 maxDig = 3; 1260 n = 1; /* Already have first octal digit */ 1261 bitsPerDigit = 3; 1262 result = dig; 1263 } 1264 break; 1265 } 1266 if (minDig != 0) { 1267 while (*offset < length && n < maxDig) { 1268 c = charAt(*offset, context); 1269 dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c)); 1270 if (dig < 0) { 1271 break; 1272 } 1273 result = (result << bitsPerDigit) | dig; 1274 ++(*offset); 1275 ++n; 1276 } 1277 if (n < minDig) { 1278 goto err; 1279 } 1280 if (braces) { 1281 if (c != 0x7D /*}*/) { 1282 goto err; 1283 } 1284 ++(*offset); 1285 } 1286 if (result < 0 || result >= 0x110000) { 1287 goto err; 1288 } 1289 /* If an escape sequence specifies a lead surrogate, see if 1290 * there is a trail surrogate after it, either as an escape or 1291 * as a literal. If so, join them up into a supplementary. 1292 */ 1293 if (*offset < length && U16_IS_LEAD(result)) { 1294 int32_t ahead = *offset + 1; 1295 c = charAt(*offset, context); 1296 if (c == 0x5C /*'\\'*/ && ahead < length) { 1297 c = (UChar) u_unescapeAt(charAt, &ahead, length, context); 1298 } 1299 if (U16_IS_TRAIL(c)) { 1300 *offset = ahead; 1301 result = U16_GET_SUPPLEMENTARY(result, c); 1302 } 1303 } 1304 return result; 1305 } 1306 1307 /* Convert C-style escapes in table */ 1308 for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) { 1309 if (c == UNESCAPE_MAP[i]) { 1310 return UNESCAPE_MAP[i+1]; 1311 } else if (c < UNESCAPE_MAP[i]) { 1312 break; 1313 } 1314 } 1315 1316 /* Map \cX to control-X: X & 0x1F */ 1317 if (c == 0x0063 /*'c'*/ && *offset < length) { 1318 c = charAt((*offset)++, context); 1319 if (U16_IS_LEAD(c) && *offset < length) { 1320 UChar c2 = charAt(*offset, context); 1321 if (U16_IS_TRAIL(c2)) { 1322 ++(*offset); 1323 c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */ 1324 } 1325 } 1326 return 0x1F & c; 1327 } 1328 1329 /* If no special forms are recognized, then consider 1330 * the backslash to generically escape the next character. 1331 * Deal with surrogate pairs. */ 1332 if (U16_IS_LEAD(c) && *offset < length) { 1333 UChar c2 = charAt(*offset, context); 1334 if (U16_IS_TRAIL(c2)) { 1335 ++(*offset); 1336 return U16_GET_SUPPLEMENTARY(c, c2); 1337 } 1338 } 1339 return c; 1340 1341 err: 1342 /* Invalid escape sequence */ 1343 *offset = start; /* Reset to initial value */ 1344 return (UChar32)0xFFFFFFFF; 1345 } 1346 1347 /* u_unescapeAt() callback to return a UChar from a char* */ 1348 static UChar U_CALLCONV 1349 _charPtr_charAt(int32_t offset, void *context) { 1350 UChar c16; 1351 /* It would be more efficient to access the invariant tables 1352 * directly but there is no API for that. */ 1353 u_charsToUChars(((char*) context) + offset, &c16, 1); 1354 return c16; 1355 } 1356 1357 /* Append an escape-free segment of the text; used by u_unescape() */ 1358 static void _appendUChars(UChar *dest, int32_t destCapacity, 1359 const char *src, int32_t srcLen) { 1360 if (destCapacity < 0) { 1361 destCapacity = 0; 1362 } 1363 if (srcLen > destCapacity) { 1364 srcLen = destCapacity; 1365 } 1366 u_charsToUChars(src, dest, srcLen); 1367 } 1368 1369 /* Do an invariant conversion of char* -> UChar*, with escape parsing */ 1370 U_CAPI int32_t U_EXPORT2 1371 u_unescape(const char *src, UChar *dest, int32_t destCapacity) { 1372 const char *segment = src; 1373 int32_t i = 0; 1374 char c; 1375 1376 while ((c=*src) != 0) { 1377 /* '\\' intentionally written as compiler-specific 1378 * character constant to correspond to compiler-specific 1379 * char* constants. */ 1380 if (c == '\\') { 1381 int32_t lenParsed = 0; 1382 UChar32 c32; 1383 if (src != segment) { 1384 if (dest != NULL) { 1385 _appendUChars(dest + i, destCapacity - i, 1386 segment, (int32_t)(src - segment)); 1387 } 1388 i += (int32_t)(src - segment); 1389 } 1390 ++src; /* advance past '\\' */ 1391 c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src); 1392 if (lenParsed == 0) { 1393 goto err; 1394 } 1395 src += lenParsed; /* advance past escape seq. */ 1396 if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) { 1397 U16_APPEND_UNSAFE(dest, i, c32); 1398 } else { 1399 i += U16_LENGTH(c32); 1400 } 1401 segment = src; 1402 } else { 1403 ++src; 1404 } 1405 } 1406 if (src != segment) { 1407 if (dest != NULL) { 1408 _appendUChars(dest + i, destCapacity - i, 1409 segment, (int32_t)(src - segment)); 1410 } 1411 i += (int32_t)(src - segment); 1412 } 1413 if (dest != NULL && i < destCapacity) { 1414 dest[i] = 0; 1415 } 1416 return i; 1417 1418 err: 1419 if (dest != NULL && destCapacity > 0) { 1420 *dest = 0; 1421 } 1422 return 0; 1423 } 1424 1425 /* NUL-termination of strings ----------------------------------------------- */ 1426 1427 /** 1428 * NUL-terminate a string no matter what its type. 1429 * Set warning and error codes accordingly. 1430 */ 1431 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \ 1432 if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \ 1433 /* not a public function, so no complete argument checking */ \ 1434 \ 1435 if(length<0) { \ 1436 /* assume that the caller handles this */ \ 1437 } else if(length<destCapacity) { \ 1438 /* NUL-terminate the string, the NUL fits */ \ 1439 dest[length]=0; \ 1440 /* unset the not-terminated warning but leave all others */ \ 1441 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \ 1442 *pErrorCode=U_ZERO_ERROR; \ 1443 } \ 1444 } else if(length==destCapacity) { \ 1445 /* unable to NUL-terminate, but the string itself fit - set a warning code */ \ 1446 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \ 1447 } else /* length>destCapacity */ { \ 1448 /* even the string itself did not fit - set an error code */ \ 1449 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \ 1450 } \ 1451 } 1452 1453 U_CAPI int32_t U_EXPORT2 1454 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1455 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1456 return length; 1457 } 1458 1459 U_CAPI int32_t U_EXPORT2 1460 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1461 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1462 return length; 1463 } 1464 1465 U_CAPI int32_t U_EXPORT2 1466 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1467 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1468 return length; 1469 } 1470 1471 U_CAPI int32_t U_EXPORT2 1472 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1473 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1474 return length; 1475 } 1476 1477 // Compute the hash code for a string -------------------------------------- *** 1478 1479 // Moved here from uhash.c so that UnicodeString::hashCode() does not depend 1480 // on UHashtable code. 1481 1482 /* 1483 Compute the hash by iterating sparsely over about 32 (up to 63) 1484 characters spaced evenly through the string. For each character, 1485 multiply the previous hash value by a prime number and add the new 1486 character in, like a linear congruential random number generator, 1487 producing a pseudorandom deterministic value well distributed over 1488 the output range. [LIU] 1489 */ 1490 1491 #define STRING_HASH(TYPE, STR, STRLEN, DEREF) \ 1492 uint32_t hash = 0; \ 1493 const TYPE *p = (const TYPE*) STR; \ 1494 if (p != NULL) { \ 1495 int32_t len = (int32_t)(STRLEN); \ 1496 int32_t inc = ((len - 32) / 32) + 1; \ 1497 const TYPE *limit = p + len; \ 1498 while (p<limit) { \ 1499 hash = (hash * 37) + DEREF; \ 1500 p += inc; \ 1501 } \ 1502 } \ 1503 return static_cast<int32_t>(hash) 1504 1505 /* Used by UnicodeString to compute its hashcode - Not public API. */ 1506 U_CAPI int32_t U_EXPORT2 1507 ustr_hashUCharsN(const UChar *str, int32_t length) { 1508 STRING_HASH(UChar, str, length, *p); 1509 } 1510 1511 U_CAPI int32_t U_EXPORT2 1512 ustr_hashCharsN(const char *str, int32_t length) { 1513 STRING_HASH(uint8_t, str, length, *p); 1514 } 1515 1516 U_CAPI int32_t U_EXPORT2 1517 ustr_hashICharsN(const char *str, int32_t length) { 1518 STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p)); 1519 } 1520