1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2001-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: ustrcase.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002feb20 14 * created by: Markus W. Scherer 15 * 16 * Implementation file for string casing C API functions. 17 * Uses functions from uchar.c for basic functionality that requires access 18 * to the Unicode Character Database (uprops.dat). 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/brkiter.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucasemap.h" 25 #include "unicode/ubrk.h" 26 #include "unicode/utf.h" 27 #include "unicode/utf16.h" 28 #include "cmemory.h" 29 #include "ucase.h" 30 #include "ustr_imp.h" 31 32 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 33 34 U_NAMESPACE_USE 35 36 /* string casing ------------------------------------------------------------ */ 37 38 /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */ 39 static inline int32_t 40 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, 41 int32_t result, const UChar *s) { 42 UChar32 c; 43 int32_t length; 44 45 /* decode the result */ 46 if(result<0) { 47 /* (not) original code point */ 48 c=~result; 49 length=-1; 50 } else if(result<=UCASE_MAX_STRING_LENGTH) { 51 c=U_SENTINEL; 52 length=result; 53 } else { 54 c=result; 55 length=-1; 56 } 57 58 if(destIndex<destCapacity) { 59 /* append the result */ 60 if(length<0) { 61 /* code point */ 62 UBool isError=FALSE; 63 U16_APPEND(dest, destIndex, destCapacity, c, isError); 64 if(isError) { 65 /* overflow, nothing written */ 66 destIndex+=U16_LENGTH(c); 67 } 68 } else { 69 /* string */ 70 if((destIndex+length)<=destCapacity) { 71 while(length>0) { 72 dest[destIndex++]=*s++; 73 --length; 74 } 75 } else { 76 /* overflow */ 77 destIndex+=length; 78 } 79 } 80 } else { 81 /* preflight */ 82 if(length<0) { 83 destIndex+=U16_LENGTH(c); 84 } else { 85 destIndex+=length; 86 } 87 } 88 return destIndex; 89 } 90 91 static UChar32 U_CALLCONV 92 utf16_caseContextIterator(void *context, int8_t dir) { 93 UCaseContext *csc=(UCaseContext *)context; 94 UChar32 c; 95 96 if(dir<0) { 97 /* reset for backward iteration */ 98 csc->index=csc->cpStart; 99 csc->dir=dir; 100 } else if(dir>0) { 101 /* reset for forward iteration */ 102 csc->index=csc->cpLimit; 103 csc->dir=dir; 104 } else { 105 /* continue current iteration direction */ 106 dir=csc->dir; 107 } 108 109 if(dir<0) { 110 if(csc->start<csc->index) { 111 U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); 112 return c; 113 } 114 } else { 115 if(csc->index<csc->limit) { 116 U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); 117 return c; 118 } 119 } 120 return U_SENTINEL; 121 } 122 123 /* 124 * Case-maps [srcStart..srcLimit[ but takes 125 * context [0..srcLength[ into account. 126 */ 127 static int32_t 128 _caseMap(const UCaseMap *csm, UCaseMapFull *map, 129 UChar *dest, int32_t destCapacity, 130 const UChar *src, UCaseContext *csc, 131 int32_t srcStart, int32_t srcLimit, 132 UErrorCode *pErrorCode) { 133 const UChar *s; 134 UChar32 c, c2 = 0; 135 int32_t srcIndex, destIndex; 136 int32_t locCache; 137 138 locCache=csm->locCache; 139 140 /* case mapping loop */ 141 srcIndex=srcStart; 142 destIndex=0; 143 while(srcIndex<srcLimit) { 144 csc->cpStart=srcIndex; 145 U16_NEXT(src, srcIndex, srcLimit, c); 146 csc->cpLimit=srcIndex; 147 c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); 148 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 149 /* fast path version of appendResult() for BMP results */ 150 dest[destIndex++]=(UChar)c2; 151 } else { 152 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 153 } 154 } 155 156 if(destIndex>destCapacity) { 157 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 158 } 159 return destIndex; 160 } 161 162 #if !UCONFIG_NO_BREAK_ITERATION 163 164 U_CFUNC int32_t U_CALLCONV 165 ustrcase_internalToTitle(const UCaseMap *csm, 166 UChar *dest, int32_t destCapacity, 167 const UChar *src, int32_t srcLength, 168 UErrorCode *pErrorCode) { 169 const UChar *s; 170 UChar32 c; 171 int32_t prev, titleStart, titleLimit, idx, destIndex, length; 172 UBool isFirstIndex; 173 174 if(U_FAILURE(*pErrorCode)) { 175 return 0; 176 } 177 178 // Use the C++ abstract base class to minimize dependencies. 179 // TODO: Change UCaseMap.iter to store a BreakIterator directly. 180 BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter); 181 182 /* set up local variables */ 183 int32_t locCache=csm->locCache; 184 UCaseContext csc=UCASECONTEXT_INITIALIZER; 185 csc.p=(void *)src; 186 csc.limit=srcLength; 187 destIndex=0; 188 prev=0; 189 isFirstIndex=TRUE; 190 191 /* titlecasing loop */ 192 while(prev<srcLength) { 193 /* find next index where to titlecase */ 194 if(isFirstIndex) { 195 isFirstIndex=FALSE; 196 idx=bi->first(); 197 } else { 198 idx=bi->next(); 199 } 200 if(idx==UBRK_DONE || idx>srcLength) { 201 idx=srcLength; 202 } 203 204 /* 205 * Unicode 4 & 5 section 3.13 Default Case Operations: 206 * 207 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 208 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 209 * cased character F. If F exists, map F to default_title(F); then map each 210 * subsequent character C to default_lower(C). 211 * 212 * In this implementation, segment [prev..index[ into 3 parts: 213 * a) uncased characters (copy as-is) [prev..titleStart[ 214 * b) first case letter (titlecase) [titleStart..titleLimit[ 215 * c) subsequent characters (lowercase) [titleLimit..index[ 216 */ 217 if(prev<idx) { 218 /* find and copy uncased characters [prev..titleStart[ */ 219 titleStart=titleLimit=prev; 220 U16_NEXT(src, titleLimit, idx, c); 221 if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 222 /* Adjust the titlecasing index (titleStart) to the next cased character. */ 223 for(;;) { 224 titleStart=titleLimit; 225 if(titleLimit==idx) { 226 /* 227 * only uncased characters in [prev..index[ 228 * stop with titleStart==titleLimit==index 229 */ 230 break; 231 } 232 U16_NEXT(src, titleLimit, idx, c); 233 if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 234 break; /* cased letter at [titleStart..titleLimit[ */ 235 } 236 } 237 length=titleStart-prev; 238 if(length>0) { 239 if((destIndex+length)<=destCapacity) { 240 uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); 241 } 242 destIndex+=length; 243 } 244 } 245 246 if(titleStart<titleLimit) { 247 /* titlecase c which is from [titleStart..titleLimit[ */ 248 csc.cpStart=titleStart; 249 csc.cpLimit=titleLimit; 250 c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); 251 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 252 253 /* Special case Dutch IJ titlecasing */ 254 if ( titleStart+1 < idx && 255 ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && 256 ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && 257 ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 258 c=(UChar32) 0x004A; 259 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 260 titleLimit++; 261 } 262 263 /* lowercase [titleLimit..index[ */ 264 if(titleLimit<idx) { 265 if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 266 /* Normal operation: Lowercase the rest of the word. */ 267 destIndex+= 268 _caseMap( 269 csm, ucase_toFullLower, 270 dest+destIndex, destCapacity-destIndex, 271 src, &csc, 272 titleLimit, idx, 273 pErrorCode); 274 } else { 275 /* Optionally just copy the rest of the word unchanged. */ 276 length=idx-titleLimit; 277 if((destIndex+length)<=destCapacity) { 278 uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); 279 } 280 destIndex+=length; 281 } 282 } 283 } 284 } 285 286 prev=idx; 287 } 288 289 if(destIndex>destCapacity) { 290 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 291 } 292 return destIndex; 293 } 294 295 #endif // !UCONFIG_NO_BREAK_ITERATION 296 297 /* functions available in the common library (for unistr_case.cpp) */ 298 299 U_CFUNC int32_t U_CALLCONV 300 ustrcase_internalToLower(const UCaseMap *csm, 301 UChar *dest, int32_t destCapacity, 302 const UChar *src, int32_t srcLength, 303 UErrorCode *pErrorCode) { 304 UCaseContext csc=UCASECONTEXT_INITIALIZER; 305 csc.p=(void *)src; 306 csc.limit=srcLength; 307 return _caseMap( 308 csm, ucase_toFullLower, 309 dest, destCapacity, 310 src, &csc, 0, srcLength, 311 pErrorCode); 312 } 313 314 U_CFUNC int32_t U_CALLCONV 315 ustrcase_internalToUpper(const UCaseMap *csm, 316 UChar *dest, int32_t destCapacity, 317 const UChar *src, int32_t srcLength, 318 UErrorCode *pErrorCode) { 319 UCaseContext csc=UCASECONTEXT_INITIALIZER; 320 csc.p=(void *)src; 321 csc.limit=srcLength; 322 return _caseMap( 323 csm, ucase_toFullUpper, 324 dest, destCapacity, 325 src, &csc, 0, srcLength, 326 pErrorCode); 327 } 328 329 static int32_t 330 ustr_foldCase(const UCaseProps *csp, 331 UChar *dest, int32_t destCapacity, 332 const UChar *src, int32_t srcLength, 333 uint32_t options, 334 UErrorCode *pErrorCode) { 335 int32_t srcIndex, destIndex; 336 337 const UChar *s; 338 UChar32 c, c2 = 0; 339 340 /* case mapping loop */ 341 srcIndex=destIndex=0; 342 while(srcIndex<srcLength) { 343 U16_NEXT(src, srcIndex, srcLength, c); 344 c=ucase_toFullFolding(csp, c, &s, options); 345 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 346 /* fast path version of appendResult() for BMP results */ 347 dest[destIndex++]=(UChar)c2; 348 } else { 349 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 350 } 351 } 352 353 if(destIndex>destCapacity) { 354 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 355 } 356 return destIndex; 357 } 358 359 U_CFUNC int32_t U_CALLCONV 360 ustrcase_internalFold(const UCaseMap *csm, 361 UChar *dest, int32_t destCapacity, 362 const UChar *src, int32_t srcLength, 363 UErrorCode *pErrorCode) { 364 return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); 365 } 366 367 U_CFUNC int32_t 368 ustrcase_map(const UCaseMap *csm, 369 UChar *dest, int32_t destCapacity, 370 const UChar *src, int32_t srcLength, 371 UStringCaseMapper *stringCaseMapper, 372 UErrorCode *pErrorCode) { 373 UChar buffer[300]; 374 UChar *temp; 375 376 int32_t destLength; 377 378 /* check argument values */ 379 if(U_FAILURE(*pErrorCode)) { 380 return 0; 381 } 382 if( destCapacity<0 || 383 (dest==NULL && destCapacity>0) || 384 src==NULL || 385 srcLength<-1 386 ) { 387 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 388 return 0; 389 } 390 391 /* get the string length */ 392 if(srcLength==-1) { 393 srcLength=u_strlen(src); 394 } 395 396 /* check for overlapping source and destination */ 397 if( dest!=NULL && 398 ((src>=dest && src<(dest+destCapacity)) || 399 (dest>=src && dest<(src+srcLength))) 400 ) { 401 /* overlap: provide a temporary destination buffer and later copy the result */ 402 if(destCapacity<=LENGTHOF(buffer)) { 403 /* the stack buffer is large enough */ 404 temp=buffer; 405 } else { 406 /* allocate a buffer */ 407 temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); 408 if(temp==NULL) { 409 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 410 return 0; 411 } 412 } 413 } else { 414 temp=dest; 415 } 416 417 destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode); 418 if(temp!=dest) { 419 /* copy the result string to the destination buffer */ 420 if(destLength>0) { 421 int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; 422 if(copyLength>0) { 423 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); 424 } 425 } 426 if(temp!=buffer) { 427 uprv_free(temp); 428 } 429 } 430 431 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 432 } 433 434 /* public API functions */ 435 436 U_CAPI int32_t U_EXPORT2 437 u_strFoldCase(UChar *dest, int32_t destCapacity, 438 const UChar *src, int32_t srcLength, 439 uint32_t options, 440 UErrorCode *pErrorCode) { 441 UCaseMap csm=UCASEMAP_INITIALIZER; 442 csm.csp=ucase_getSingleton(); 443 csm.options=options; 444 return ustrcase_map( 445 &csm, 446 dest, destCapacity, 447 src, srcLength, 448 ustrcase_internalFold, pErrorCode); 449 } 450 451 /* case-insensitive string comparisons -------------------------------------- */ 452 453 /* 454 * This function is a copy of unorm_cmpEquivFold() minus the parts for 455 * canonical equivalence. 456 * Keep the functions in sync, and see there for how this works. 457 * The duplication is for modularization: 458 * It makes caseless (but not canonical caseless) matches independent of 459 * the normalization code. 460 */ 461 462 /* stack element for previous-level source/decomposition pointers */ 463 struct CmpEquivLevel { 464 const UChar *start, *s, *limit; 465 }; 466 typedef struct CmpEquivLevel CmpEquivLevel; 467 468 /* internal function */ 469 U_CFUNC int32_t 470 u_strcmpFold(const UChar *s1, int32_t length1, 471 const UChar *s2, int32_t length2, 472 uint32_t options, 473 UErrorCode *pErrorCode) { 474 const UCaseProps *csp; 475 476 /* current-level start/limit - s1/s2 as current */ 477 const UChar *start1, *start2, *limit1, *limit2; 478 479 /* case folding variables */ 480 const UChar *p; 481 int32_t length; 482 483 /* stacks of previous-level start/current/limit */ 484 CmpEquivLevel stack1[2], stack2[2]; 485 486 /* case folding buffers, only use current-level start/limit */ 487 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; 488 489 /* track which is the current level per string */ 490 int32_t level1, level2; 491 492 /* current code units, and code points for lookups */ 493 UChar32 c1, c2, cp1, cp2; 494 495 /* no argument error checking because this itself is not an API */ 496 497 /* 498 * assume that at least the option U_COMPARE_IGNORE_CASE is set 499 * otherwise this function would have to behave exactly as uprv_strCompare() 500 */ 501 csp=ucase_getSingleton(); 502 if(U_FAILURE(*pErrorCode)) { 503 return 0; 504 } 505 506 /* initialize */ 507 start1=s1; 508 if(length1==-1) { 509 limit1=NULL; 510 } else { 511 limit1=s1+length1; 512 } 513 514 start2=s2; 515 if(length2==-1) { 516 limit2=NULL; 517 } else { 518 limit2=s2+length2; 519 } 520 521 level1=level2=0; 522 c1=c2=-1; 523 524 /* comparison loop */ 525 for(;;) { 526 /* 527 * here a code unit value of -1 means "get another code unit" 528 * below it will mean "this source is finished" 529 */ 530 531 if(c1<0) { 532 /* get next code unit from string 1, post-increment */ 533 for(;;) { 534 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { 535 if(level1==0) { 536 c1=-1; 537 break; 538 } 539 } else { 540 ++s1; 541 break; 542 } 543 544 /* reached end of level buffer, pop one level */ 545 do { 546 --level1; 547 start1=stack1[level1].start; /*Not uninitialized*/ 548 } while(start1==NULL); 549 s1=stack1[level1].s; /*Not uninitialized*/ 550 limit1=stack1[level1].limit; /*Not uninitialized*/ 551 } 552 } 553 554 if(c2<0) { 555 /* get next code unit from string 2, post-increment */ 556 for(;;) { 557 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { 558 if(level2==0) { 559 c2=-1; 560 break; 561 } 562 } else { 563 ++s2; 564 break; 565 } 566 567 /* reached end of level buffer, pop one level */ 568 do { 569 --level2; 570 start2=stack2[level2].start; /*Not uninitialized*/ 571 } while(start2==NULL); 572 s2=stack2[level2].s; /*Not uninitialized*/ 573 limit2=stack2[level2].limit; /*Not uninitialized*/ 574 } 575 } 576 577 /* 578 * compare c1 and c2 579 * either variable c1, c2 is -1 only if the corresponding string is finished 580 */ 581 if(c1==c2) { 582 if(c1<0) { 583 return 0; /* c1==c2==-1 indicating end of strings */ 584 } 585 c1=c2=-1; /* make us fetch new code units */ 586 continue; 587 } else if(c1<0) { 588 return -1; /* string 1 ends before string 2 */ 589 } else if(c2<0) { 590 return 1; /* string 2 ends before string 1 */ 591 } 592 /* c1!=c2 && c1>=0 && c2>=0 */ 593 594 /* get complete code points for c1, c2 for lookups if either is a surrogate */ 595 cp1=c1; 596 if(U_IS_SURROGATE(c1)) { 597 UChar c; 598 599 if(U_IS_SURROGATE_LEAD(c1)) { 600 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { 601 /* advance ++s1; only below if cp1 decomposes/case-folds */ 602 cp1=U16_GET_SUPPLEMENTARY(c1, c); 603 } 604 } else /* isTrail(c1) */ { 605 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { 606 cp1=U16_GET_SUPPLEMENTARY(c, c1); 607 } 608 } 609 } 610 611 cp2=c2; 612 if(U_IS_SURROGATE(c2)) { 613 UChar c; 614 615 if(U_IS_SURROGATE_LEAD(c2)) { 616 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { 617 /* advance ++s2; only below if cp2 decomposes/case-folds */ 618 cp2=U16_GET_SUPPLEMENTARY(c2, c); 619 } 620 } else /* isTrail(c2) */ { 621 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { 622 cp2=U16_GET_SUPPLEMENTARY(c, c2); 623 } 624 } 625 } 626 627 /* 628 * go down one level for each string 629 * continue with the main loop as soon as there is a real change 630 */ 631 632 if( level1==0 && 633 (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 634 ) { 635 /* cp1 case-folds to the code point "length" or to p[length] */ 636 if(U_IS_SURROGATE(c1)) { 637 if(U_IS_SURROGATE_LEAD(c1)) { 638 /* advance beyond source surrogate pair if it case-folds */ 639 ++s1; 640 } else /* isTrail(c1) */ { 641 /* 642 * we got a supplementary code point when hitting its trail surrogate, 643 * therefore the lead surrogate must have been the same as in the other string; 644 * compare this decomposition with the lead surrogate in the other string 645 * remember that this simulates bulk text replacement: 646 * the decomposition would replace the entire code point 647 */ 648 --s2; 649 c2=*(s2-1); 650 } 651 } 652 653 /* push current level pointers */ 654 stack1[0].start=start1; 655 stack1[0].s=s1; 656 stack1[0].limit=limit1; 657 ++level1; 658 659 /* copy the folding result to fold1[] */ 660 if(length<=UCASE_MAX_STRING_LENGTH) { 661 u_memcpy(fold1, p, length); 662 } else { 663 int32_t i=0; 664 U16_APPEND_UNSAFE(fold1, i, length); 665 length=i; 666 } 667 668 /* set next level pointers to case folding */ 669 start1=s1=fold1; 670 limit1=fold1+length; 671 672 /* get ready to read from decomposition, continue with loop */ 673 c1=-1; 674 continue; 675 } 676 677 if( level2==0 && 678 (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 679 ) { 680 /* cp2 case-folds to the code point "length" or to p[length] */ 681 if(U_IS_SURROGATE(c2)) { 682 if(U_IS_SURROGATE_LEAD(c2)) { 683 /* advance beyond source surrogate pair if it case-folds */ 684 ++s2; 685 } else /* isTrail(c2) */ { 686 /* 687 * we got a supplementary code point when hitting its trail surrogate, 688 * therefore the lead surrogate must have been the same as in the other string; 689 * compare this decomposition with the lead surrogate in the other string 690 * remember that this simulates bulk text replacement: 691 * the decomposition would replace the entire code point 692 */ 693 --s1; 694 c1=*(s1-1); 695 } 696 } 697 698 /* push current level pointers */ 699 stack2[0].start=start2; 700 stack2[0].s=s2; 701 stack2[0].limit=limit2; 702 ++level2; 703 704 /* copy the folding result to fold2[] */ 705 if(length<=UCASE_MAX_STRING_LENGTH) { 706 u_memcpy(fold2, p, length); 707 } else { 708 int32_t i=0; 709 U16_APPEND_UNSAFE(fold2, i, length); 710 length=i; 711 } 712 713 /* set next level pointers to case folding */ 714 start2=s2=fold2; 715 limit2=fold2+length; 716 717 /* get ready to read from decomposition, continue with loop */ 718 c2=-1; 719 continue; 720 } 721 722 /* 723 * no decomposition/case folding, max level for both sides: 724 * return difference result 725 * 726 * code point order comparison must not just return cp1-cp2 727 * because when single surrogates are present then the surrogate pairs 728 * that formed cp1 and cp2 may be from different string indexes 729 * 730 * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units 731 * c1=d800 cp1=10001 c2=dc00 cp2=10000 732 * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } 733 * 734 * therefore, use same fix-up as in ustring.c/uprv_strCompare() 735 * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ 736 * so we have slightly different pointer/start/limit comparisons here 737 */ 738 739 if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { 740 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 741 if( 742 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || 743 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) 744 ) { 745 /* part of a surrogate pair, leave >=d800 */ 746 } else { 747 /* BMP code point - may be surrogate code point - make <d800 */ 748 c1-=0x2800; 749 } 750 751 if( 752 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || 753 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) 754 ) { 755 /* part of a surrogate pair, leave >=d800 */ 756 } else { 757 /* BMP code point - may be surrogate code point - make <d800 */ 758 c2-=0x2800; 759 } 760 } 761 762 return c1-c2; 763 } 764 } 765 766 /* public API functions */ 767 768 U_CAPI int32_t U_EXPORT2 769 u_strCaseCompare(const UChar *s1, int32_t length1, 770 const UChar *s2, int32_t length2, 771 uint32_t options, 772 UErrorCode *pErrorCode) { 773 /* argument checking */ 774 if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { 775 return 0; 776 } 777 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 778 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 779 return 0; 780 } 781 return u_strcmpFold(s1, length1, s2, length2, 782 options|U_COMPARE_IGNORE_CASE, 783 pErrorCode); 784 } 785 786 U_CAPI int32_t U_EXPORT2 787 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { 788 UErrorCode errorCode=U_ZERO_ERROR; 789 return u_strcmpFold(s1, -1, s2, -1, 790 options|U_COMPARE_IGNORE_CASE, 791 &errorCode); 792 } 793 794 U_CAPI int32_t U_EXPORT2 795 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { 796 UErrorCode errorCode=U_ZERO_ERROR; 797 return u_strcmpFold(s1, length, s2, length, 798 options|U_COMPARE_IGNORE_CASE, 799 &errorCode); 800 } 801 802 U_CAPI int32_t U_EXPORT2 803 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { 804 UErrorCode errorCode=U_ZERO_ERROR; 805 return u_strcmpFold(s1, n, s2, n, 806 options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), 807 &errorCode); 808 } 809