1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2005-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: ucasemap.cpp 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005may06 16 * created by: Markus W. Scherer 17 * 18 * Case mapping service object and functions using it. 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/brkiter.h" 23 #include "unicode/ubrk.h" 24 #include "unicode/uloc.h" 25 #include "unicode/ustring.h" 26 #include "unicode/ucasemap.h" 27 #if !UCONFIG_NO_BREAK_ITERATION 28 #include "unicode/utext.h" 29 #endif 30 #include "unicode/utf.h" 31 #include "unicode/utf8.h" 32 #include "unicode/utf16.h" 33 #include "cmemory.h" 34 #include "cstring.h" 35 #include "ucase.h" 36 #include "ustr_imp.h" 37 38 U_NAMESPACE_USE 39 40 /* UCaseMap service object -------------------------------------------------- */ 41 42 U_CAPI UCaseMap * U_EXPORT2 43 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { 44 UCaseMap *csm; 45 46 if(U_FAILURE(*pErrorCode)) { 47 return NULL; 48 } 49 50 csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap)); 51 if(csm==NULL) { 52 return NULL; 53 } 54 uprv_memset(csm, 0, sizeof(UCaseMap)); 55 56 csm->csp=ucase_getSingleton(); 57 ucasemap_setLocale(csm, locale, pErrorCode); 58 if(U_FAILURE(*pErrorCode)) { 59 uprv_free(csm); 60 return NULL; 61 } 62 63 csm->options=options; 64 return csm; 65 } 66 67 U_CAPI void U_EXPORT2 68 ucasemap_close(UCaseMap *csm) { 69 if(csm!=NULL) { 70 #if !UCONFIG_NO_BREAK_ITERATION 71 // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. 
72 delete reinterpret_cast<BreakIterator *>(csm->iter); 73 #endif 74 uprv_free(csm); 75 } 76 } 77 78 U_CAPI const char * U_EXPORT2 79 ucasemap_getLocale(const UCaseMap *csm) { 80 return csm->locale; 81 } 82 83 U_CAPI uint32_t U_EXPORT2 84 ucasemap_getOptions(const UCaseMap *csm) { 85 return csm->options; 86 } 87 88 U_CAPI void U_EXPORT2 89 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 90 int32_t length; 91 92 if(U_FAILURE(*pErrorCode)) { 93 return; 94 } 95 96 length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); 97 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { 98 *pErrorCode=U_ZERO_ERROR; 99 /* we only really need the language code for case mappings */ 100 length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); 101 } 102 if(length==sizeof(csm->locale)) { 103 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 104 } 105 csm->locCache=0; 106 if(U_SUCCESS(*pErrorCode)) { 107 ucase_getCaseLocale(csm->locale, &csm->locCache); 108 } else { 109 csm->locale[0]=0; 110 } 111 } 112 113 U_CAPI void U_EXPORT2 114 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) { 115 csm->options=options; 116 } 117 118 /* UTF-8 string case mappings ----------------------------------------------- */ 119 120 /* TODO(markus): Move to a new, separate utf8case.c file. 
*/ 121 122 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ 123 static inline int32_t 124 appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, 125 int32_t result, const UChar *s) { 126 UChar32 c; 127 int32_t length; 128 UErrorCode errorCode; 129 130 /* decode the result */ 131 if(result<0) { 132 /* (not) original code point */ 133 c=~result; 134 length=U8_LENGTH(c); 135 } else if(result<=UCASE_MAX_STRING_LENGTH) { 136 c=U_SENTINEL; 137 length=result; 138 } else { 139 c=result; 140 length=U8_LENGTH(c); 141 } 142 if(length>(INT32_MAX-destIndex)) { 143 return -1; // integer overflow 144 } 145 146 if(destIndex<destCapacity) { 147 /* append the result */ 148 if(c>=0) { 149 /* code point */ 150 UBool isError=FALSE; 151 U8_APPEND(dest, destIndex, destCapacity, c, isError); 152 if(isError) { 153 /* overflow, nothing written */ 154 destIndex+=length; 155 } 156 } else { 157 /* string */ 158 int32_t destLength; 159 errorCode=U_ZERO_ERROR; 160 u_strToUTF8( 161 (char *)(dest+destIndex), destCapacity-destIndex, &destLength, 162 s, length, 163 &errorCode); 164 if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { 165 return -1; 166 } 167 if(destLength>(INT32_MAX-destIndex)) { 168 return -1; // integer overflow 169 } 170 destIndex+=destLength; 171 /* we might have an overflow, but we know the actual length */ 172 } 173 } else { 174 /* preflight */ 175 if(c>=0) { 176 destIndex+=length; 177 } else { 178 int32_t destLength; 179 errorCode=U_ZERO_ERROR; 180 u_strToUTF8( 181 NULL, 0, &destLength, 182 s, length, 183 &errorCode); 184 if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { 185 return -1; 186 } 187 if(destLength>(INT32_MAX-destIndex)) { 188 return -1; // integer overflow 189 } 190 destIndex+=destLength; 191 } 192 } 193 return destIndex; 194 } 195 196 static inline int32_t 197 appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) { 198 int32_t length=U8_LENGTH(c); 199 
if(length>(INT32_MAX-destIndex)) { 200 return -1; // integer overflow 201 } 202 int32_t limit=destIndex+length; 203 if(limit<=destCapacity) { 204 U8_APPEND_UNSAFE(dest, destIndex, c); 205 } 206 return limit; 207 } 208 209 static inline int32_t 210 appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity, 211 const uint8_t *s, int32_t length) { 212 if(length>0) { 213 if(length>(INT32_MAX-destIndex)) { 214 return -1; // integer overflow 215 } 216 if((destIndex+length)<=destCapacity) { 217 uprv_memcpy(dest+destIndex, s, length); 218 } 219 destIndex+=length; 220 } 221 return destIndex; 222 } 223 224 static UChar32 U_CALLCONV 225 utf8_caseContextIterator(void *context, int8_t dir) { 226 UCaseContext *csc=(UCaseContext *)context; 227 UChar32 c; 228 229 if(dir<0) { 230 /* reset for backward iteration */ 231 csc->index=csc->cpStart; 232 csc->dir=dir; 233 } else if(dir>0) { 234 /* reset for forward iteration */ 235 csc->index=csc->cpLimit; 236 csc->dir=dir; 237 } else { 238 /* continue current iteration direction */ 239 dir=csc->dir; 240 } 241 242 if(dir<0) { 243 if(csc->start<csc->index) { 244 U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c); 245 return c; 246 } 247 } else { 248 if(csc->index<csc->limit) { 249 U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c); 250 return c; 251 } 252 } 253 return U_SENTINEL; 254 } 255 256 /* 257 * Case-maps [srcStart..srcLimit[ but takes 258 * context [0..srcLength[ into account. 
259 */ 260 static int32_t 261 _caseMap(const UCaseMap *csm, UCaseMapFull *map, 262 uint8_t *dest, int32_t destCapacity, 263 const uint8_t *src, UCaseContext *csc, 264 int32_t srcStart, int32_t srcLimit, 265 UErrorCode *pErrorCode) { 266 const UChar *s = NULL; 267 UChar32 c, c2 = 0; 268 int32_t srcIndex, destIndex; 269 int32_t locCache; 270 271 locCache=csm->locCache; 272 273 /* case mapping loop */ 274 srcIndex=srcStart; 275 destIndex=0; 276 while(srcIndex<srcLimit) { 277 csc->cpStart=srcIndex; 278 U8_NEXT(src, srcIndex, srcLimit, c); 279 csc->cpLimit=srcIndex; 280 if(c<0) { 281 // Malformed UTF-8. 282 destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart); 283 if(destIndex<0) { 284 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 285 return 0; 286 } 287 continue; 288 } 289 c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache); 290 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) { 291 /* fast path version of appendResult() for ASCII results */ 292 dest[destIndex++]=(uint8_t)c2; 293 } else { 294 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 295 if(destIndex<0) { 296 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 297 return 0; 298 } 299 } 300 } 301 302 if(destIndex>destCapacity) { 303 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 304 } 305 return destIndex; 306 } 307 308 #if !UCONFIG_NO_BREAK_ITERATION 309 310 U_CFUNC int32_t U_CALLCONV 311 ucasemap_internalUTF8ToTitle(const UCaseMap *csm, 312 uint8_t *dest, int32_t destCapacity, 313 const uint8_t *src, int32_t srcLength, 314 UErrorCode *pErrorCode) { 315 const UChar *s; 316 UChar32 c; 317 int32_t prev, titleStart, titleLimit, idx, destIndex; 318 UBool isFirstIndex; 319 320 if(U_FAILURE(*pErrorCode)) { 321 return 0; 322 } 323 324 // Use the C++ abstract base class to minimize dependencies. 325 // TODO: Change UCaseMap.iter to store a BreakIterator directly. 
326 BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter); 327 328 /* set up local variables */ 329 int32_t locCache=csm->locCache; 330 UCaseContext csc=UCASECONTEXT_INITIALIZER; 331 csc.p=(void *)src; 332 csc.limit=srcLength; 333 destIndex=0; 334 prev=0; 335 isFirstIndex=TRUE; 336 337 /* titlecasing loop */ 338 while(prev<srcLength) { 339 /* find next index where to titlecase */ 340 if(isFirstIndex) { 341 isFirstIndex=FALSE; 342 idx=bi->first(); 343 } else { 344 idx=bi->next(); 345 } 346 if(idx==UBRK_DONE || idx>srcLength) { 347 idx=srcLength; 348 } 349 350 /* 351 * Unicode 4 & 5 section 3.13 Default Case Operations: 352 * 353 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 354 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 355 * cased character F. If F exists, map F to default_title(F); then map each 356 * subsequent character C to default_lower(C). 357 * 358 * In this implementation, segment [prev..index[ into 3 parts: 359 * a) uncased characters (copy as-is) [prev..titleStart[ 360 * b) first case letter (titlecase) [titleStart..titleLimit[ 361 * c) subsequent characters (lowercase) [titleLimit..index[ 362 */ 363 if(prev<idx) { 364 /* find and copy uncased characters [prev..titleStart[ */ 365 titleStart=titleLimit=prev; 366 U8_NEXT(src, titleLimit, idx, c); 367 if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 368 /* Adjust the titlecasing index (titleStart) to the next cased character. 
*/ 369 for(;;) { 370 titleStart=titleLimit; 371 if(titleLimit==idx) { 372 /* 373 * only uncased characters in [prev..index[ 374 * stop with titleStart==titleLimit==index 375 */ 376 break; 377 } 378 U8_NEXT(src, titleLimit, idx, c); 379 if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 380 break; /* cased letter at [titleStart..titleLimit[ */ 381 } 382 } 383 destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev); 384 if(destIndex<0) { 385 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 386 return 0; 387 } 388 } 389 390 if(titleStart<titleLimit) { 391 /* titlecase c which is from [titleStart..titleLimit[ */ 392 if(c>=0) { 393 csc.cpStart=titleStart; 394 csc.cpLimit=titleLimit; 395 c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); 396 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 397 } else { 398 // Malformed UTF-8. 399 destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart); 400 } 401 if(destIndex<0) { 402 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 403 return 0; 404 } 405 406 /* Special case Dutch IJ titlecasing */ 407 if (titleStart+1 < idx && 408 ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && 409 (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && 410 (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { 411 destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); 412 titleLimit++; 413 } 414 /* lowercase [titleLimit..index[ */ 415 if(titleLimit<idx) { 416 if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 417 /* Normal operation: Lowercase the rest of the word. 
*/ 418 destIndex+= 419 _caseMap( 420 csm, ucase_toFullLower, 421 dest+destIndex, destCapacity-destIndex, 422 src, &csc, 423 titleLimit, idx, 424 pErrorCode); 425 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 426 *pErrorCode=U_ZERO_ERROR; 427 } 428 if(U_FAILURE(*pErrorCode)) { 429 return destIndex; 430 } 431 } else { 432 /* Optionally just copy the rest of the word unchanged. */ 433 destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit); 434 if(destIndex<0) { 435 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 436 return 0; 437 } 438 } 439 } 440 } 441 } 442 443 prev=idx; 444 } 445 446 if(destIndex>destCapacity) { 447 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 448 } 449 return destIndex; 450 } 451 452 #endif 453 454 U_NAMESPACE_BEGIN 455 namespace GreekUpper { 456 457 UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) { 458 while (i < length) { 459 UChar32 c; 460 U8_NEXT(s, i, length, c); 461 int32_t type = ucase_getTypeOrIgnorable(csp, c); 462 if ((type & UCASE_IGNORABLE) != 0) { 463 // Case-ignorable, continue with the loop. 464 } else if (type != UCASE_NONE) { 465 return TRUE; // Followed by cased letter. 466 } else { 467 return FALSE; // Uncased and not case-ignorable. 468 } 469 } 470 return FALSE; // Not followed by cased letter. 471 } 472 473 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. 
// Uppercases the UTF-8 string src[0..srcLength[ into dest using the
// Greek-specific handling below for letters that getLetterData() recognizes,
// and ucase_toFullUpper() for all other well-formed code points.
// Malformed UTF-8 byte sequences are copied unchanged.
// Returns the output length, which may exceed destCapacity (then with
// U_BUFFER_OVERFLOW_ERROR), or 0 with U_INDEX_OUTOFBOUNDS_ERROR if the
// output length would overflow int32_t.
// getLetterData(), getDiacriticData() and the HAS_*/AFTER_*/UPPER_MASK
// constants are declared outside this file section.
int32_t toUpper(const UCaseMap *csm,
                uint8_t *dest, int32_t destCapacity,
                const uint8_t *src, int32_t srcLength,
                UErrorCode *pErrorCode) {
    int32_t locCache = UCASE_LOC_GREEK;
    int32_t destIndex=0;
    // state carries the AFTER_* flags from the previous character to the current one.
    uint32_t state = 0;
    for (int32_t i = 0; i < srcLength;) {
        int32_t nextIndex = i;
        UChar32 c;
        U8_NEXT(src, nextIndex, srcLength, c);
        uint32_t nextState = 0;
        int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
        if ((type & UCASE_IGNORABLE) != 0) {
            // c is case-ignorable
            nextState |= (state & AFTER_CASED);
        } else if (type != UCASE_NONE) {
            // c is cased
            nextState |= AFTER_CASED;
        }
        uint32_t data = getLetterData(c);
        if (data > 0) {
            uint32_t upper = data & UPPER_MASK;
            // Add a dialytika to this iota or ypsilon vowel
            // if we removed a tonos from the previous vowel,
            // and that previous vowel did not also have (or gain) a dialytika.
            // Adding one only to the final vowel in a longer sequence
            // (which does not occur in normal writing) would require lookahead.
            // Set the same flag as for preserving an existing dialytika.
            if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
                    (upper == 0x399 || upper == 0x3A5)) {
                data |= HAS_DIALYTIKA;
            }
            int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
            if ((data & HAS_YPOGEGRAMMENI) != 0) {
                numYpogegrammeni = 1;
            }
            // Skip combining diacritics after this Greek letter.
            // nextNextIndex reads ahead by one code point; nextIndex only
            // advances once that code point turns out to be a Greek diacritic.
            int32_t nextNextIndex = nextIndex;
            while (nextIndex < srcLength) {
                UChar32 c2;
                U8_NEXT(src, nextNextIndex, srcLength, c2);
                uint32_t diacriticData = getDiacriticData(c2);
                if (diacriticData != 0) {
                    data |= diacriticData;
                    if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
                        ++numYpogegrammeni;
                    }
                    nextIndex = nextNextIndex;
                } else {
                    break;  // not a Greek diacritic
                }
            }
            if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
                nextState |= AFTER_VOWEL_WITH_ACCENT;
            }
            // Map according to Greek rules.
            UBool addTonos = FALSE;
            if (upper == 0x397 &&
                    (data & HAS_ACCENT) != 0 &&
                    numYpogegrammeni == 0 &&
                    (state & AFTER_CASED) == 0 &&
                    !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
                // Keep disjunctive "or" with (only) a tonos.
                // We use the same "word boundary" conditions as for the Final_Sigma test.
                // NOTE(review): nextIndex was already advanced past c by the
                // U8_NEXT at the top of the loop body, so i == nextIndex looks
                // unreachable here - confirm against the UTF-16 version.
                if (i == nextIndex) {
                    upper = 0x389;  // Preserve the precomposed form.
                } else {
                    addTonos = TRUE;
                }
            } else if ((data & HAS_DIALYTIKA) != 0) {
                // Preserve a vowel with dialytika in precomposed form if it exists.
                if (upper == 0x399) {
                    upper = 0x3AA;
                    data &= ~HAS_EITHER_DIALYTIKA;
                } else if (upper == 0x3A5) {
                    upper = 0x3AB;
                    data &= ~HAS_EITHER_DIALYTIKA;
                }
            }
            // Each appendUChar() returns -1 on integer overflow; the
            // destIndex >= 0 guards skip further appends once that happened.
            destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
            if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
                destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
            }
            if (destIndex >= 0 && addTonos) {
                destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
            }
            while (destIndex >= 0 && numYpogegrammeni > 0) {
                destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
                --numYpogegrammeni;
            }
            if(destIndex<0) {
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        } else if(c>=0) {
            // Well-formed code point without letter data: regular full uppercasing,
            // called without a context iterator.
            const UChar *s;
            UChar32 c2 = 0;
            c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
            if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
                /* fast path version of appendResult() for ASCII results */
                dest[destIndex++]=(uint8_t)c2;
            } else {
                destIndex=appendResult(dest, destIndex, destCapacity, c, s);
                if(destIndex<0) {
                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }
            }
        } else {
            // Malformed UTF-8.
            destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
            if(destIndex<0) {
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        }
        i = nextIndex;
        state = nextState;
    }

    if(destIndex>destCapacity) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destIndex;
}

}  // namespace GreekUpper
U_NAMESPACE_END

/*
 * UTF8CaseMapper implementation for lowercasing:
 * runs _caseMap() with ucase_toFullLower over all of src,
 * with the whole string as the context.
 */
static int32_t U_CALLCONV
ucasemap_internalUTF8ToLower(const UCaseMap *csm,
                             uint8_t *dest, int32_t destCapacity,
                             const uint8_t *src, int32_t srcLength,
                             UErrorCode *pErrorCode) {
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
    return _caseMap(
        csm, ucase_toFullLower,
        dest, destCapacity,
        src, &csc, 0, srcLength,
        pErrorCode);
}

/*
 * UTF8CaseMapper implementation for uppercasing:
 * delegates to GreekUpper::toUpper() when the case locale of csm->locale
 * is Greek, otherwise runs _caseMap() with ucase_toFullUpper over all of src.
 */
static int32_t U_CALLCONV
ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
                             uint8_t *dest, int32_t destCapacity,
                             const uint8_t *src, int32_t srcLength,
                             UErrorCode *pErrorCode) {
    /* local copy of the cache because csm is const */
    int32_t locCache = csm->locCache;
    if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
        return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
    }
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
    return _caseMap(
        csm, ucase_toFullUpper,
        dest, destCapacity,
        src, &csc, 0, srcLength,
        pErrorCode);
}

/*
 * Case-folds the UTF-8 string src[0..srcLength[ into dest with
 * ucase_toFullFolding() and the given options.
 * Malformed UTF-8 byte sequences are copied unchanged.
 * Returns the output length, which may exceed destCapacity (then with
 * U_BUFFER_OVERFLOW_ERROR), or 0 with U_INDEX_OUTOFBOUNDS_ERROR if the
 * output length would overflow int32_t.
 */
static int32_t
utf8_foldCase(const UCaseProps *csp,
              uint8_t *dest, int32_t destCapacity,
              const uint8_t *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode) {
    int32_t srcIndex, destIndex;

    const UChar *s;
    UChar32 c, c2;
    int32_t start;

    /* case mapping loop */
    srcIndex=destIndex=0;
    while(srcIndex<srcLength) {
        start=srcIndex;
        U8_NEXT(src, srcIndex, srcLength, c);
        if(c<0) {
            // Malformed UTF-8.
            destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
            if(destIndex<0) {
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            continue;
        }
        c=ucase_toFullFolding(csp, c, &s, options);
        /* c2 is assigned inside the condition before the fast path reads it */
        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
            /* fast path version of appendResult() for ASCII results */
            dest[destIndex++]=(uint8_t)c2;
        } else {
            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
            if(destIndex<0) {
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        }
    }

    if(destIndex>destCapacity) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destIndex;
}

/*
 * UTF8CaseMapper implementation for case folding:
 * calls utf8_foldCase() with the case properties and options from csm.
 */
static int32_t U_CALLCONV
ucasemap_internalUTF8Fold(const UCaseMap *csm,
                          uint8_t *dest, int32_t destCapacity,
                          const uint8_t *src, int32_t srcLength,
                          UErrorCode *pErrorCode) {
    return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
}

/*
 * Common driver for the UTF-8 case mapping API functions.
 *
 * Validates the arguments, resolves srcLength==-1 with uprv_strlen(),
 * rejects overlapping src and dest buffers, runs stringCaseMapper and
 * passes the result through u_terminateChars().
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a negative destCapacity,
 * a NULL dest with destCapacity>0, a NULL src, srcLength<-1,
 * or overlapping buffers.
 * Returns 0 without doing anything if *pErrorCode indicates a failure on entry.
 */
U_CFUNC int32_t
ucasemap_mapUTF8(const UCaseMap *csm,
                 uint8_t *dest, int32_t destCapacity,
                 const uint8_t *src, int32_t srcLength,
                 UTF8CaseMapper *stringCaseMapper,
                 UErrorCode *pErrorCode) {
    int32_t destLength;

    /* check argument values */
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
        src==NULL ||
        srcLength<-1
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the string length */
    if(srcLength==-1) {
        srcLength=(int32_t)uprv_strlen((const char *)src);
    }

    /* check for overlapping source and destination */
    if( dest!=NULL &&
        ((src>=dest && src<(dest+destCapacity)) ||
         (dest>=src && dest<(src+srcLength)))
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
    return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
}

/* public API functions */

/* Lowercases UTF-8 text: ucasemap_mapUTF8() with ucasemap_internalUTF8ToLower. */
U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToLower(const UCaseMap *csm,
                     char *dest, int32_t destCapacity,
                     const char *src, int32_t srcLength,
                     UErrorCode *pErrorCode) {
    return ucasemap_mapUTF8(csm,
                   (uint8_t *)dest, destCapacity,
                   (const uint8_t *)src, srcLength,
                   ucasemap_internalUTF8ToLower, pErrorCode);
}

/* Uppercases UTF-8 text: ucasemap_mapUTF8() with ucasemap_internalUTF8ToUpper. */
U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToUpper(const UCaseMap *csm,
                     char *dest, int32_t destCapacity,
                     const char *src, int32_t srcLength,
                     UErrorCode *pErrorCode) {
    return ucasemap_mapUTF8(csm,
                   (uint8_t *)dest, destCapacity,
                   (const uint8_t *)src, srcLength,
                   ucasemap_internalUTF8ToUpper, pErrorCode);
}

/* Case-folds UTF-8 text: ucasemap_mapUTF8() with ucasemap_internalUTF8Fold. */
U_CAPI int32_t U_EXPORT2
ucasemap_utf8FoldCase(const UCaseMap *csm,
                      char *dest, int32_t destCapacity,
                      const char *src, int32_t srcLength,
                      UErrorCode *pErrorCode) {
    return ucasemap_mapUTF8(csm,
                   (uint8_t *)dest, destCapacity,
                   (const uint8_t *)src, srcLength,
                   ucasemap_internalUTF8Fold, pErrorCode);
}