1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2005-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: ucasemap.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005may06 16 * created by: Markus W. Scherer 17 * 18 * Case mapping service object and functions using it. 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/brkiter.h" 23 #include "unicode/bytestream.h" 24 #include "unicode/casemap.h" 25 #include "unicode/edits.h" 26 #include "unicode/stringoptions.h" 27 #include "unicode/stringpiece.h" 28 #include "unicode/ubrk.h" 29 #include "unicode/uloc.h" 30 #include "unicode/ustring.h" 31 #include "unicode/ucasemap.h" 32 #if !UCONFIG_NO_BREAK_ITERATION 33 #include "unicode/utext.h" 34 #endif 35 #include "unicode/utf.h" 36 #include "unicode/utf8.h" 37 #include "unicode/utf16.h" 38 #include "bytesinkutil.h" 39 #include "cmemory.h" 40 #include "cstring.h" 41 #include "uassert.h" 42 #include "ucase.h" 43 #include "ucasemap_imp.h" 44 #include "ustr_imp.h" 45 46 U_NAMESPACE_USE 47 48 /* UCaseMap service object -------------------------------------------------- */ 49 50 UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) : 51 #if !UCONFIG_NO_BREAK_ITERATION 52 iter(NULL), 53 #endif 54 caseLocale(UCASE_LOC_UNKNOWN), options(opts) { 55 ucasemap_setLocale(this, localeID, pErrorCode); 56 } 57 58 UCaseMap::~UCaseMap() { 59 #if !UCONFIG_NO_BREAK_ITERATION 60 delete iter; 61 #endif 62 } 63 64 U_CAPI UCaseMap * U_EXPORT2 65 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { 66 if(U_FAILURE(*pErrorCode)) { 67 return NULL; 68 } 69 UCaseMap *csm = new UCaseMap(locale, options, pErrorCode); 70 if(csm==NULL) { 71 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 72 return NULL; 73 } else if (U_FAILURE(*pErrorCode)) { 74 delete csm; 75 return NULL; 76 } 77 return csm; 78 } 79 80 U_CAPI void U_EXPORT2 81 ucasemap_close(UCaseMap *csm) { 82 delete csm; 83 } 84 85 U_CAPI const char * U_EXPORT2 86 ucasemap_getLocale(const UCaseMap *csm) { 87 return csm->locale; 88 } 89 90 U_CAPI uint32_t U_EXPORT2 91 ucasemap_getOptions(const UCaseMap *csm) { 92 return csm->options; 93 } 94 95 U_CAPI void U_EXPORT2 96 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 97 if(U_FAILURE(*pErrorCode)) { 98 return; 99 } 100 if (locale != NULL && *locale == 0) { 101 csm->locale[0] = 0; 102 csm->caseLocale = UCASE_LOC_ROOT; 103 return; 104 } 105 106 int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); 107 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { 108 *pErrorCode=U_ZERO_ERROR; 109 /* we only really need the language code for case mappings */ 110 length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); 111 } 112 if(length==sizeof(csm->locale)) { 113 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 114 } 115 if(U_SUCCESS(*pErrorCode)) { 116 csm->caseLocale=UCASE_LOC_UNKNOWN; 117 csm->caseLocale = ucase_getCaseLocale(csm->locale); 118 } else { 119 csm->locale[0]=0; 120 csm->caseLocale = UCASE_LOC_ROOT; 121 } 122 } 123 124 U_CAPI void U_EXPORT2 125 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { 126 if(U_FAILURE(*pErrorCode)) { 127 return; 128 } 129 csm->options=options; 130 } 131 132 /* UTF-8 string case mappings ----------------------------------------------- */ 133 134 /* TODO(markus): Move to a new, separate utf8case.cpp file. */ 135 136 namespace { 137 138 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ 139 inline UBool 140 appendResult(int32_t cpLength, int32_t result, const UChar *s, 141 ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) { 142 U_ASSERT(U_SUCCESS(errorCode)); 143 144 /* decode the result */ 145 if(result<0) { 146 /* (not) original code point */ 147 if(edits!=NULL) { 148 edits->addUnchanged(cpLength); 149 } 150 if((options & U_OMIT_UNCHANGED_TEXT) == 0) { 151 ByteSinkUtil::appendCodePoint(cpLength, ~result, sink); 152 } 153 } else { 154 if(result<=UCASE_MAX_STRING_LENGTH) { 155 // string: "result" is the UTF-16 length 156 return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode); 157 } else { 158 ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits); 159 } 160 } 161 return TRUE; 162 } 163 164 // See unicode/utf8.h U8_APPEND_UNSAFE(). 165 inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } 166 inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } 167 168 } // namespace 169 170 static UChar32 U_CALLCONV 171 utf8_caseContextIterator(void *context, int8_t dir) { 172 UCaseContext *csc=(UCaseContext *)context; 173 UChar32 c; 174 175 if(dir<0) { 176 /* reset for backward iteration */ 177 csc->index=csc->cpStart; 178 csc->dir=dir; 179 } else if(dir>0) { 180 /* reset for forward iteration */ 181 csc->index=csc->cpLimit; 182 csc->dir=dir; 183 } else { 184 /* continue current iteration direction */ 185 dir=csc->dir; 186 } 187 188 if(dir<0) { 189 if(csc->start<csc->index) { 190 U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c); 191 return c; 192 } 193 } else { 194 if(csc->index<csc->limit) { 195 U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c); 196 return c; 197 } 198 } 199 return U_SENTINEL; 200 } 201 202 /* 203 * Case-maps [srcStart..srcLimit[ but takes 204 * context [0..srcLength[ into account. 205 */ 206 static void 207 _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, 208 const uint8_t *src, UCaseContext *csc, 209 int32_t srcStart, int32_t srcLimit, 210 icu::ByteSink &sink, icu::Edits *edits, 211 UErrorCode &errorCode) { 212 /* case mapping loop */ 213 int32_t srcIndex=srcStart; 214 while (U_SUCCESS(errorCode) && srcIndex<srcLimit) { 215 int32_t cpStart; 216 csc->cpStart=cpStart=srcIndex; 217 UChar32 c; 218 U8_NEXT(src, srcIndex, srcLimit, c); 219 csc->cpLimit=srcIndex; 220 if(c<0) { 221 // Malformed UTF-8. 222 ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, 223 sink, options, edits, errorCode); 224 } else { 225 const UChar *s; 226 c=map(c, utf8_caseContextIterator, csc, &s, caseLocale); 227 appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); 228 } 229 } 230 } 231 232 #if !UCONFIG_NO_BREAK_ITERATION 233 234 U_CFUNC void U_CALLCONV 235 ucasemap_internalUTF8ToTitle( 236 int32_t caseLocale, uint32_t options, BreakIterator *iter, 237 const uint8_t *src, int32_t srcLength, 238 ByteSink &sink, icu::Edits *edits, 239 UErrorCode &errorCode) { 240 if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) { 241 return; 242 } 243 244 /* set up local variables */ 245 UCaseContext csc=UCASECONTEXT_INITIALIZER; 246 csc.p=(void *)src; 247 csc.limit=srcLength; 248 int32_t prev=0; 249 UBool isFirstIndex=TRUE; 250 251 /* titlecasing loop */ 252 while(prev<srcLength) { 253 /* find next index where to titlecase */ 254 int32_t index; 255 if(isFirstIndex) { 256 isFirstIndex=FALSE; 257 index=iter->first(); 258 } else { 259 index=iter->next(); 260 } 261 if(index==UBRK_DONE || index>srcLength) { 262 index=srcLength; 263 } 264 265 /* 266 * Segment [prev..index[ into 3 parts: 267 * a) skipped characters (copy as-is) [prev..titleStart[ 268 * b) first letter (titlecase) [titleStart..titleLimit[ 269 * c) subsequent characters (lowercase) [titleLimit..index[ 270 */ 271 if(prev<index) { 272 /* find and copy skipped characters [prev..titleStart[ */ 273 int32_t titleStart=prev; 274 int32_t titleLimit=prev; 275 UChar32 c; 276 U8_NEXT(src, titleLimit, index, c); 277 if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) { 278 // Adjust the titlecasing index to the next cased character, 279 // or to the next letter/number/symbol/private use. 280 // Stop with titleStart<titleLimit<=index 281 // if there is a character to be titlecased, 282 // or else stop with titleStart==titleLimit==index. 283 UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0; 284 while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) { 285 titleStart=titleLimit; 286 if(titleLimit==index) { 287 break; 288 } 289 U8_NEXT(src, titleLimit, index, c); 290 } 291 if (prev < titleStart) { 292 if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev, 293 sink, options, edits, errorCode)) { 294 return; 295 } 296 } 297 } 298 299 if(titleStart<titleLimit) { 300 /* titlecase c which is from [titleStart..titleLimit[ */ 301 if(c>=0) { 302 csc.cpStart=titleStart; 303 csc.cpLimit=titleLimit; 304 const UChar *s; 305 c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale); 306 if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) { 307 return; 308 } 309 } else { 310 // Malformed UTF-8. 311 if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart, 312 sink, options, edits, errorCode)) { 313 return; 314 } 315 } 316 317 /* Special case Dutch IJ titlecasing */ 318 if (titleStart+1 < index && 319 caseLocale == UCASE_LOC_DUTCH && 320 (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { 321 if (src[titleStart+1] == 0x006A) { 322 ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits); 323 titleLimit++; 324 } else if (src[titleStart+1] == 0x004A) { 325 // Keep the capital J from getting lowercased. 326 if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1, 327 sink, options, edits, errorCode)) { 328 return; 329 } 330 titleLimit++; 331 } 332 } 333 334 /* lowercase [titleLimit..index[ */ 335 if(titleLimit<index) { 336 if((options&U_TITLECASE_NO_LOWERCASE)==0) { 337 /* Normal operation: Lowercase the rest of the word. */ 338 _caseMap(caseLocale, options, ucase_toFullLower, 339 src, &csc, 340 titleLimit, index, 341 sink, edits, errorCode); 342 if(U_FAILURE(errorCode)) { 343 return; 344 } 345 } else { 346 /* Optionally just copy the rest of the word unchanged. */ 347 if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit, 348 sink, options, edits, errorCode)) { 349 return; 350 } 351 } 352 } 353 } 354 } 355 356 prev=index; 357 } 358 } 359 360 #endif 361 362 U_NAMESPACE_BEGIN 363 namespace GreekUpper { 364 365 UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) { 366 while (i < length) { 367 UChar32 c; 368 U8_NEXT(s, i, length, c); 369 int32_t type = ucase_getTypeOrIgnorable(c); 370 if ((type & UCASE_IGNORABLE) != 0) { 371 // Case-ignorable, continue with the loop. 372 } else if (type != UCASE_NONE) { 373 return TRUE; // Followed by cased letter. 374 } else { 375 return FALSE; // Uncased and not case-ignorable. 376 } 377 } 378 return FALSE; // Not followed by cased letter. 379 } 380 381 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. 382 void toUpper(uint32_t options, 383 const uint8_t *src, int32_t srcLength, 384 ByteSink &sink, Edits *edits, 385 UErrorCode &errorCode) { 386 uint32_t state = 0; 387 for (int32_t i = 0; i < srcLength;) { 388 int32_t nextIndex = i; 389 UChar32 c; 390 U8_NEXT(src, nextIndex, srcLength, c); 391 uint32_t nextState = 0; 392 int32_t type = ucase_getTypeOrIgnorable(c); 393 if ((type & UCASE_IGNORABLE) != 0) { 394 // c is case-ignorable 395 nextState |= (state & AFTER_CASED); 396 } else if (type != UCASE_NONE) { 397 // c is cased 398 nextState |= AFTER_CASED; 399 } 400 uint32_t data = getLetterData(c); 401 if (data > 0) { 402 uint32_t upper = data & UPPER_MASK; 403 // Add a dialytika to this iota or ypsilon vowel 404 // if we removed a tonos from the previous vowel, 405 // and that previous vowel did not also have (or gain) a dialytika. 406 // Adding one only to the final vowel in a longer sequence 407 // (which does not occur in normal writing) would require lookahead. 408 // Set the same flag as for preserving an existing dialytika. 409 if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && 410 (upper == 0x399 || upper == 0x3A5)) { 411 data |= HAS_DIALYTIKA; 412 } 413 int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. 414 if ((data & HAS_YPOGEGRAMMENI) != 0) { 415 numYpogegrammeni = 1; 416 } 417 // Skip combining diacritics after this Greek letter. 418 int32_t nextNextIndex = nextIndex; 419 while (nextIndex < srcLength) { 420 UChar32 c2; 421 U8_NEXT(src, nextNextIndex, srcLength, c2); 422 uint32_t diacriticData = getDiacriticData(c2); 423 if (diacriticData != 0) { 424 data |= diacriticData; 425 if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { 426 ++numYpogegrammeni; 427 } 428 nextIndex = nextNextIndex; 429 } else { 430 break; // not a Greek diacritic 431 } 432 } 433 if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { 434 nextState |= AFTER_VOWEL_WITH_ACCENT; 435 } 436 // Map according to Greek rules. 437 UBool addTonos = FALSE; 438 if (upper == 0x397 && 439 (data & HAS_ACCENT) != 0 && 440 numYpogegrammeni == 0 && 441 (state & AFTER_CASED) == 0 && 442 !isFollowedByCasedLetter(src, nextIndex, srcLength)) { 443 // Keep disjunctive "or" with (only) a tonos. 444 // We use the same "word boundary" conditions as for the Final_Sigma test. 445 if (i == nextIndex) { 446 upper = 0x389; // Preserve the precomposed form. 447 } else { 448 addTonos = TRUE; 449 } 450 } else if ((data & HAS_DIALYTIKA) != 0) { 451 // Preserve a vowel with dialytika in precomposed form if it exists. 452 if (upper == 0x399) { 453 upper = 0x3AA; 454 data &= ~HAS_EITHER_DIALYTIKA; 455 } else if (upper == 0x3A5) { 456 upper = 0x3AB; 457 data &= ~HAS_EITHER_DIALYTIKA; 458 } 459 } 460 461 UBool change; 462 if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { 463 change = TRUE; // common, simple usage 464 } else { 465 // Find out first whether we are changing the text. 466 U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block 467 change = (i + 2) > nextIndex || 468 src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) || 469 numYpogegrammeni > 0; 470 int32_t i2 = i + 2; 471 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 472 change |= (i2 + 2) > nextIndex || 473 src[i2] != (uint8_t)u8"\u0308"[0] || 474 src[i2 + 1] != (uint8_t)u8"\u0308"[1]; 475 i2 += 2; 476 } 477 if (addTonos) { 478 change |= (i2 + 2) > nextIndex || 479 src[i2] != (uint8_t)u8"\u0301"[0] || 480 src[i2 + 1] != (uint8_t)u8"\u0301"[1]; 481 i2 += 2; 482 } 483 int32_t oldLength = nextIndex - i; 484 int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399 485 change |= oldLength != newLength; 486 if (change) { 487 if (edits != NULL) { 488 edits->addReplace(oldLength, newLength); 489 } 490 } else { 491 if (edits != NULL) { 492 edits->addUnchanged(oldLength); 493 } 494 // Write unchanged text? 495 change = (options & U_OMIT_UNCHANGED_TEXT) == 0; 496 } 497 } 498 499 if (change) { 500 ByteSinkUtil::appendTwoBytes(upper, sink); 501 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 502 sink.Append(u8"\u0308", 2); // restore or add a dialytika 503 } 504 if (addTonos) { 505 sink.Append(u8"\u0301", 2); 506 } 507 while (numYpogegrammeni > 0) { 508 sink.Append(u8"\u0399", 2); 509 --numYpogegrammeni; 510 } 511 } 512 } else if(c>=0) { 513 const UChar *s; 514 c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); 515 if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) { 516 return; 517 } 518 } else { 519 // Malformed UTF-8. 520 if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i, 521 sink, options, edits, errorCode)) { 522 return; 523 } 524 } 525 i = nextIndex; 526 state = nextState; 527 } 528 } 529 530 } // namespace GreekUpper 531 U_NAMESPACE_END 532 533 static void U_CALLCONV 534 ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED 535 const uint8_t *src, int32_t srcLength, 536 icu::ByteSink &sink, icu::Edits *edits, 537 UErrorCode &errorCode) { 538 UCaseContext csc=UCASECONTEXT_INITIALIZER; 539 csc.p=(void *)src; 540 csc.limit=srcLength; 541 _caseMap( 542 caseLocale, options, ucase_toFullLower, 543 src, &csc, 0, srcLength, 544 sink, edits, errorCode); 545 } 546 547 static void U_CALLCONV 548 ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED 549 const uint8_t *src, int32_t srcLength, 550 icu::ByteSink &sink, icu::Edits *edits, 551 UErrorCode &errorCode) { 552 if (caseLocale == UCASE_LOC_GREEK) { 553 GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode); 554 } else { 555 UCaseContext csc=UCASECONTEXT_INITIALIZER; 556 csc.p=(void *)src; 557 csc.limit=srcLength; 558 _caseMap( 559 caseLocale, options, ucase_toFullUpper, 560 src, &csc, 0, srcLength, 561 sink, edits, errorCode); 562 } 563 } 564 565 static void U_CALLCONV 566 ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED 567 const uint8_t *src, int32_t srcLength, 568 icu::ByteSink &sink, icu::Edits *edits, 569 UErrorCode &errorCode) { 570 /* case mapping loop */ 571 int32_t srcIndex = 0; 572 while (U_SUCCESS(errorCode) && srcIndex < srcLength) { 573 int32_t cpStart = srcIndex; 574 UChar32 c; 575 U8_NEXT(src, srcIndex, srcLength, c); 576 if(c<0) { 577 // Malformed UTF-8. 578 ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, 579 sink, options, edits, errorCode); 580 } else { 581 const UChar *s; 582 c = ucase_toFullFolding(c, &s, options); 583 appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); 584 } 585 } 586 } 587 588 void 589 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 590 const char *src, int32_t srcLength, 591 UTF8CaseMapper *stringCaseMapper, 592 icu::ByteSink &sink, icu::Edits *edits, 593 UErrorCode &errorCode) { 594 /* check argument values */ 595 if (U_FAILURE(errorCode)) { 596 return; 597 } 598 if ((src == nullptr && srcLength != 0) || srcLength < -1) { 599 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 600 return; 601 } 602 603 // Get the string length. 604 if (srcLength == -1) { 605 srcLength = (int32_t)uprv_strlen((const char *)src); 606 } 607 608 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { 609 edits->reset(); 610 } 611 stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR 612 (const uint8_t *)src, srcLength, sink, edits, errorCode); 613 sink.Flush(); 614 if (U_SUCCESS(errorCode)) { 615 if (edits != nullptr) { 616 edits->copyErrorTo(errorCode); 617 } 618 } 619 } 620 621 int32_t 622 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 623 char *dest, int32_t destCapacity, 624 const char *src, int32_t srcLength, 625 UTF8CaseMapper *stringCaseMapper, 626 icu::Edits *edits, 627 UErrorCode &errorCode) { 628 /* check argument values */ 629 if(U_FAILURE(errorCode)) { 630 return 0; 631 } 632 if( destCapacity<0 || 633 (dest==NULL && destCapacity>0) || 634 (src==NULL && srcLength!=0) || srcLength<-1 635 ) { 636 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 637 return 0; 638 } 639 640 /* get the string length */ 641 if(srcLength==-1) { 642 srcLength=(int32_t)uprv_strlen((const char *)src); 643 } 644 645 /* check for overlapping source and destination */ 646 if( dest!=NULL && 647 ((src>=dest && src<(dest+destCapacity)) || 648 (dest>=src && dest<(src+srcLength))) 649 ) { 650 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 651 return 0; 652 } 653 654 CheckedArrayByteSink sink(dest, destCapacity); 655 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { 656 edits->reset(); 657 } 658 stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR 659 (const uint8_t *)src, srcLength, sink, edits, errorCode); 660 sink.Flush(); 661 if (U_SUCCESS(errorCode)) { 662 if (sink.Overflowed()) { 663 errorCode = U_BUFFER_OVERFLOW_ERROR; 664 } else if (edits != nullptr) { 665 edits->copyErrorTo(errorCode); 666 } 667 } 668 return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode); 669 } 670 671 /* public API functions */ 672 673 U_CAPI int32_t U_EXPORT2 674 ucasemap_utf8ToLower(const UCaseMap *csm, 675 char *dest, int32_t destCapacity, 676 const char *src, int32_t srcLength, 677 UErrorCode *pErrorCode) { 678 return ucasemap_mapUTF8( 679 csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL 680 dest, destCapacity, 681 src, srcLength, 682 ucasemap_internalUTF8ToLower, NULL, *pErrorCode); 683 } 684 685 U_CAPI int32_t U_EXPORT2 686 ucasemap_utf8ToUpper(const UCaseMap *csm, 687 char *dest, int32_t destCapacity, 688 const char *src, int32_t srcLength, 689 UErrorCode *pErrorCode) { 690 return ucasemap_mapUTF8( 691 csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL 692 dest, destCapacity, 693 src, srcLength, 694 ucasemap_internalUTF8ToUpper, NULL, *pErrorCode); 695 } 696 697 U_CAPI int32_t U_EXPORT2 698 ucasemap_utf8FoldCase(const UCaseMap *csm, 699 char *dest, int32_t destCapacity, 700 const char *src, int32_t srcLength, 701 UErrorCode *pErrorCode) { 702 return ucasemap_mapUTF8( 703 UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL 704 dest, destCapacity, 705 src, srcLength, 706 ucasemap_internalUTF8Fold, NULL, *pErrorCode); 707 } 708 709 U_NAMESPACE_BEGIN 710 711 void CaseMap::utf8ToLower( 712 const char *locale, uint32_t options, 713 StringPiece src, ByteSink &sink, Edits *edits, 714 UErrorCode &errorCode) { 715 ucasemap_mapUTF8( 716 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL 717 src.data(), src.length(), 718 ucasemap_internalUTF8ToLower, sink, edits, errorCode); 719 } 720 721 void CaseMap::utf8ToUpper( 722 const char *locale, uint32_t options, 723 StringPiece src, ByteSink &sink, Edits *edits, 724 UErrorCode &errorCode) { 725 ucasemap_mapUTF8( 726 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL 727 src.data(), src.length(), 728 ucasemap_internalUTF8ToUpper, sink, edits, errorCode); 729 } 730 731 void CaseMap::utf8Fold( 732 uint32_t options, 733 StringPiece src, ByteSink &sink, Edits *edits, 734 UErrorCode &errorCode) { 735 ucasemap_mapUTF8( 736 UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL 737 src.data(), src.length(), 738 ucasemap_internalUTF8Fold, sink, edits, errorCode); 739 } 740 741 int32_t CaseMap::utf8ToLower( 742 const char *locale, uint32_t options, 743 const char *src, int32_t srcLength, 744 char *dest, int32_t destCapacity, Edits *edits, 745 UErrorCode &errorCode) { 746 return ucasemap_mapUTF8( 747 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL 748 dest, destCapacity, 749 src, srcLength, 750 ucasemap_internalUTF8ToLower, edits, errorCode); 751 } 752 753 int32_t CaseMap::utf8ToUpper( 754 const char *locale, uint32_t options, 755 const char *src, int32_t srcLength, 756 char *dest, int32_t destCapacity, Edits *edits, 757 UErrorCode &errorCode) { 758 return ucasemap_mapUTF8( 759 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL 760 dest, destCapacity, 761 src, srcLength, 762 ucasemap_internalUTF8ToUpper, edits, errorCode); 763 } 764 765 int32_t CaseMap::utf8Fold( 766 uint32_t options, 767 const char *src, int32_t srcLength, 768 char *dest, int32_t destCapacity, Edits *edits, 769 UErrorCode &errorCode) { 770 return ucasemap_mapUTF8( 771 UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL 772 dest, destCapacity, 773 src, srcLength, 774 ucasemap_internalUTF8Fold, edits, errorCode); 775 } 776 777 U_NAMESPACE_END 778