1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2002-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: strcase.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2002mar12 16 * created by: Markus W. Scherer 17 * 18 * Test file for string casing C++ API functions. 19 */ 20 21 #include "unicode/std_string.h" 22 #include "unicode/brkiter.h" 23 #include "unicode/casemap.h" 24 #include "unicode/edits.h" 25 #include "unicode/uchar.h" 26 #include "unicode/ures.h" 27 #include "unicode/uloc.h" 28 #include "unicode/locid.h" 29 #include "unicode/ubrk.h" 30 #include "unicode/unistr.h" 31 #include "unicode/ucasemap.h" 32 #include "unicode/ustring.h" 33 #include "ucase.h" 34 #include "ustrtest.h" 35 #include "unicode/tstdtmod.h" 36 #include "cmemory.h" 37 #include "testutil.h" 38 39 class StringCaseTest: public IntlTest { 40 public: 41 StringCaseTest(); 42 virtual ~StringCaseTest(); 43 44 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); 45 46 void TestCaseConversion(); 47 48 void TestCasingImpl(const UnicodeString &input, 49 const UnicodeString &output, 50 int32_t whichCase, 51 void *iter, const char *localeID, uint32_t options); 52 void TestCasing(); 53 void TestTitleOptions(); 54 void TestFullCaseFoldingIterator(); 55 void TestGreekUpper(); 56 void TestLongUpper(); 57 void TestMalformedUTF8(); 58 void TestBufferOverflow(); 59 void TestEdits(); 60 void TestCopyMoveEdits(); 61 void TestEditsFindFwdBwd(); 62 void TestMergeEdits(); 63 void TestCaseMapWithEdits(); 64 void TestCaseMapUTF8WithEdits(); 65 void TestCaseMapToString(); 66 void TestCaseMapUTF8ToString(); 67 void TestLongUnicodeString(); 68 void TestBug13127(); 69 void TestInPlaceTitle(); 70 71 private: 72 void assertGreekUpper(const char16_t *s, const char16_t *expected); 73 74 Locale GREEK_LOCALE_; 75 }; 76 77 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {} 78 79 StringCaseTest::~StringCaseTest() {} 80 81 extern IntlTest *createStringCaseTest() { 82 return new StringCaseTest(); 83 } 84 85 void 86 StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 87 if(exec) { 88 logln("TestSuite StringCaseTest: "); 89 } 90 TESTCASE_AUTO_BEGIN; 91 TESTCASE_AUTO(TestCaseConversion); 92 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 93 TESTCASE_AUTO(TestCasing); 94 TESTCASE_AUTO(TestTitleOptions); 95 #endif 96 TESTCASE_AUTO(TestFullCaseFoldingIterator); 97 TESTCASE_AUTO(TestGreekUpper); 98 TESTCASE_AUTO(TestLongUpper); 99 TESTCASE_AUTO(TestMalformedUTF8); 100 TESTCASE_AUTO(TestBufferOverflow); 101 TESTCASE_AUTO(TestEdits); 102 TESTCASE_AUTO(TestCopyMoveEdits); 103 TESTCASE_AUTO(TestEditsFindFwdBwd); 104 TESTCASE_AUTO(TestMergeEdits); 105 TESTCASE_AUTO(TestCaseMapWithEdits); 106 TESTCASE_AUTO(TestCaseMapUTF8WithEdits); 107 TESTCASE_AUTO(TestCaseMapToString); 108 TESTCASE_AUTO(TestCaseMapUTF8ToString); 109 TESTCASE_AUTO(TestLongUnicodeString); 110 #if !UCONFIG_NO_BREAK_ITERATION 111 TESTCASE_AUTO(TestBug13127); 112 TESTCASE_AUTO(TestInPlaceTitle); 113 #endif 114 TESTCASE_AUTO_END; 115 } 116 117 void 118 StringCaseTest::TestCaseConversion() 119 { 120 static const UChar uppercaseGreek[] = 121 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4, 122 0x39f, 0x3a3, 0 }; 123 // "IESUS CHRISTOS" 124 125 static const UChar lowercaseGreek[] = 126 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4, 127 0x3bf, 0x3c2, 0 }; 128 // "iesus christos" 129 130 static const UChar lowercaseTurkish[] = 131 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f, 132 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 }; 133 134 static const UChar uppercaseTurkish[] = 135 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20, 136 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 }; 137 138 UnicodeString expectedResult; 139 UnicodeString test3; 140 141 test3 += (UChar32)0x0130; 142 test3 += "STANBUL, NOT CONSTANTINOPLE!"; 143 144 UnicodeString test4(test3); 145 test4.toLower(Locale("")); 146 expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape(); 147 if (test4 != expectedResult) 148 errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 149 150 test4 = test3; 151 test4.toLower(Locale("tr", "TR")); 152 expectedResult = lowercaseTurkish; 153 if (test4 != expectedResult) 154 errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 155 156 test3 = "topkap"; 157 test3 += (UChar32)0x0131; 158 test3 += " palace, istanbul"; 159 test4 = test3; 160 161 test4.toUpper(Locale("")); 162 expectedResult = "TOPKAPI PALACE, ISTANBUL"; 163 if (test4 != expectedResult) 164 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 165 166 test4 = test3; 167 test4.toUpper(Locale("tr", "TR")); 168 expectedResult = uppercaseTurkish; 169 if (test4 != expectedResult) 170 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 171 172 test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe"); 173 174 test3.toUpper(Locale("de", "DE")); 175 expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE"); 176 if (test3 != expectedResult) 177 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\"."); 178 179 test4.replace(0, test4.length(), uppercaseGreek); 180 181 test4.toLower(Locale("el", "GR")); 182 expectedResult = lowercaseGreek; 183 if (test4 != expectedResult) 184 errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 185 186 test4.replace(0, test4.length(), lowercaseGreek); 187 188 test4.toUpper(); 189 expectedResult = uppercaseGreek; 190 if (test4 != expectedResult) 191 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 192 193 // more string case mapping tests with the new implementation 194 { 195 static const UChar 196 197 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, 198 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, 199 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, 200 201 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff }, 202 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, 203 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, 204 205 beforeMiniUpper[]= { 0xdf, 0x61 }, 206 miniUpper[]= { 0x53, 0x53, 0x41 }; 207 208 UnicodeString s; 209 210 /* lowercase with root locale */ 211 s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); 212 s.toLower(""); 213 if( s.length()!=UPRV_LENGTHOF(lowerRoot) || 214 s!=UnicodeString(FALSE, lowerRoot, s.length()) 215 ) { 216 errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\""); 217 } 218 219 /* lowercase with turkish locale */ 220 s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); 221 s.setCharAt(0, beforeLower[0]).toLower(Locale("tr")); 222 if( s.length()!=UPRV_LENGTHOF(lowerTurkish) || 223 s!=UnicodeString(FALSE, lowerTurkish, s.length()) 224 ) { 225 errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\""); 226 } 227 228 /* uppercase with root locale */ 229 s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); 230 s.setCharAt(0, beforeUpper[0]).toUpper(Locale("")); 231 if( s.length()!=UPRV_LENGTHOF(upperRoot) || 232 s!=UnicodeString(FALSE, upperRoot, s.length()) 233 ) { 234 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\""); 235 } 236 237 /* uppercase with turkish locale */ 238 s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); 239 s.toUpper(Locale("tr")); 240 if( s.length()!=UPRV_LENGTHOF(upperTurkish) || 241 s!=UnicodeString(FALSE, upperTurkish, s.length()) 242 ) { 243 errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\""); 244 } 245 246 /* uppercase a short string with root locale */ 247 s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper)); 248 s.setCharAt(0, beforeMiniUpper[0]).toUpper(""); 249 if( s.length()!=UPRV_LENGTHOF(miniUpper) || 250 s!=UnicodeString(FALSE, miniUpper, s.length()) 251 ) { 252 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\""); 253 } 254 } 255 256 // test some supplementary characters (>= Unicode 3.1) 257 { 258 UnicodeString t; 259 260 UnicodeString 261 deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(), 262 deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(), 263 deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape(); 264 (t=deseretInput).toLower(); 265 if(t!=deseretLower) { 266 errln("error lowercasing Deseret (plane 1) characters"); 267 } 268 (t=deseretInput).toUpper(); 269 if(t!=deseretUpper) { 270 errln("error uppercasing Deseret (plane 1) characters"); 271 } 272 } 273 274 // test some more cases that looked like problems 275 { 276 UnicodeString t; 277 278 UnicodeString 279 ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(), 280 ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(), 281 ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape(); 282 (t=ljInput).toLower("en"); 283 if(t!=ljLower) { 284 errln("error lowercasing LJ characters"); 285 } 286 (t=ljInput).toUpper("en"); 287 if(t!=ljUpper) { 288 errln("error uppercasing LJ characters"); 289 } 290 } 291 292 #if !UCONFIG_NO_NORMALIZATION 293 // some context-sensitive casing depends on normalization data being present 294 295 // Unicode 3.1.1 SpecialCasing tests 296 { 297 UnicodeString t; 298 299 // sigmas preceded and/or followed by cased letters 300 UnicodeString 301 sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(), 302 sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(), 303 sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(); 304 305 (t=sigmas).toLower(); 306 if(t!=sigmasLower) { 307 errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\""); 308 } 309 310 (t=sigmas).toUpper(Locale("")); 311 if(t!=sigmasUpper) { 312 errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\""); 313 } 314 315 // turkish & azerbaijani dotless i & dotted I 316 // remove dot above if there was a capital I before and there are no more accents above 317 UnicodeString 318 dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(), 319 dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(), 320 dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(); 321 322 (t=dots).toLower("tr"); 323 if(t!=dotsTurkish) { 324 errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\""); 325 } 326 327 (t=dots).toLower("de"); 328 if(t!=dotsDefault) { 329 errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); 330 } 331 } 332 333 // more Unicode 3.1.1 tests 334 { 335 UnicodeString t; 336 337 // lithuanian dot above in uppercasing 338 UnicodeString 339 dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(), 340 dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(), 341 dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape(); 342 343 (t=dots).toUpper("lt"); 344 if(t!=dotsLithuanian) { 345 errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\""); 346 } 347 348 (t=dots).toUpper("de"); 349 if(t!=dotsDefault) { 350 errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); 351 } 352 353 // lithuanian adds dot above to i in lowercasing if there are more above accents 354 UnicodeString 355 i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(), 356 iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(), 357 iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape(); 358 359 (t=i).toLower("lt"); 360 if(t!=iLithuanian) { 361 errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\""); 362 } 363 364 (t=i).toLower("de"); 365 if(t!=iDefault) { 366 errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\""); 367 } 368 } 369 370 #endif 371 372 // test case folding 373 { 374 UnicodeString 375 s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(), 376 f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(), 377 g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(), 378 t; 379 380 (t=s).foldCase(); 381 if(f!=t) { 382 errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\""); 383 } 384 385 // alternate handling for dotted I/dotless i (U+0130, U+0131) 386 (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I); 387 if(g!=t) { 388 errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\""); 389 } 390 } 391 } 392 393 // data-driven case mapping tests ------------------------------------------ *** 394 395 enum { 396 TEST_LOWER, 397 TEST_UPPER, 398 TEST_TITLE, 399 TEST_FOLD, 400 TEST_COUNT 401 }; 402 403 // names of TestData children in casing.txt 404 static const char *const dataNames[TEST_COUNT+1]={ 405 "lowercasing", 406 "uppercasing", 407 "titlecasing", 408 "casefolding", 409 "" 410 }; 411 412 void 413 StringCaseTest::TestCasingImpl(const UnicodeString &input, 414 const UnicodeString &output, 415 int32_t whichCase, 416 void *iter, const char *localeID, uint32_t options) { 417 // UnicodeString 418 UnicodeString result; 419 const char *name; 420 Locale locale(localeID); 421 422 result=input; 423 switch(whichCase) { 424 case TEST_LOWER: 425 name="toLower"; 426 result.toLower(locale); 427 break; 428 case TEST_UPPER: 429 name="toUpper"; 430 result.toUpper(locale); 431 break; 432 #if !UCONFIG_NO_BREAK_ITERATION 433 case TEST_TITLE: 434 name="toTitle"; 435 result.toTitle((BreakIterator *)iter, locale, options); 436 break; 437 #endif 438 case TEST_FOLD: 439 name="foldCase"; 440 result.foldCase(options); 441 break; 442 default: 443 name=""; 444 break; // won't happen 445 } 446 if(result!=output) { 447 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); 448 } 449 #if !UCONFIG_NO_BREAK_ITERATION 450 if(whichCase==TEST_TITLE && options==0) { 451 result=input; 452 result.toTitle((BreakIterator *)iter, locale); 453 if(result!=output) { 454 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); 455 } 456 } 457 #endif 458 459 // UTF-8 460 char utf8In[100], utf8Out[100]; 461 int32_t utf8InLength, utf8OutLength, resultLength; 462 UChar *buffer; 463 464 IcuTestErrorCode errorCode(*this, "TestCasingImpl"); 465 LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); 466 #if !UCONFIG_NO_BREAK_ITERATION 467 if(iter!=NULL) { 468 // Clone the break iterator so that the UCaseMap can safely adopt it. 469 UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); 470 ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); 471 } 472 #endif 473 474 u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); 475 switch(whichCase) { 476 case TEST_LOWER: 477 name="ucasemap_utf8ToLower"; 478 utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), 479 utf8Out, (int32_t)sizeof(utf8Out), 480 utf8In, utf8InLength, errorCode); 481 break; 482 case TEST_UPPER: 483 name="ucasemap_utf8ToUpper"; 484 utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), 485 utf8Out, (int32_t)sizeof(utf8Out), 486 utf8In, utf8InLength, errorCode); 487 break; 488 #if !UCONFIG_NO_BREAK_ITERATION 489 case TEST_TITLE: 490 name="ucasemap_utf8ToTitle"; 491 utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), 492 utf8Out, (int32_t)sizeof(utf8Out), 493 utf8In, utf8InLength, errorCode); 494 break; 495 #endif 496 case TEST_FOLD: 497 name="ucasemap_utf8FoldCase"; 498 utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), 499 utf8Out, (int32_t)sizeof(utf8Out), 500 utf8In, utf8InLength, errorCode); 501 break; 502 default: 503 name=""; 504 utf8OutLength=0; 505 break; // won't happen 506 } 507 buffer=result.getBuffer(utf8OutLength); 508 u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); 509 result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); 510 511 if(errorCode.isFailure()) { 512 errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); 513 errorCode.reset(); 514 } else if(result!=output) { 515 errln("error: %s() got a wrong result for a test case from casing.res", name); 516 errln("expected \"" + output + "\" got \"" + result + "\"" ); 517 } 518 } 519 520 void 521 StringCaseTest::TestCasing() { 522 UErrorCode status = U_ZERO_ERROR; 523 #if !UCONFIG_NO_BREAK_ITERATION 524 LocalUBreakIteratorPointer iter; 525 #endif 526 char cLocaleID[100]; 527 UnicodeString locale, input, output, optionsString, result; 528 uint32_t options; 529 int32_t whichCase, type; 530 LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); 531 if(U_SUCCESS(status)) { 532 for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { 533 #if UCONFIG_NO_BREAK_ITERATION 534 if(whichCase==TEST_TITLE) { 535 continue; 536 } 537 #endif 538 LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); 539 if(U_FAILURE(status)) { 540 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); 541 break; 542 } 543 const DataMap *myCase = NULL; 544 while(casingTest->nextCase(myCase, status)) { 545 input = myCase->getString("Input", status); 546 output = myCase->getString("Output", status); 547 548 if(whichCase!=TEST_FOLD) { 549 locale = myCase->getString("Locale", status); 550 } 551 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); 552 553 #if !UCONFIG_NO_BREAK_ITERATION 554 if(whichCase==TEST_TITLE) { 555 type = myCase->getInt("Type", status); 556 if(type>=0) { 557 iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); 558 } else if(type==-2) { 559 // Open a trivial break iterator that only delivers { 0, length } 560 // or even just { 0 } as boundaries. 561 static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" 562 UParseError parseError; 563 iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status)); 564 } 565 } 566 #endif 567 options = 0; 568 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { 569 optionsString = myCase->getString("Options", status); 570 if(optionsString.indexOf((UChar)0x54)>=0) { // T 571 options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; 572 } 573 if(optionsString.indexOf((UChar)0x4c)>=0) { // L 574 options|=U_TITLECASE_NO_LOWERCASE; 575 } 576 if(optionsString.indexOf((UChar)0x41)>=0) { // A 577 options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; 578 } 579 } 580 581 if(U_FAILURE(status)) { 582 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); 583 status = U_ZERO_ERROR; 584 } else { 585 #if UCONFIG_NO_BREAK_ITERATION 586 LocalPointer<UMemory> iter; 587 #endif 588 TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); 589 } 590 591 #if !UCONFIG_NO_BREAK_ITERATION 592 iter.adoptInstead(NULL); 593 #endif 594 } 595 } 596 } 597 598 #if !UCONFIG_NO_BREAK_ITERATION 599 // more tests for API coverage 600 status=U_ZERO_ERROR; 601 input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); 602 (result=input).toTitle(NULL); 603 if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { 604 dataerrln("UnicodeString::toTitle(NULL) failed."); 605 } 606 #endif 607 } 608 609 void 610 StringCaseTest::TestTitleOptions() { 611 // New options in ICU 60. 612 TestCasingImpl(u"cAt! eTc.", u"Cat! etc.", TEST_TITLE, 613 nullptr, "", U_TITLECASE_WHOLE_STRING); 614 TestCasingImpl(u"a CaT. A dOg! eTc.", u"A CaT. A dOg! ETc.", TEST_TITLE, 615 nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE); 616 TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE, 617 nullptr, "", U_TITLECASE_WHOLE_STRING); 618 TestCasingImpl(u"(aBc)", u"(abc)", TEST_TITLE, 619 nullptr, "", U_TITLECASE_WHOLE_STRING); 620 TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE, 621 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED); 622 TestCasingImpl(u"(aBc)", u"(Abc)", TEST_TITLE, 623 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED); 624 TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE, 625 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE); 626 TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE, 627 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT); 628 TestCasingImpl(u"ijs", u"IJs", TEST_TITLE, 629 nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING); 630 TestCasingImpl(u"ijs", u"js", TEST_TITLE, 631 nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING); 632 633 #if !UCONFIG_NO_BREAK_ITERATION 634 // Test conflicting settings. 635 // If & when we add more options, then the ORed combinations may become 636 // indistinguishable from valid values. 637 IcuTestErrorCode errorCode(*this, "TestTitleOptions"); 638 CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr, 639 u"", 0, nullptr, 0, nullptr, errorCode); 640 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { 641 errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument", 642 errorCode.errorName()); 643 } 644 errorCode.reset(); 645 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr, 646 u"", 0, nullptr, 0, nullptr, errorCode); 647 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { 648 errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument", 649 errorCode.errorName()); 650 } 651 errorCode.reset(); 652 LocalPointer<BreakIterator> iter( 653 BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode)); 654 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(), 655 u"", 0, nullptr, 0, nullptr, errorCode); 656 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { 657 errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument", 658 errorCode.errorName()); 659 } 660 errorCode.reset(); 661 #endif 662 } 663 664 void 665 StringCaseTest::TestFullCaseFoldingIterator() { 666 UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi"); 667 UnicodeString ss=UNICODE_STRING_SIMPLE("ss"); 668 FullCaseFoldingIterator iter; 669 int32_t count=0; 670 int32_t countSpecific=0; 671 UChar32 c; 672 UnicodeString full; 673 while((c=iter.next(full))>=0) { 674 ++count; 675 // Check that the full Case_Folding has more than 1 code point. 676 if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) { 677 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c); 678 continue; 679 } 680 // Check that full == Case_Folding(c). 681 UnicodeString cf(c); 682 cf.foldCase(); 683 if(full!=cf) { 684 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c); 685 continue; 686 } 687 // Spot-check a couple of specific cases. 688 if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) { 689 ++countSpecific; 690 } 691 } 692 if(countSpecific!=3) { 693 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases"); 694 } 695 if(count<70) { 696 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count); 697 } 698 } 699 700 void 701 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) { 702 UnicodeString s16(s); 703 UnicodeString expected16(expected); 704 UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")"; 705 UnicodeString result16(s16); 706 result16.toUpper(GREEK_LOCALE_); 707 assertEquals(msg, expected16, result16); 708 709 msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap="; 710 int32_t length = expected16.length(); 711 int32_t capacities[] = { 712 // Keep in sync with the UTF-8 capacities near the bottom of this function. 713 0, length / 2, length - 1, length, length + 1 714 }; 715 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { 716 int32_t cap = capacities[i]; 717 UChar *dest16 = result16.getBuffer(expected16.length() + 1); 718 u_memset(dest16, 0x55AA, result16.getCapacity()); 719 UErrorCode errorCode = U_ZERO_ERROR; 720 length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode); 721 assertEquals(msg + cap, expected16.length(), length); 722 UErrorCode expectedErrorCode; 723 if (cap < expected16.length()) { 724 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; 725 } else if (cap == expected16.length()) { 726 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; 727 } else { 728 expectedErrorCode = U_ZERO_ERROR; 729 assertEquals(msg + cap + " NUL", 0, dest16[length]); 730 } 731 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); 732 result16.releaseBuffer(length); 733 if (cap >= expected16.length()) { 734 assertEquals(msg + cap, expected16, result16); 735 } 736 } 737 738 UErrorCode errorCode = U_ZERO_ERROR; 739 LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode)); 740 assertSuccess("ucasemap_open", errorCode); 741 std::string s8; 742 s16.toUTF8String(s8); 743 msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")"; 744 char dest8[1000]; 745 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8), 746 s8.data(), s8.length(), &errorCode); 747 assertSuccess("ucasemap_utf8ToUpper", errorCode); 748 StringPiece result8(dest8, length); 749 UnicodeString result16From8 = UnicodeString::fromUTF8(result8); 750 assertEquals(msg, expected16, result16From8); 751 752 msg += " cap="; 753 capacities[1] = length / 2; 754 capacities[2] = length - 1; 755 capacities[3] = length; 756 capacities[4] = length + 1; 757 char dest8b[1000]; 758 int32_t expected8Length = length; // Assuming the previous call worked. 759 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { 760 int32_t cap = capacities[i]; 761 memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b)); 762 UErrorCode errorCode = U_ZERO_ERROR; 763 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap, 764 s8.data(), s8.length(), &errorCode); 765 assertEquals(msg + cap, expected8Length, length); 766 UErrorCode expectedErrorCode; 767 if (cap < expected8Length) { 768 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; 769 } else if (cap == expected8Length) { 770 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; 771 } else { 772 expectedErrorCode = U_ZERO_ERROR; 773 // Casts to int32_t to avoid matching UBool. 774 assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]); 775 } 776 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); 777 if (cap >= expected8Length) { 778 assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length)); 779 } 780 } 781 } 782 783 void 784 StringCaseTest::TestGreekUpper() { 785 // http://bugs.icu-project.org/trac/ticket/5456 786 assertGreekUpper(u", , ", u", , "); 787 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039 788 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893 789 assertGreekUpper(u"", u""); 790 assertGreekUpper(u", , ", u", , "); 791 assertGreekUpper(u", , ", u", , "); 792 assertGreekUpper(u", , ", u", , "); 793 assertGreekUpper(u"", u""); 794 assertGreekUpper(u"", u""); 795 assertGreekUpper(u" ", u" "); 796 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html 797 assertGreekUpper(u" ", u" "); 798 assertGreekUpper(u" ", u" "); 799 // http://unicode.org/udhr/d/udhr_ell_polytonic.html 800 assertGreekUpper(u" ", u" "); 801 assertGreekUpper(u" ", u" "); 802 // From Google bug report 803 assertGreekUpper(u", ", u", "); 804 // http://crbug.com/234797 805 assertGreekUpper(u" !", u" !"); 806 assertGreekUpper(u", ", u", "); 807 assertGreekUpper(u" .", u" ."); 808 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/ 809 assertGreekUpper(u"", u""); 810 assertGreekUpper(u".", u"."); 811 } 812 813 void 814 StringCaseTest::TestLongUpper() { 815 if (quick) { 816 logln("not exhaustive mode: skipping this test"); 817 return; 818 } 819 // Ticket #12663, crash with an extremely long string where 820 // U+0390 maps to 0399 0308 0301 so that the result is three times as long 821 // and overflows an int32_t. 822 int32_t length = 0x40000004; // more than 1G UChars 823 UnicodeString s(length, (UChar32)0x390, length); 824 UnicodeString result; 825 UChar *dest = result.getBuffer(length + 1); 826 if (s.isBogus() || dest == NULL) { 827 logln("Out of memory, unable to run this test on this machine."); 828 return; 829 } 830 IcuTestErrorCode errorCode(*this, "TestLongUpper"); 831 int32_t destLength = u_strToUpper(dest, result.getCapacity(), 832 s.getBuffer(), s.length(), "", errorCode); 833 result.releaseBuffer(destLength); 834 if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) { 835 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)", 836 errorCode.errorName(), (long)destLength); 837 } 838 } 839 840 void StringCaseTest::TestMalformedUTF8() { 841 // ticket #12639 842 IcuTestErrorCode errorCode(*this, "TestMalformedUTF8"); 843 LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode)); 844 if (errorCode.isFailure()) { 845 errln("ucasemap_open(English) failed - %s", errorCode.errorName()); 846 return; 847 } 848 char src[1] = { (char)0x85 }; // malformed UTF-8 849 char dest[3] = { 0, 0, 0 }; 850 int32_t destLength; 851 #if !UCONFIG_NO_BREAK_ITERATION 852 destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode); 853 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 854 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 855 errorCode.errorName(), (int)destLength, dest[0]); 856 } 857 #endif 858 859 errorCode.reset(); 860 dest[0] = 0; 861 destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode); 862 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 863 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 864 errorCode.errorName(), (int)destLength, dest[0]); 865 } 866 867 errorCode.reset(); 868 dest[0] = 0; 869 destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode); 870 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 871 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 872 errorCode.errorName(), (int)destLength, dest[0]); 873 } 874 875 errorCode.reset(); 876 dest[0] = 0; 877 destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode); 878 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 879 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 880 errorCode.errorName(), (int)destLength, dest[0]); 881 } 882 } 883 884 void StringCaseTest::TestBufferOverflow() { 885 // Ticket #12849, incorrect result from Title Case preflight operation, 886 // when buffer overflow error is expected. 887 IcuTestErrorCode errorCode(*this, "TestBufferOverflow"); 888 LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode)); 889 if (errorCode.isFailure()) { 890 errln("ucasemap_open(English) failed - %s", errorCode.errorName()); 891 return; 892 } 893 894 UnicodeString data("hello world"); 895 int32_t result; 896 #if !UCONFIG_NO_BREAK_ITERATION 897 result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode); 898 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) { 899 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " 900 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", 901 __FILE__, __LINE__, data.length(), errorCode.errorName(), result); 902 } 903 #endif 904 errorCode.reset(); 905 906 std::string data_utf8; 907 data.toUTF8String(data_utf8); 908 #if !UCONFIG_NO_BREAK_ITERATION 909 result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode); 910 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) { 911 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " 912 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", 913 __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result); 914 } 915 #endif 916 errorCode.reset(); 917 } 918 919 void StringCaseTest::TestEdits() { 920 IcuTestErrorCode errorCode(*this, "TestEdits"); 921 Edits edits; 922 assertFalse("new Edits hasChanges", edits.hasChanges()); 923 assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges()); 924 assertEquals("new Edits", 0, edits.lengthDelta()); 925 edits.addUnchanged(1); // multiple unchanged ranges are combined 926 edits.addUnchanged(10000); // too long, and they are split 927 edits.addReplace(0, 0); 928 edits.addUnchanged(2); 929 assertFalse("unchanged 10003 hasChanges", edits.hasChanges()); 930 assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges()); 931 assertEquals("unchanged 10003", 0, edits.lengthDelta()); 932 edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed 933 edits.addUnchanged(0); 934 edits.addReplace(2, 1); 935 edits.addReplace(2, 1); 936 edits.addReplace(0, 10); 937 edits.addReplace(100, 0); 938 edits.addReplace(3000, 4000); // variable-length encoding 939 edits.addReplace(100000, 100000); 940 assertTrue("some edits hasChanges", edits.hasChanges()); 941 assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges()); 942 assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta()); 943 UErrorCode outErrorCode = U_ZERO_ERROR; 944 assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode)); 945 946 static const EditChange coarseExpectedChanges[] = { 947 { FALSE, 10003, 10003 }, 948 { TRUE, 103106, 104013 } 949 }; 950 TestUtility::checkEditsIter(*this, u"coarse", 951 edits.getCoarseIterator(), edits.getCoarseIterator(), 952 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode); 953 TestUtility::checkEditsIter(*this, u"coarse changes", 954 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), 955 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode); 956 957 static const EditChange fineExpectedChanges[] = { 958 { FALSE, 10003, 10003 }, 959 { TRUE, 2, 1 }, 960 { TRUE, 2, 1 }, 961 { TRUE, 2, 1 }, 962 { TRUE, 0, 10 }, 963 { TRUE, 100, 0 }, 964 { TRUE, 3000, 4000 }, 965 { TRUE, 100000, 100000 } 966 }; 967 TestUtility::checkEditsIter(*this, u"fine", 968 edits.getFineIterator(), edits.getFineIterator(), 969 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode); 970 TestUtility::checkEditsIter(*this, u"fine changes", 971 edits.getFineChangesIterator(), edits.getFineChangesIterator(), 972 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode); 973 974 edits.reset(); 975 assertFalse("reset hasChanges", edits.hasChanges()); 976 assertEquals("reset numberOfChanges", 0, edits.numberOfChanges()); 977 assertEquals("reset", 0, edits.lengthDelta()); 978 Edits::Iterator ei = edits.getCoarseChangesIterator(); 979 assertFalse("reset then iterator", ei.next(errorCode)); 980 } 981 982 void StringCaseTest::TestCopyMoveEdits() { 983 IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits"); 984 // Exceed the stack array capacity. 985 Edits a; 986 for (int32_t i = 0; i < 250; ++i) { 987 a.addReplace(i % 10, (i % 10) + 1); 988 } 989 assertEquals("a: many edits, length delta", 250, a.lengthDelta()); 990 991 // copy 992 Edits b(a); 993 assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta()); 994 assertEquals("a remains: many edits, length delta", 250, a.lengthDelta()); 995 TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode); 996 997 // assign 998 Edits c; 999 c.addUnchanged(99); 1000 c.addReplace(88, 77); 1001 c = b; 1002 assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta()); 1003 assertEquals("b remains: many edits, length delta", 250, b.lengthDelta()); 1004 TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode); 1005 1006 // std::move trouble on these platforms. 1007 // See https://ssl.icu-project.org/trac/ticket/13393 1008 #if !UPRV_INCOMPLETE_CPP11_SUPPORT && !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) 1009 // move constructor empties object with heap array 1010 Edits d(std::move(a)); 1011 assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta()); 1012 assertFalse("a moved away: no more hasChanges", a.hasChanges()); 1013 TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode); 1014 Edits empty; 1015 TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode); 1016 1017 // move assignment empties object with heap array 1018 Edits e; 1019 e.addReplace(0, 1000); 1020 e = std::move(b); 1021 assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta()); 1022 assertFalse("b moved away: no more hasChanges", b.hasChanges()); 1023 TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode); 1024 TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode); 1025 1026 // Edits::Iterator default constructor. 1027 Edits::Iterator iter; 1028 assertFalse("Edits::Iterator().next()", iter.next(errorCode)); 1029 assertSuccess("Edits::Iterator().next()", errorCode); 1030 iter = e.getFineChangesIterator(); 1031 assertTrue("iter.next()", iter.next(errorCode)); 1032 assertSuccess("iter.next()", errorCode); 1033 assertTrue("iter.hasChange()", iter.hasChange()); 1034 assertEquals("iter.newLength()", 1, iter.newLength()); 1035 #endif 1036 } 1037 1038 void StringCaseTest::TestEditsFindFwdBwd() { 1039 IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd"); 1040 // Some users need index mappings to be efficient when they are out of order. 1041 // The most interesting failure case for this test is it taking a very long time. 1042 Edits e; 1043 constexpr int32_t N = 200000; 1044 for (int32_t i = 0; i < N; ++i) { 1045 e.addUnchanged(1); 1046 e.addReplace(3, 1); 1047 } 1048 Edits::Iterator iter = e.getFineIterator(); 1049 for (int32_t i = 0; i <= N; i += 2) { 1050 assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode)); 1051 assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode)); 1052 } 1053 for (int32_t i = N; i >= 0; i -= 2) { 1054 assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode)); 1055 assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode)); 1056 } 1057 } 1058 1059 void StringCaseTest::TestMergeEdits() { 1060 // For debugging, set -v to see matching edits up to a failure. 1061 IcuTestErrorCode errorCode(*this, "TestMergeEdits"); 1062 Edits ab, bc, ac, expected_ac; 1063 1064 // Simple: Two parallel non-changes. 1065 ab.addUnchanged(2); 1066 bc.addUnchanged(2); 1067 expected_ac.addUnchanged(2); 1068 1069 // Simple: Two aligned changes. 1070 ab.addReplace(3, 2); 1071 bc.addReplace(2, 1); 1072 expected_ac.addReplace(3, 1); 1073 1074 // Unequal non-changes. 1075 ab.addUnchanged(5); 1076 bc.addUnchanged(3); 1077 expected_ac.addUnchanged(3); 1078 // ab ahead by 2 1079 1080 // Overlapping changes accumulate until they share a boundary. 1081 ab.addReplace(4, 3); 1082 bc.addReplace(3, 2); 1083 ab.addReplace(4, 3); 1084 bc.addReplace(3, 2); 1085 ab.addReplace(4, 3); 1086 bc.addReplace(3, 2); 1087 bc.addUnchanged(4); 1088 expected_ac.addReplace(14, 8); 1089 // bc ahead by 2 1090 1091 // Balance out intermediate-string lengths. 1092 ab.addUnchanged(2); 1093 expected_ac.addUnchanged(2); 1094 1095 // Insert something and delete it: Should disappear. 1096 ab.addReplace(0, 5); 1097 ab.addReplace(0, 2); 1098 bc.addReplace(7, 0); 1099 1100 // Parallel change to make a new boundary. 1101 ab.addReplace(1, 2); 1102 bc.addReplace(2, 3); 1103 expected_ac.addReplace(1, 3); 1104 1105 // Multiple ab deletions should remain separate at the boundary. 1106 ab.addReplace(1, 0); 1107 ab.addReplace(2, 0); 1108 ab.addReplace(3, 0); 1109 expected_ac.addReplace(1, 0); 1110 expected_ac.addReplace(2, 0); 1111 expected_ac.addReplace(3, 0); 1112 1113 // Unequal non-changes can be split for another boundary. 1114 ab.addUnchanged(2); 1115 bc.addUnchanged(1); 1116 expected_ac.addUnchanged(1); 1117 // ab ahead by 1 1118 1119 // Multiple bc insertions should create a boundary and remain separate. 1120 bc.addReplace(0, 4); 1121 bc.addReplace(0, 5); 1122 bc.addReplace(0, 6); 1123 expected_ac.addReplace(0, 4); 1124 expected_ac.addReplace(0, 5); 1125 expected_ac.addReplace(0, 6); 1126 // ab ahead by 1 1127 1128 // Multiple ab deletions in the middle of a bc change are merged. 1129 bc.addReplace(2, 2); 1130 // bc ahead by 1 1131 ab.addReplace(1, 0); 1132 ab.addReplace(2, 0); 1133 ab.addReplace(3, 0); 1134 ab.addReplace(4, 1); 1135 expected_ac.addReplace(11, 2); 1136 1137 // Multiple bc insertions in the middle of an ab change are merged. 1138 ab.addReplace(5, 6); 1139 bc.addReplace(3, 3); 1140 // ab ahead by 3 1141 bc.addReplace(0, 4); 1142 bc.addReplace(0, 5); 1143 bc.addReplace(0, 6); 1144 bc.addReplace(3, 7); 1145 expected_ac.addReplace(5, 25); 1146 1147 // Delete around a deletion. 1148 ab.addReplace(4, 4); 1149 ab.addReplace(3, 0); 1150 ab.addUnchanged(2); 1151 bc.addReplace(2, 2); 1152 bc.addReplace(4, 0); 1153 expected_ac.addReplace(9, 2); 1154 1155 // Insert into an insertion. 1156 ab.addReplace(0, 2); 1157 bc.addReplace(1, 1); 1158 bc.addReplace(0, 8); 1159 bc.addUnchanged(4); 1160 expected_ac.addReplace(0, 10); 1161 // bc ahead by 3 1162 1163 // Balance out intermediate-string lengths. 1164 ab.addUnchanged(3); 1165 expected_ac.addUnchanged(3); 1166 1167 // Deletions meet insertions. 1168 // Output order is arbitrary in principle, but we expect insertions first 1169 // and want to keep it that way. 1170 ab.addReplace(2, 0); 1171 ab.addReplace(4, 0); 1172 ab.addReplace(6, 0); 1173 bc.addReplace(0, 1); 1174 bc.addReplace(0, 3); 1175 bc.addReplace(0, 5); 1176 expected_ac.addReplace(0, 1); 1177 expected_ac.addReplace(0, 3); 1178 expected_ac.addReplace(0, 5); 1179 expected_ac.addReplace(2, 0); 1180 expected_ac.addReplace(4, 0); 1181 expected_ac.addReplace(6, 0); 1182 1183 // End with a non-change, so that further edits are never reordered. 1184 ab.addUnchanged(1); 1185 bc.addUnchanged(1); 1186 expected_ac.addUnchanged(1); 1187 1188 ac.mergeAndAppend(ab, bc, errorCode); 1189 assertSuccess("ab+bc", errorCode); 1190 if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) { 1191 return; 1192 } 1193 1194 // Append more Edits. 1195 Edits ab2, bc2; 1196 ab2.addUnchanged(5); 1197 bc2.addReplace(1, 2); 1198 bc2.addUnchanged(4); 1199 expected_ac.addReplace(1, 2); 1200 expected_ac.addUnchanged(4); 1201 ac.mergeAndAppend(ab2, bc2, errorCode); 1202 assertSuccess("ab2+bc2", errorCode); 1203 if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) { 1204 return; 1205 } 1206 1207 // Append empty edits. 1208 Edits empty; 1209 ac.mergeAndAppend(empty, empty, errorCode); 1210 assertSuccess("empty+empty", errorCode); 1211 if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) { 1212 return; 1213 } 1214 1215 // Error: Append more edits with mismatched intermediate-string lengths. 1216 Edits mismatch; 1217 mismatch.addReplace(1, 1); 1218 ac.mergeAndAppend(ab2, mismatch, errorCode); 1219 assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get()); 1220 errorCode.reset(); 1221 ac.mergeAndAppend(mismatch, bc2, errorCode); 1222 assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get()); 1223 errorCode.reset(); 1224 } 1225 1226 void StringCaseTest::TestCaseMapWithEdits() { 1227 IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits"); 1228 UChar dest[20]; 1229 Edits edits; 1230 1231 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, 1232 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1233 assertEquals(u"toLower(IstanBul)", UnicodeString(u"b"), UnicodeString(TRUE, dest, length)); 1234 static const EditChange lowerExpectedChanges[] = { 1235 { TRUE, 1, 1 }, 1236 { FALSE, 4, 4 }, 1237 { TRUE, 1, 1 }, 1238 { FALSE, 2, 2 } 1239 }; 1240 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)", 1241 edits.getFineIterator(), edits.getFineIterator(), 1242 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), 1243 TRUE, errorCode); 1244 1245 edits.reset(); 1246 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, 1247 u"", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1248 assertEquals(u"toUpper()", UnicodeString(u""), UnicodeString(TRUE, dest, length)); 1249 static const EditChange upperExpectedChanges[] = { 1250 { FALSE, 1, 1 }, 1251 { TRUE, 1, 1 }, 1252 { TRUE, 1, 1 }, 1253 { TRUE, 1, 1 }, 1254 { TRUE, 1, 1 }, 1255 { TRUE, 1, 1 } 1256 }; 1257 TestUtility::checkEditsIter(*this, u"toUpper()", 1258 edits.getFineIterator(), edits.getFineIterator(), 1259 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), 1260 TRUE, errorCode); 1261 1262 edits.reset(); 1263 1264 #if !UCONFIG_NO_BREAK_ITERATION 1265 length = CaseMap::toTitle("nl", 1266 U_OMIT_UNCHANGED_TEXT | 1267 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1268 U_TITLECASE_NO_LOWERCASE, 1269 nullptr, u"IjssEL IglOo", 12, 1270 dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1271 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); 1272 static const EditChange titleExpectedChanges[] = { 1273 { FALSE, 1, 1 }, 1274 { TRUE, 1, 1 }, 1275 { FALSE, 10, 10 } 1276 }; 1277 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)", 1278 edits.getFineIterator(), edits.getFineIterator(), 1279 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), 1280 TRUE, errorCode); 1281 #endif 1282 1283 // No explicit nor automatic edits.reset(). Edits should be appended. 1284 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1285 u"ItanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1286 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"ssb"), UnicodeString(TRUE, dest, length)); 1287 static const EditChange foldExpectedChanges[] = { 1288 #if !UCONFIG_NO_BREAK_ITERATION 1289 // From titlecasing. 1290 { FALSE, 1, 1 }, 1291 { TRUE, 1, 1 }, 1292 { FALSE, 10, 10 }, 1293 #endif 1294 // From case folding. 1295 { TRUE, 1, 1 }, 1296 { TRUE, 1, 2 }, 1297 { FALSE, 3, 3 }, 1298 { TRUE, 1, 1 }, 1299 { FALSE, 2, 2 } 1300 }; 1301 TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, ItanBul)", 1302 edits.getFineIterator(), edits.getFineIterator(), 1303 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), 1304 TRUE, errorCode); 1305 } 1306 1307 void StringCaseTest::TestCaseMapUTF8WithEdits() { 1308 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits"); 1309 char dest[50]; 1310 Edits edits; 1311 1312 int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, 1313 u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1314 assertEquals(u"toLower(IstanBul)", UnicodeString(u"b"), 1315 UnicodeString::fromUTF8(StringPiece(dest, length))); 1316 static const EditChange lowerExpectedChanges[] = { 1317 { TRUE, 1, 2 }, 1318 { FALSE, 4, 4 }, 1319 { TRUE, 1, 1 }, 1320 { FALSE, 2, 2 } 1321 }; 1322 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)", 1323 edits.getFineIterator(), edits.getFineIterator(), 1324 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), 1325 TRUE, errorCode); 1326 1327 edits.reset(); 1328 length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, 1329 u8"", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1330 assertEquals(u"toUpper()", UnicodeString(u""), 1331 UnicodeString::fromUTF8(StringPiece(dest, length))); 1332 static const EditChange upperExpectedChanges[] = { 1333 { FALSE, 2, 2 }, 1334 { TRUE, 2, 2 }, 1335 { TRUE, 2, 2 }, 1336 { TRUE, 2, 2 }, 1337 { TRUE, 2, 2 }, 1338 { TRUE, 2, 2 } 1339 }; 1340 TestUtility::checkEditsIter(*this, u"toUpper()", 1341 edits.getFineIterator(), edits.getFineIterator(), 1342 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), 1343 TRUE, errorCode); 1344 1345 edits.reset(); 1346 #if !UCONFIG_NO_BREAK_ITERATION 1347 length = CaseMap::utf8ToTitle("nl", 1348 U_OMIT_UNCHANGED_TEXT | 1349 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1350 U_TITLECASE_NO_LOWERCASE, 1351 nullptr, u8"IjssEL IglOo", 12, 1352 dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1353 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), 1354 UnicodeString::fromUTF8(StringPiece(dest, length))); 1355 static const EditChange titleExpectedChanges[] = { 1356 { FALSE, 1, 1 }, 1357 { TRUE, 1, 1 }, 1358 { FALSE, 10, 10 } 1359 }; 1360 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)", 1361 edits.getFineIterator(), edits.getFineIterator(), 1362 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), 1363 TRUE, errorCode); 1364 #endif 1365 1366 // No explicit nor automatic edits.reset(). Edits should be appended. 1367 length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | 1368 U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1369 u8"ItanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1370 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"ssb"), 1371 UnicodeString::fromUTF8(StringPiece(dest, length))); 1372 static const EditChange foldExpectedChanges[] = { 1373 #if !UCONFIG_NO_BREAK_ITERATION 1374 // From titlecasing. 1375 { FALSE, 1, 1 }, 1376 { TRUE, 1, 1 }, 1377 { FALSE, 10, 10 }, 1378 #endif 1379 // From case folding. 1380 { TRUE, 1, 2 }, 1381 { TRUE, 2, 2 }, 1382 { FALSE, 3, 3 }, 1383 { TRUE, 1, 1 }, 1384 { FALSE, 2, 2 } 1385 }; 1386 TestUtility::checkEditsIter(*this, u"foldCase(ItanBul)", 1387 edits.getFineIterator(), edits.getFineIterator(), 1388 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), 1389 TRUE, errorCode); 1390 } 1391 1392 void StringCaseTest::TestCaseMapToString() { 1393 // This test function name is parallel with one in UCharacterCaseTest.java. 1394 // It is a bit of a misnomer until we have CaseMap API that writes to 1395 // a UnicodeString, at which point we should change this code here. 1396 IcuTestErrorCode errorCode(*this, "TestCaseMapToString"); 1397 UChar dest[20]; 1398 1399 // Omit unchanged text. 1400 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, 1401 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1402 assertEquals(u"toLower(IstanBul)", 1403 UnicodeString(u"b"), UnicodeString(TRUE, dest, length)); 1404 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, 1405 u"", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1406 assertEquals(u"toUpper()", 1407 UnicodeString(u""), UnicodeString(TRUE, dest, length)); 1408 #if !UCONFIG_NO_BREAK_ITERATION 1409 length = CaseMap::toTitle("nl", 1410 U_OMIT_UNCHANGED_TEXT | 1411 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1412 U_TITLECASE_NO_LOWERCASE, 1413 nullptr, u"IjssEL IglOo", 12, 1414 dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1415 assertEquals(u"toTitle(IjssEL IglOo)", 1416 UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); 1417 #endif 1418 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1419 u"ItanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1420 assertEquals(u"foldCase(ItanBul)", 1421 UnicodeString(u"ssb"), UnicodeString(TRUE, dest, length)); 1422 1423 // Return the whole result string. 1424 length = CaseMap::toLower("tr", 0, 1425 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1426 assertEquals(u"toLower(IstanBul)", 1427 UnicodeString(u"stanbul"), UnicodeString(TRUE, dest, length)); 1428 length = CaseMap::toUpper("el", 0, 1429 u"", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1430 assertEquals(u"toUpper()", 1431 UnicodeString(u""), UnicodeString(TRUE, dest, length)); 1432 #if !UCONFIG_NO_BREAK_ITERATION 1433 length = CaseMap::toTitle("nl", 1434 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1435 U_TITLECASE_NO_LOWERCASE, 1436 nullptr, u"IjssEL IglOo", 12, 1437 dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1438 assertEquals(u"toTitle(IjssEL IglOo)", 1439 UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length)); 1440 #endif 1441 length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1442 u"ItanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1443 assertEquals(u"foldCase(ItanBul)", 1444 UnicodeString(u"sstanbul"), UnicodeString(TRUE, dest, length)); 1445 } 1446 1447 void StringCaseTest::TestCaseMapUTF8ToString() { 1448 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString"); 1449 std::string dest; 1450 StringByteSink<std::string> sink(&dest); 1451 1452 // Omit unchanged text. 1453 CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode); 1454 assertEquals(u"toLower(IstanBul)", UnicodeString(u"b"), UnicodeString::fromUTF8(dest)); 1455 dest.clear(); 1456 CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"", sink, nullptr, errorCode); 1457 assertEquals(u"toUpper()", UnicodeString(u""), 1458 UnicodeString::fromUTF8(dest)); 1459 #if !UCONFIG_NO_BREAK_ITERATION 1460 dest.clear(); 1461 CaseMap::utf8ToTitle( 1462 "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, 1463 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode); 1464 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), 1465 UnicodeString::fromUTF8(dest)); 1466 #endif 1467 dest.clear(); 1468 CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1469 u8"ItanBul", sink, nullptr, errorCode); 1470 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"ssb"), 1471 UnicodeString::fromUTF8(dest)); 1472 1473 // Return the whole result string. 1474 dest.clear(); 1475 CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode); 1476 assertEquals(u"toLower(IstanBul)", UnicodeString(u"stanbul"), 1477 UnicodeString::fromUTF8(dest)); 1478 dest.clear(); 1479 CaseMap::utf8ToUpper("el", 0, u8"", sink, nullptr, errorCode); 1480 assertEquals(u"toUpper()", UnicodeString(u""), 1481 UnicodeString::fromUTF8(dest)); 1482 #if !UCONFIG_NO_BREAK_ITERATION 1483 dest.clear(); 1484 CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, 1485 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode); 1486 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"), 1487 UnicodeString::fromUTF8(dest)); 1488 #endif 1489 dest.clear(); 1490 CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"ItanBul", sink, nullptr, errorCode); 1491 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"sstanbul"), 1492 UnicodeString::fromUTF8(dest)); 1493 } 1494 1495 void StringCaseTest::TestLongUnicodeString() { 1496 // Code coverage for UnicodeString case mapping code handling 1497 // long strings or many changes in a string. 1498 UnicodeString s(TRUE, 1499 (const UChar *) 1500 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1501 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1502 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1503 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1504 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1505 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51); 1506 UnicodeString expected(TRUE, 1507 (const UChar *) 1508 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1509 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1510 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1511 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1512 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1513 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51); 1514 s.toUpper(Locale::getRoot()); 1515 assertEquals("string length 306", expected, s); 1516 } 1517 1518 #if !UCONFIG_NO_BREAK_ITERATION 1519 void StringCaseTest::TestBug13127() { 1520 // Test case crashed when the bug was present. 1521 const char16_t *s16 = u""; 1522 UnicodeString s(TRUE, s16, -1); 1523 s.toTitle(0, Locale::getEnglish()); 1524 } 1525 1526 void StringCaseTest::TestInPlaceTitle() { 1527 // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place. 1528 IcuTestErrorCode errorCode(*this, "TestInPlaceTitle"); 1529 char16_t s[32] = u" abcdef"; 1530 const char16_t *expected = u"Ss Ss Ss Abcdef"; 1531 int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode); 1532 assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length); 1533 assertEquals("u_strToTitle(in-place)", expected, s); 1534 } 1535 #endif 1536