1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2002-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: strcase.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2002mar12 16 * created by: Markus W. Scherer 17 * 18 * Test file for string casing C++ API functions. 19 */ 20 21 #include "unicode/std_string.h" 22 #include "unicode/brkiter.h" 23 #include "unicode/casemap.h" 24 #include "unicode/edits.h" 25 #include "unicode/uchar.h" 26 #include "unicode/ures.h" 27 #include "unicode/uloc.h" 28 #include "unicode/locid.h" 29 #include "unicode/ubrk.h" 30 #include "unicode/unistr.h" 31 #include "unicode/ucasemap.h" 32 #include "unicode/ustring.h" 33 #include "ucase.h" 34 #include "ustrtest.h" 35 #include "unicode/tstdtmod.h" 36 #include "cmemory.h" 37 #include "testutil.h" 38 39 class StringCaseTest: public IntlTest { 40 public: 41 StringCaseTest(); 42 virtual ~StringCaseTest(); 43 44 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); 45 46 void TestCaseConversion(); 47 48 void TestCasingImpl(const UnicodeString &input, 49 const UnicodeString &output, 50 int32_t whichCase, 51 void *iter, const char *localeID, uint32_t options); 52 void TestCasing(); 53 void TestTitleOptions(); 54 void TestFullCaseFoldingIterator(); 55 void TestGreekUpper(); 56 void TestLongUpper(); 57 void TestMalformedUTF8(); 58 void TestBufferOverflow(); 59 void TestEdits(); 60 void TestCopyMoveEdits(); 61 void TestEditsFindFwdBwd(); 62 void TestMergeEdits(); 63 void TestCaseMapWithEdits(); 64 void TestCaseMapUTF8WithEdits(); 65 void TestCaseMapToString(); 66 void TestCaseMapUTF8ToString(); 67 void TestLongUnicodeString(); 68 void TestBug13127(); 69 void TestInPlaceTitle(); 70 void TestCaseMapEditsIteratorDocs(); 71 void TestCaseMapGreekExtended(); 72 73 private: 74 void assertGreekUpper(const char16_t *s, const char16_t *expected); 75 76 Locale GREEK_LOCALE_; 77 }; 78 79 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {} 80 81 StringCaseTest::~StringCaseTest() {} 82 83 extern IntlTest *createStringCaseTest() { 84 return new StringCaseTest(); 85 } 86 87 void 88 StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 89 if(exec) { 90 logln("TestSuite StringCaseTest: "); 91 } 92 TESTCASE_AUTO_BEGIN; 93 TESTCASE_AUTO(TestCaseConversion); 94 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 95 TESTCASE_AUTO(TestCasing); 96 TESTCASE_AUTO(TestTitleOptions); 97 #endif 98 TESTCASE_AUTO(TestFullCaseFoldingIterator); 99 TESTCASE_AUTO(TestGreekUpper); 100 TESTCASE_AUTO(TestLongUpper); 101 TESTCASE_AUTO(TestMalformedUTF8); 102 TESTCASE_AUTO(TestBufferOverflow); 103 TESTCASE_AUTO(TestEdits); 104 TESTCASE_AUTO(TestCopyMoveEdits); 105 TESTCASE_AUTO(TestEditsFindFwdBwd); 106 TESTCASE_AUTO(TestMergeEdits); 107 TESTCASE_AUTO(TestCaseMapWithEdits); 108 TESTCASE_AUTO(TestCaseMapUTF8WithEdits); 109 TESTCASE_AUTO(TestCaseMapToString); 110 TESTCASE_AUTO(TestCaseMapUTF8ToString); 111 TESTCASE_AUTO(TestLongUnicodeString); 112 #if !UCONFIG_NO_BREAK_ITERATION 113 TESTCASE_AUTO(TestBug13127); 114 TESTCASE_AUTO(TestInPlaceTitle); 115 #endif 116 TESTCASE_AUTO(TestCaseMapEditsIteratorDocs); 117 TESTCASE_AUTO(TestCaseMapGreekExtended); 118 TESTCASE_AUTO_END; 119 } 120 121 void 122 StringCaseTest::TestCaseConversion() 123 { 124 static const UChar uppercaseGreek[] = 125 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4, 126 0x39f, 0x3a3, 0 }; 127 // "IESUS CHRISTOS" 128 129 static const UChar lowercaseGreek[] = 130 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4, 131 0x3bf, 0x3c2, 0 }; 132 // "iesus christos" 133 134 static const UChar lowercaseTurkish[] = 135 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f, 136 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 }; 137 138 static const UChar uppercaseTurkish[] = 139 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20, 140 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 }; 141 142 UnicodeString expectedResult; 143 UnicodeString test3; 144 145 test3 += (UChar32)0x0130; 146 test3 += "STANBUL, NOT CONSTANTINOPLE!"; 147 148 UnicodeString test4(test3); 149 test4.toLower(Locale("")); 150 expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape(); 151 if (test4 != expectedResult) 152 errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 153 154 test4 = test3; 155 test4.toLower(Locale("tr", "TR")); 156 expectedResult = lowercaseTurkish; 157 if (test4 != expectedResult) 158 errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 159 160 test3 = "topkap"; 161 test3 += (UChar32)0x0131; 162 test3 += " palace, istanbul"; 163 test4 = test3; 164 165 test4.toUpper(Locale("")); 166 expectedResult = "TOPKAPI PALACE, ISTANBUL"; 167 if (test4 != expectedResult) 168 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 169 170 test4 = test3; 171 test4.toUpper(Locale("tr", "TR")); 172 expectedResult = uppercaseTurkish; 173 if (test4 != expectedResult) 174 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 175 176 test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe"); 177 178 test3.toUpper(Locale("de", "DE")); 179 expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE"); 180 if (test3 != expectedResult) 181 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\"."); 182 183 test4.replace(0, test4.length(), uppercaseGreek); 184 185 test4.toLower(Locale("el", "GR")); 186 expectedResult = lowercaseGreek; 187 if (test4 != expectedResult) 188 errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 189 190 test4.replace(0, test4.length(), lowercaseGreek); 191 192 test4.toUpper(); 193 expectedResult = uppercaseGreek; 194 if (test4 != expectedResult) 195 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); 196 197 // more string case mapping tests with the new implementation 198 { 199 static const UChar 200 201 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, 202 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, 203 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, 204 205 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff }, 206 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, 207 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, 208 209 beforeMiniUpper[]= { 0xdf, 0x61 }, 210 miniUpper[]= { 0x53, 0x53, 0x41 }; 211 212 UnicodeString s; 213 214 /* lowercase with root locale */ 215 s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); 216 s.toLower(""); 217 if( s.length()!=UPRV_LENGTHOF(lowerRoot) || 218 s!=UnicodeString(FALSE, lowerRoot, s.length()) 219 ) { 220 errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\""); 221 } 222 223 /* lowercase with turkish locale */ 224 s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); 225 s.setCharAt(0, beforeLower[0]).toLower(Locale("tr")); 226 if( s.length()!=UPRV_LENGTHOF(lowerTurkish) || 227 s!=UnicodeString(FALSE, lowerTurkish, s.length()) 228 ) { 229 errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\""); 230 } 231 232 /* uppercase with root locale */ 233 s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); 234 s.setCharAt(0, beforeUpper[0]).toUpper(Locale("")); 235 if( s.length()!=UPRV_LENGTHOF(upperRoot) || 236 s!=UnicodeString(FALSE, upperRoot, s.length()) 237 ) { 238 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\""); 239 } 240 241 /* uppercase with turkish locale */ 242 s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); 243 s.toUpper(Locale("tr")); 244 if( s.length()!=UPRV_LENGTHOF(upperTurkish) || 245 s!=UnicodeString(FALSE, upperTurkish, s.length()) 246 ) { 247 errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\""); 248 } 249 250 /* uppercase a short string with root locale */ 251 s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper)); 252 s.setCharAt(0, beforeMiniUpper[0]).toUpper(""); 253 if( s.length()!=UPRV_LENGTHOF(miniUpper) || 254 s!=UnicodeString(FALSE, miniUpper, s.length()) 255 ) { 256 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\""); 257 } 258 } 259 260 // test some supplementary characters (>= Unicode 3.1) 261 { 262 UnicodeString t; 263 264 UnicodeString 265 deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(), 266 deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(), 267 deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape(); 268 (t=deseretInput).toLower(); 269 if(t!=deseretLower) { 270 errln("error lowercasing Deseret (plane 1) characters"); 271 } 272 (t=deseretInput).toUpper(); 273 if(t!=deseretUpper) { 274 errln("error uppercasing Deseret (plane 1) characters"); 275 } 276 } 277 278 // test some more cases that looked like problems 279 { 280 UnicodeString t; 281 282 UnicodeString 283 ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(), 284 ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(), 285 ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape(); 286 (t=ljInput).toLower("en"); 287 if(t!=ljLower) { 288 errln("error lowercasing LJ characters"); 289 } 290 (t=ljInput).toUpper("en"); 291 if(t!=ljUpper) { 292 errln("error uppercasing LJ characters"); 293 } 294 } 295 296 #if !UCONFIG_NO_NORMALIZATION 297 // some context-sensitive casing depends on normalization data being present 298 299 // Unicode 3.1.1 SpecialCasing tests 300 { 301 UnicodeString t; 302 303 // sigmas preceded and/or followed by cased letters 304 UnicodeString 305 sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(), 306 sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(), 307 sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(); 308 309 (t=sigmas).toLower(); 310 if(t!=sigmasLower) { 311 errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\""); 312 } 313 314 (t=sigmas).toUpper(Locale("")); 315 if(t!=sigmasUpper) { 316 errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\""); 317 } 318 319 // turkish & azerbaijani dotless i & dotted I 320 // remove dot above if there was a capital I before and there are no more accents above 321 UnicodeString 322 dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(), 323 dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(), 324 dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(); 325 326 (t=dots).toLower("tr"); 327 if(t!=dotsTurkish) { 328 errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\""); 329 } 330 331 (t=dots).toLower("de"); 332 if(t!=dotsDefault) { 333 errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); 334 } 335 } 336 337 // more Unicode 3.1.1 tests 338 { 339 UnicodeString t; 340 341 // lithuanian dot above in uppercasing 342 UnicodeString 343 dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(), 344 dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(), 345 dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape(); 346 347 (t=dots).toUpper("lt"); 348 if(t!=dotsLithuanian) { 349 errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\""); 350 } 351 352 (t=dots).toUpper("de"); 353 if(t!=dotsDefault) { 354 errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); 355 } 356 357 // lithuanian adds dot above to i in lowercasing if there are more above accents 358 UnicodeString 359 i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(), 360 iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(), 361 iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape(); 362 363 (t=i).toLower("lt"); 364 if(t!=iLithuanian) { 365 errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\""); 366 } 367 368 (t=i).toLower("de"); 369 if(t!=iDefault) { 370 errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\""); 371 } 372 } 373 374 #endif 375 376 // test case folding 377 { 378 UnicodeString 379 s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(), 380 f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(), 381 g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(), 382 t; 383 384 (t=s).foldCase(); 385 if(f!=t) { 386 errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\""); 387 } 388 389 // alternate handling for dotted I/dotless i (U+0130, U+0131) 390 (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I); 391 if(g!=t) { 392 errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\""); 393 } 394 } 395 } 396 397 // data-driven case mapping tests ------------------------------------------ *** 398 399 enum { 400 TEST_LOWER, 401 TEST_UPPER, 402 TEST_TITLE, 403 TEST_FOLD, 404 TEST_COUNT 405 }; 406 407 // names of TestData children in casing.txt 408 static const char *const dataNames[TEST_COUNT+1]={ 409 "lowercasing", 410 "uppercasing", 411 "titlecasing", 412 "casefolding", 413 "" 414 }; 415 416 void 417 StringCaseTest::TestCasingImpl(const UnicodeString &input, 418 const UnicodeString &output, 419 int32_t whichCase, 420 void *iter, const char *localeID, uint32_t options) { 421 // UnicodeString 422 UnicodeString result; 423 const char *name; 424 Locale locale(localeID); 425 426 result=input; 427 switch(whichCase) { 428 case TEST_LOWER: 429 name="toLower"; 430 result.toLower(locale); 431 break; 432 case TEST_UPPER: 433 name="toUpper"; 434 result.toUpper(locale); 435 break; 436 #if !UCONFIG_NO_BREAK_ITERATION 437 case TEST_TITLE: 438 name="toTitle"; 439 result.toTitle((BreakIterator *)iter, locale, options); 440 break; 441 #endif 442 case TEST_FOLD: 443 name="foldCase"; 444 result.foldCase(options); 445 break; 446 default: 447 name=""; 448 break; // won't happen 449 } 450 if(result!=output) { 451 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); 452 } 453 #if !UCONFIG_NO_BREAK_ITERATION 454 if(whichCase==TEST_TITLE && options==0) { 455 result=input; 456 result.toTitle((BreakIterator *)iter, locale); 457 if(result!=output) { 458 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); 459 } 460 } 461 #endif 462 463 // UTF-8 464 char utf8In[100], utf8Out[100]; 465 int32_t utf8InLength, utf8OutLength, resultLength; 466 UChar *buffer; 467 468 IcuTestErrorCode errorCode(*this, "TestCasingImpl"); 469 LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); 470 #if !UCONFIG_NO_BREAK_ITERATION 471 if(iter!=NULL) { 472 // Clone the break iterator so that the UCaseMap can safely adopt it. 473 UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); 474 ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); 475 } 476 #endif 477 478 u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); 479 switch(whichCase) { 480 case TEST_LOWER: 481 name="ucasemap_utf8ToLower"; 482 utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), 483 utf8Out, (int32_t)sizeof(utf8Out), 484 utf8In, utf8InLength, errorCode); 485 break; 486 case TEST_UPPER: 487 name="ucasemap_utf8ToUpper"; 488 utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), 489 utf8Out, (int32_t)sizeof(utf8Out), 490 utf8In, utf8InLength, errorCode); 491 break; 492 #if !UCONFIG_NO_BREAK_ITERATION 493 case TEST_TITLE: 494 name="ucasemap_utf8ToTitle"; 495 utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), 496 utf8Out, (int32_t)sizeof(utf8Out), 497 utf8In, utf8InLength, errorCode); 498 break; 499 #endif 500 case TEST_FOLD: 501 name="ucasemap_utf8FoldCase"; 502 utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), 503 utf8Out, (int32_t)sizeof(utf8Out), 504 utf8In, utf8InLength, errorCode); 505 break; 506 default: 507 name=""; 508 utf8OutLength=0; 509 break; // won't happen 510 } 511 buffer=result.getBuffer(utf8OutLength); 512 u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); 513 result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); 514 515 if(errorCode.isFailure()) { 516 errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); 517 errorCode.reset(); 518 } else if(result!=output) { 519 errln("error: %s() got a wrong result for a test case from casing.res", name); 520 errln("expected \"" + output + "\" got \"" + result + "\"" ); 521 } 522 } 523 524 void 525 StringCaseTest::TestCasing() { 526 UErrorCode status = U_ZERO_ERROR; 527 #if !UCONFIG_NO_BREAK_ITERATION 528 LocalUBreakIteratorPointer iter; 529 #endif 530 char cLocaleID[100]; 531 UnicodeString locale, input, output, optionsString, result; 532 uint32_t options; 533 int32_t whichCase, type; 534 LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); 535 if(U_SUCCESS(status)) { 536 for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { 537 #if UCONFIG_NO_BREAK_ITERATION 538 if(whichCase==TEST_TITLE) { 539 continue; 540 } 541 #endif 542 LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); 543 if(U_FAILURE(status)) { 544 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); 545 break; 546 } 547 const DataMap *myCase = NULL; 548 while(casingTest->nextCase(myCase, status)) { 549 input = myCase->getString("Input", status); 550 output = myCase->getString("Output", status); 551 552 if(whichCase!=TEST_FOLD) { 553 locale = myCase->getString("Locale", status); 554 } 555 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); 556 557 #if !UCONFIG_NO_BREAK_ITERATION 558 if(whichCase==TEST_TITLE) { 559 type = myCase->getInt("Type", status); 560 if(type>=0) { 561 iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); 562 } else if(type==-2) { 563 // Open a trivial break iterator that only delivers { 0, length } 564 // or even just { 0 } as boundaries. 565 static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" 566 UParseError parseError; 567 iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status)); 568 } 569 } 570 #endif 571 options = 0; 572 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { 573 optionsString = myCase->getString("Options", status); 574 if(optionsString.indexOf((UChar)0x54)>=0) { // T 575 options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; 576 } 577 if(optionsString.indexOf((UChar)0x4c)>=0) { // L 578 options|=U_TITLECASE_NO_LOWERCASE; 579 } 580 if(optionsString.indexOf((UChar)0x41)>=0) { // A 581 options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; 582 } 583 } 584 585 if(U_FAILURE(status)) { 586 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); 587 status = U_ZERO_ERROR; 588 } else { 589 #if UCONFIG_NO_BREAK_ITERATION 590 LocalPointer<UMemory> iter; 591 #endif 592 TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); 593 } 594 595 #if !UCONFIG_NO_BREAK_ITERATION 596 iter.adoptInstead(NULL); 597 #endif 598 } 599 } 600 } 601 602 #if !UCONFIG_NO_BREAK_ITERATION 603 // more tests for API coverage 604 status=U_ZERO_ERROR; 605 input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); 606 (result=input).toTitle(NULL); 607 if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { 608 dataerrln("UnicodeString::toTitle(NULL) failed."); 609 } 610 #endif 611 } 612 613 void 614 StringCaseTest::TestTitleOptions() { 615 // New options in ICU 60. 616 TestCasingImpl(u"cAt! eTc.", u"Cat! etc.", TEST_TITLE, 617 nullptr, "", U_TITLECASE_WHOLE_STRING); 618 TestCasingImpl(u"a CaT. A dOg! eTc.", u"A CaT. A dOg! ETc.", TEST_TITLE, 619 nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE); 620 TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE, 621 nullptr, "", U_TITLECASE_WHOLE_STRING); 622 TestCasingImpl(u"(aBc)", u"(abc)", TEST_TITLE, 623 nullptr, "", U_TITLECASE_WHOLE_STRING); 624 TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE, 625 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED); 626 TestCasingImpl(u"(aBc)", u"(Abc)", TEST_TITLE, 627 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED); 628 TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE, 629 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE); 630 TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE, 631 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT); 632 TestCasingImpl(u"ijs", u"IJs", TEST_TITLE, 633 nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING); 634 TestCasingImpl(u"ijs", u"js", TEST_TITLE, 635 nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING); 636 637 #if !UCONFIG_NO_BREAK_ITERATION 638 // Test conflicting settings. 639 // If & when we add more options, then the ORed combinations may become 640 // indistinguishable from valid values. 641 IcuTestErrorCode errorCode(*this, "TestTitleOptions"); 642 CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr, 643 u"", 0, nullptr, 0, nullptr, errorCode); 644 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { 645 errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument", 646 errorCode.errorName()); 647 } 648 errorCode.reset(); 649 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr, 650 u"", 0, nullptr, 0, nullptr, errorCode); 651 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { 652 errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument", 653 errorCode.errorName()); 654 } 655 errorCode.reset(); 656 LocalPointer<BreakIterator> iter( 657 BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode)); 658 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(), 659 u"", 0, nullptr, 0, nullptr, errorCode); 660 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { 661 errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument", 662 errorCode.errorName()); 663 } 664 errorCode.reset(); 665 #endif 666 } 667 668 void 669 StringCaseTest::TestFullCaseFoldingIterator() { 670 UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi"); 671 UnicodeString ss=UNICODE_STRING_SIMPLE("ss"); 672 FullCaseFoldingIterator iter; 673 int32_t count=0; 674 int32_t countSpecific=0; 675 UChar32 c; 676 UnicodeString full; 677 while((c=iter.next(full))>=0) { 678 ++count; 679 // Check that the full Case_Folding has more than 1 code point. 680 if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) { 681 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c); 682 continue; 683 } 684 // Check that full == Case_Folding(c). 685 UnicodeString cf(c); 686 cf.foldCase(); 687 if(full!=cf) { 688 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c); 689 continue; 690 } 691 // Spot-check a couple of specific cases. 692 if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) { 693 ++countSpecific; 694 } 695 } 696 if(countSpecific!=3) { 697 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases"); 698 } 699 if(count<70) { 700 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count); 701 } 702 } 703 704 void 705 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) { 706 UnicodeString s16(s); 707 UnicodeString expected16(expected); 708 UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")"; 709 UnicodeString result16(s16); 710 result16.toUpper(GREEK_LOCALE_); 711 assertEquals(msg, expected16, result16); 712 713 msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap="; 714 int32_t length = expected16.length(); 715 int32_t capacities[] = { 716 // Keep in sync with the UTF-8 capacities near the bottom of this function. 717 0, length / 2, length - 1, length, length + 1 718 }; 719 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { 720 int32_t cap = capacities[i]; 721 UChar *dest16 = result16.getBuffer(expected16.length() + 1); 722 u_memset(dest16, 0x55AA, result16.getCapacity()); 723 UErrorCode errorCode = U_ZERO_ERROR; 724 length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode); 725 assertEquals(msg + cap, expected16.length(), length); 726 UErrorCode expectedErrorCode; 727 if (cap < expected16.length()) { 728 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; 729 } else if (cap == expected16.length()) { 730 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; 731 } else { 732 expectedErrorCode = U_ZERO_ERROR; 733 assertEquals(msg + cap + " NUL", 0, dest16[length]); 734 } 735 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); 736 result16.releaseBuffer(length); 737 if (cap >= expected16.length()) { 738 assertEquals(msg + cap, expected16, result16); 739 } 740 } 741 742 UErrorCode errorCode = U_ZERO_ERROR; 743 LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode)); 744 assertSuccess("ucasemap_open", errorCode); 745 std::string s8; 746 s16.toUTF8String(s8); 747 msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")"; 748 char dest8[1000]; 749 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8), 750 s8.data(), s8.length(), &errorCode); 751 assertSuccess("ucasemap_utf8ToUpper", errorCode); 752 StringPiece result8(dest8, length); 753 UnicodeString result16From8 = UnicodeString::fromUTF8(result8); 754 assertEquals(msg, expected16, result16From8); 755 756 msg += " cap="; 757 capacities[1] = length / 2; 758 capacities[2] = length - 1; 759 capacities[3] = length; 760 capacities[4] = length + 1; 761 char dest8b[1000]; 762 int32_t expected8Length = length; // Assuming the previous call worked. 763 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { 764 int32_t cap = capacities[i]; 765 memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b)); 766 UErrorCode errorCode = U_ZERO_ERROR; 767 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap, 768 s8.data(), s8.length(), &errorCode); 769 assertEquals(msg + cap, expected8Length, length); 770 UErrorCode expectedErrorCode; 771 if (cap < expected8Length) { 772 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; 773 } else if (cap == expected8Length) { 774 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; 775 } else { 776 expectedErrorCode = U_ZERO_ERROR; 777 // Casts to int32_t to avoid matching UBool. 778 assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]); 779 } 780 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); 781 if (cap >= expected8Length) { 782 assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length)); 783 } 784 } 785 } 786 787 void 788 StringCaseTest::TestGreekUpper() { 789 // http://bugs.icu-project.org/trac/ticket/5456 790 assertGreekUpper(u", , ", u", , "); 791 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039 792 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893 793 assertGreekUpper(u"", u""); 794 assertGreekUpper(u", , ", u", , "); 795 assertGreekUpper(u", , ", u", , "); 796 assertGreekUpper(u", , ", u", , "); 797 assertGreekUpper(u"", u""); 798 assertGreekUpper(u"", u""); 799 assertGreekUpper(u" ", u" "); 800 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html 801 assertGreekUpper(u" ", u" "); 802 assertGreekUpper(u" ", u" "); 803 // http://unicode.org/udhr/d/udhr_ell_polytonic.html 804 assertGreekUpper(u" ", u" "); 805 assertGreekUpper(u" ", u" "); 806 // From Google bug report 807 assertGreekUpper(u", ", u", "); 808 // http://crbug.com/234797 809 assertGreekUpper(u" !", u" !"); 810 assertGreekUpper(u", ", u", "); 811 assertGreekUpper(u" .", u" ."); 812 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/ 813 assertGreekUpper(u"", u""); 814 assertGreekUpper(u".", u"."); 815 } 816 817 void 818 StringCaseTest::TestLongUpper() { 819 if (quick) { 820 logln("not exhaustive mode: skipping this test"); 821 return; 822 } 823 // Ticket #12663, crash with an extremely long string where 824 // U+0390 maps to 0399 0308 0301 so that the result is three times as long 825 // and overflows an int32_t. 826 int32_t length = 0x40000004; // more than 1G UChars 827 UnicodeString s(length, (UChar32)0x390, length); 828 UnicodeString result; 829 UChar *dest = result.getBuffer(length + 1); 830 if (s.isBogus() || dest == NULL) { 831 logln("Out of memory, unable to run this test on this machine."); 832 return; 833 } 834 IcuTestErrorCode errorCode(*this, "TestLongUpper"); 835 int32_t destLength = u_strToUpper(dest, result.getCapacity(), 836 s.getBuffer(), s.length(), "", errorCode); 837 result.releaseBuffer(destLength); 838 if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) { 839 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)", 840 errorCode.errorName(), (long)destLength); 841 } 842 } 843 844 void StringCaseTest::TestMalformedUTF8() { 845 // ticket #12639 846 IcuTestErrorCode errorCode(*this, "TestMalformedUTF8"); 847 LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode)); 848 if (errorCode.isFailure()) { 849 errln("ucasemap_open(English) failed - %s", errorCode.errorName()); 850 return; 851 } 852 char src[1] = { (char)0x85 }; // malformed UTF-8 853 char dest[3] = { 0, 0, 0 }; 854 int32_t destLength; 855 #if !UCONFIG_NO_BREAK_ITERATION 856 destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode); 857 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 858 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 859 errorCode.errorName(), (int)destLength, dest[0]); 860 } 861 #endif 862 863 errorCode.reset(); 864 dest[0] = 0; 865 destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode); 866 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 867 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 868 errorCode.errorName(), (int)destLength, dest[0]); 869 } 870 871 errorCode.reset(); 872 dest[0] = 0; 873 destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode); 874 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 875 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 876 errorCode.errorName(), (int)destLength, dest[0]); 877 } 878 879 errorCode.reset(); 880 dest[0] = 0; 881 destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode); 882 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { 883 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x", 884 errorCode.errorName(), (int)destLength, dest[0]); 885 } 886 } 887 888 void StringCaseTest::TestBufferOverflow() { 889 // Ticket #12849, incorrect result from Title Case preflight operation, 890 // when buffer overflow error is expected. 891 IcuTestErrorCode errorCode(*this, "TestBufferOverflow"); 892 LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode)); 893 if (errorCode.isFailure()) { 894 errln("ucasemap_open(English) failed - %s", errorCode.errorName()); 895 return; 896 } 897 898 UnicodeString data("hello world"); 899 int32_t result; 900 #if !UCONFIG_NO_BREAK_ITERATION 901 result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode); 902 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) { 903 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " 904 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", 905 __FILE__, __LINE__, data.length(), errorCode.errorName(), result); 906 } 907 #endif 908 errorCode.reset(); 909 910 std::string data_utf8; 911 data.toUTF8String(data_utf8); 912 #if !UCONFIG_NO_BREAK_ITERATION 913 result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode); 914 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) { 915 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " 916 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", 917 __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result); 918 } 919 #endif 920 errorCode.reset(); 921 } 922 923 void StringCaseTest::TestEdits() { 924 IcuTestErrorCode errorCode(*this, "TestEdits"); 925 Edits edits; 926 assertFalse("new Edits hasChanges", edits.hasChanges()); 927 assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges()); 928 assertEquals("new Edits", 0, edits.lengthDelta()); 929 edits.addUnchanged(1); // multiple unchanged ranges are combined 930 edits.addUnchanged(10000); // too long, and they are split 931 edits.addReplace(0, 0); 932 edits.addUnchanged(2); 933 assertFalse("unchanged 10003 hasChanges", edits.hasChanges()); 934 assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges()); 935 assertEquals("unchanged 10003", 0, edits.lengthDelta()); 936 edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed 937 edits.addUnchanged(0); 938 edits.addReplace(2, 1); 939 edits.addReplace(2, 1); 940 edits.addReplace(0, 10); 941 edits.addReplace(100, 0); 942 edits.addReplace(3000, 4000); // variable-length encoding 943 edits.addReplace(100000, 100000); 944 assertTrue("some edits hasChanges", edits.hasChanges()); 945 assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges()); 946 assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta()); 947 UErrorCode outErrorCode = U_ZERO_ERROR; 948 assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode)); 949 950 static const EditChange coarseExpectedChanges[] = { 951 { FALSE, 10003, 10003 }, 952 { TRUE, 103106, 104013 } 953 }; 954 TestUtility::checkEditsIter(*this, u"coarse", 955 edits.getCoarseIterator(), edits.getCoarseIterator(), 956 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode); 957 TestUtility::checkEditsIter(*this, u"coarse changes", 958 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), 959 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode); 960 961 static const EditChange fineExpectedChanges[] = { 962 { FALSE, 10003, 10003 }, 963 { TRUE, 2, 1 }, 964 { TRUE, 2, 1 }, 965 { TRUE, 2, 1 }, 966 { TRUE, 0, 10 }, 967 { TRUE, 100, 0 }, 968 { TRUE, 3000, 4000 }, 969 { TRUE, 100000, 100000 } 970 }; 971 TestUtility::checkEditsIter(*this, u"fine", 972 edits.getFineIterator(), edits.getFineIterator(), 973 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode); 974 TestUtility::checkEditsIter(*this, u"fine changes", 975 edits.getFineChangesIterator(), edits.getFineChangesIterator(), 976 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode); 977 978 edits.reset(); 979 assertFalse("reset hasChanges", edits.hasChanges()); 980 assertEquals("reset numberOfChanges", 0, edits.numberOfChanges()); 981 assertEquals("reset", 0, edits.lengthDelta()); 982 Edits::Iterator ei = edits.getCoarseChangesIterator(); 983 assertFalse("reset then iterator", ei.next(errorCode)); 984 } 985 986 void StringCaseTest::TestCopyMoveEdits() { 987 IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits"); 988 // Exceed the stack array capacity. 989 Edits a; 990 for (int32_t i = 0; i < 250; ++i) { 991 a.addReplace(i % 10, (i % 10) + 1); 992 } 993 assertEquals("a: many edits, length delta", 250, a.lengthDelta()); 994 995 // copy 996 Edits b(a); 997 assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta()); 998 assertEquals("a remains: many edits, length delta", 250, a.lengthDelta()); 999 TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode); 1000 1001 // assign 1002 Edits c; 1003 c.addUnchanged(99); 1004 c.addReplace(88, 77); 1005 c = b; 1006 assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta()); 1007 assertEquals("b remains: many edits, length delta", 250, b.lengthDelta()); 1008 TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode); 1009 1010 // std::move trouble on these platforms. 1011 // See https://ssl.icu-project.org/trac/ticket/13393 1012 #if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) 1013 // move constructor empties object with heap array 1014 Edits d(std::move(a)); 1015 assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta()); 1016 assertFalse("a moved away: no more hasChanges", a.hasChanges()); 1017 TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode); 1018 Edits empty; 1019 TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode); 1020 1021 // move assignment empties object with heap array 1022 Edits e; 1023 e.addReplace(0, 1000); 1024 e = std::move(b); 1025 assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta()); 1026 assertFalse("b moved away: no more hasChanges", b.hasChanges()); 1027 TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode); 1028 TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode); 1029 1030 // Edits::Iterator default constructor. 1031 Edits::Iterator iter; 1032 assertFalse("Edits::Iterator().next()", iter.next(errorCode)); 1033 assertSuccess("Edits::Iterator().next()", errorCode); 1034 iter = e.getFineChangesIterator(); 1035 assertTrue("iter.next()", iter.next(errorCode)); 1036 assertSuccess("iter.next()", errorCode); 1037 assertTrue("iter.hasChange()", iter.hasChange()); 1038 assertEquals("iter.newLength()", 1, iter.newLength()); 1039 #endif 1040 } 1041 1042 void StringCaseTest::TestEditsFindFwdBwd() { 1043 IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd"); 1044 // Some users need index mappings to be efficient when they are out of order. 1045 // The most interesting failure case for this test is it taking a very long time. 1046 Edits e; 1047 constexpr int32_t N = 200000; 1048 for (int32_t i = 0; i < N; ++i) { 1049 e.addUnchanged(1); 1050 e.addReplace(3, 1); 1051 } 1052 Edits::Iterator iter = e.getFineIterator(); 1053 for (int32_t i = 0; i <= N; i += 2) { 1054 assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode)); 1055 assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode)); 1056 } 1057 for (int32_t i = N; i >= 0; i -= 2) { 1058 assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode)); 1059 assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode)); 1060 } 1061 } 1062 1063 void StringCaseTest::TestMergeEdits() { 1064 // For debugging, set -v to see matching edits up to a failure. 1065 IcuTestErrorCode errorCode(*this, "TestMergeEdits"); 1066 Edits ab, bc, ac, expected_ac; 1067 1068 // Simple: Two parallel non-changes. 1069 ab.addUnchanged(2); 1070 bc.addUnchanged(2); 1071 expected_ac.addUnchanged(2); 1072 1073 // Simple: Two aligned changes. 1074 ab.addReplace(3, 2); 1075 bc.addReplace(2, 1); 1076 expected_ac.addReplace(3, 1); 1077 1078 // Unequal non-changes. 1079 ab.addUnchanged(5); 1080 bc.addUnchanged(3); 1081 expected_ac.addUnchanged(3); 1082 // ab ahead by 2 1083 1084 // Overlapping changes accumulate until they share a boundary. 1085 ab.addReplace(4, 3); 1086 bc.addReplace(3, 2); 1087 ab.addReplace(4, 3); 1088 bc.addReplace(3, 2); 1089 ab.addReplace(4, 3); 1090 bc.addReplace(3, 2); 1091 bc.addUnchanged(4); 1092 expected_ac.addReplace(14, 8); 1093 // bc ahead by 2 1094 1095 // Balance out intermediate-string lengths. 1096 ab.addUnchanged(2); 1097 expected_ac.addUnchanged(2); 1098 1099 // Insert something and delete it: Should disappear. 1100 ab.addReplace(0, 5); 1101 ab.addReplace(0, 2); 1102 bc.addReplace(7, 0); 1103 1104 // Parallel change to make a new boundary. 1105 ab.addReplace(1, 2); 1106 bc.addReplace(2, 3); 1107 expected_ac.addReplace(1, 3); 1108 1109 // Multiple ab deletions should remain separate at the boundary. 1110 ab.addReplace(1, 0); 1111 ab.addReplace(2, 0); 1112 ab.addReplace(3, 0); 1113 expected_ac.addReplace(1, 0); 1114 expected_ac.addReplace(2, 0); 1115 expected_ac.addReplace(3, 0); 1116 1117 // Unequal non-changes can be split for another boundary. 1118 ab.addUnchanged(2); 1119 bc.addUnchanged(1); 1120 expected_ac.addUnchanged(1); 1121 // ab ahead by 1 1122 1123 // Multiple bc insertions should create a boundary and remain separate. 1124 bc.addReplace(0, 4); 1125 bc.addReplace(0, 5); 1126 bc.addReplace(0, 6); 1127 expected_ac.addReplace(0, 4); 1128 expected_ac.addReplace(0, 5); 1129 expected_ac.addReplace(0, 6); 1130 // ab ahead by 1 1131 1132 // Multiple ab deletions in the middle of a bc change are merged. 1133 bc.addReplace(2, 2); 1134 // bc ahead by 1 1135 ab.addReplace(1, 0); 1136 ab.addReplace(2, 0); 1137 ab.addReplace(3, 0); 1138 ab.addReplace(4, 1); 1139 expected_ac.addReplace(11, 2); 1140 1141 // Multiple bc insertions in the middle of an ab change are merged. 1142 ab.addReplace(5, 6); 1143 bc.addReplace(3, 3); 1144 // ab ahead by 3 1145 bc.addReplace(0, 4); 1146 bc.addReplace(0, 5); 1147 bc.addReplace(0, 6); 1148 bc.addReplace(3, 7); 1149 expected_ac.addReplace(5, 25); 1150 1151 // Delete around a deletion. 1152 ab.addReplace(4, 4); 1153 ab.addReplace(3, 0); 1154 ab.addUnchanged(2); 1155 bc.addReplace(2, 2); 1156 bc.addReplace(4, 0); 1157 expected_ac.addReplace(9, 2); 1158 1159 // Insert into an insertion. 1160 ab.addReplace(0, 2); 1161 bc.addReplace(1, 1); 1162 bc.addReplace(0, 8); 1163 bc.addUnchanged(4); 1164 expected_ac.addReplace(0, 10); 1165 // bc ahead by 3 1166 1167 // Balance out intermediate-string lengths. 1168 ab.addUnchanged(3); 1169 expected_ac.addUnchanged(3); 1170 1171 // Deletions meet insertions. 1172 // Output order is arbitrary in principle, but we expect insertions first 1173 // and want to keep it that way. 1174 ab.addReplace(2, 0); 1175 ab.addReplace(4, 0); 1176 ab.addReplace(6, 0); 1177 bc.addReplace(0, 1); 1178 bc.addReplace(0, 3); 1179 bc.addReplace(0, 5); 1180 expected_ac.addReplace(0, 1); 1181 expected_ac.addReplace(0, 3); 1182 expected_ac.addReplace(0, 5); 1183 expected_ac.addReplace(2, 0); 1184 expected_ac.addReplace(4, 0); 1185 expected_ac.addReplace(6, 0); 1186 1187 // End with a non-change, so that further edits are never reordered. 1188 ab.addUnchanged(1); 1189 bc.addUnchanged(1); 1190 expected_ac.addUnchanged(1); 1191 1192 ac.mergeAndAppend(ab, bc, errorCode); 1193 assertSuccess("ab+bc", errorCode); 1194 if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) { 1195 return; 1196 } 1197 1198 // Append more Edits. 1199 Edits ab2, bc2; 1200 ab2.addUnchanged(5); 1201 bc2.addReplace(1, 2); 1202 bc2.addUnchanged(4); 1203 expected_ac.addReplace(1, 2); 1204 expected_ac.addUnchanged(4); 1205 ac.mergeAndAppend(ab2, bc2, errorCode); 1206 assertSuccess("ab2+bc2", errorCode); 1207 if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) { 1208 return; 1209 } 1210 1211 // Append empty edits. 1212 Edits empty; 1213 ac.mergeAndAppend(empty, empty, errorCode); 1214 assertSuccess("empty+empty", errorCode); 1215 if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) { 1216 return; 1217 } 1218 1219 // Error: Append more edits with mismatched intermediate-string lengths. 1220 Edits mismatch; 1221 mismatch.addReplace(1, 1); 1222 ac.mergeAndAppend(ab2, mismatch, errorCode); 1223 assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get()); 1224 errorCode.reset(); 1225 ac.mergeAndAppend(mismatch, bc2, errorCode); 1226 assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get()); 1227 errorCode.reset(); 1228 } 1229 1230 void StringCaseTest::TestCaseMapWithEdits() { 1231 IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits"); 1232 UChar dest[20]; 1233 Edits edits; 1234 1235 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, 1236 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1237 assertEquals(u"toLower(IstanBul)", UnicodeString(u"b"), UnicodeString(TRUE, dest, length)); 1238 static const EditChange lowerExpectedChanges[] = { 1239 { TRUE, 1, 1 }, 1240 { FALSE, 4, 4 }, 1241 { TRUE, 1, 1 }, 1242 { FALSE, 2, 2 } 1243 }; 1244 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)", 1245 edits.getFineIterator(), edits.getFineIterator(), 1246 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), 1247 TRUE, errorCode); 1248 1249 edits.reset(); 1250 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, 1251 u"", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1252 assertEquals(u"toUpper()", UnicodeString(u""), UnicodeString(TRUE, dest, length)); 1253 static const EditChange upperExpectedChanges[] = { 1254 { FALSE, 1, 1 }, 1255 { TRUE, 1, 1 }, 1256 { TRUE, 1, 1 }, 1257 { TRUE, 1, 1 }, 1258 { TRUE, 1, 1 }, 1259 { TRUE, 1, 1 } 1260 }; 1261 TestUtility::checkEditsIter(*this, u"toUpper()", 1262 edits.getFineIterator(), edits.getFineIterator(), 1263 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), 1264 TRUE, errorCode); 1265 1266 edits.reset(); 1267 1268 #if !UCONFIG_NO_BREAK_ITERATION 1269 length = CaseMap::toTitle("nl", 1270 U_OMIT_UNCHANGED_TEXT | 1271 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1272 U_TITLECASE_NO_LOWERCASE, 1273 nullptr, u"IjssEL IglOo", 12, 1274 dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1275 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); 1276 static const EditChange titleExpectedChanges[] = { 1277 { FALSE, 1, 1 }, 1278 { TRUE, 1, 1 }, 1279 { FALSE, 10, 10 } 1280 }; 1281 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)", 1282 edits.getFineIterator(), edits.getFineIterator(), 1283 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), 1284 TRUE, errorCode); 1285 #endif 1286 1287 // No explicit nor automatic edits.reset(). Edits should be appended. 1288 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1289 u"ItanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1290 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"ssb"), UnicodeString(TRUE, dest, length)); 1291 static const EditChange foldExpectedChanges[] = { 1292 #if !UCONFIG_NO_BREAK_ITERATION 1293 // From titlecasing. 1294 { FALSE, 1, 1 }, 1295 { TRUE, 1, 1 }, 1296 { FALSE, 10, 10 }, 1297 #endif 1298 // From case folding. 1299 { TRUE, 1, 1 }, 1300 { TRUE, 1, 2 }, 1301 { FALSE, 3, 3 }, 1302 { TRUE, 1, 1 }, 1303 { FALSE, 2, 2 } 1304 }; 1305 TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, ItanBul)", 1306 edits.getFineIterator(), edits.getFineIterator(), 1307 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), 1308 TRUE, errorCode); 1309 } 1310 1311 void StringCaseTest::TestCaseMapUTF8WithEdits() { 1312 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits"); 1313 char dest[50]; 1314 Edits edits; 1315 1316 int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, 1317 u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1318 assertEquals(u"toLower(IstanBul)", UnicodeString(u"b"), 1319 UnicodeString::fromUTF8(StringPiece(dest, length))); 1320 static const EditChange lowerExpectedChanges[] = { 1321 { TRUE, 1, 2 }, 1322 { FALSE, 4, 4 }, 1323 { TRUE, 1, 1 }, 1324 { FALSE, 2, 2 } 1325 }; 1326 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)", 1327 edits.getFineIterator(), edits.getFineIterator(), 1328 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), 1329 TRUE, errorCode); 1330 1331 edits.reset(); 1332 length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, 1333 u8"", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1334 assertEquals(u"toUpper()", UnicodeString(u""), 1335 UnicodeString::fromUTF8(StringPiece(dest, length))); 1336 static const EditChange upperExpectedChanges[] = { 1337 { FALSE, 2, 2 }, 1338 { TRUE, 2, 2 }, 1339 { TRUE, 2, 2 }, 1340 { TRUE, 2, 2 }, 1341 { TRUE, 2, 2 }, 1342 { TRUE, 2, 2 } 1343 }; 1344 TestUtility::checkEditsIter(*this, u"toUpper()", 1345 edits.getFineIterator(), edits.getFineIterator(), 1346 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), 1347 TRUE, errorCode); 1348 1349 edits.reset(); 1350 #if !UCONFIG_NO_BREAK_ITERATION 1351 length = CaseMap::utf8ToTitle("nl", 1352 U_OMIT_UNCHANGED_TEXT | 1353 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1354 U_TITLECASE_NO_LOWERCASE, 1355 nullptr, u8"IjssEL IglOo", 12, 1356 dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1357 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), 1358 UnicodeString::fromUTF8(StringPiece(dest, length))); 1359 static const EditChange titleExpectedChanges[] = { 1360 { FALSE, 1, 1 }, 1361 { TRUE, 1, 1 }, 1362 { FALSE, 10, 10 } 1363 }; 1364 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)", 1365 edits.getFineIterator(), edits.getFineIterator(), 1366 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), 1367 TRUE, errorCode); 1368 #endif 1369 1370 // No explicit nor automatic edits.reset(). Edits should be appended. 1371 length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | 1372 U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1373 u8"ItanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); 1374 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"ssb"), 1375 UnicodeString::fromUTF8(StringPiece(dest, length))); 1376 static const EditChange foldExpectedChanges[] = { 1377 #if !UCONFIG_NO_BREAK_ITERATION 1378 // From titlecasing. 1379 { FALSE, 1, 1 }, 1380 { TRUE, 1, 1 }, 1381 { FALSE, 10, 10 }, 1382 #endif 1383 // From case folding. 1384 { TRUE, 1, 2 }, 1385 { TRUE, 2, 2 }, 1386 { FALSE, 3, 3 }, 1387 { TRUE, 1, 1 }, 1388 { FALSE, 2, 2 } 1389 }; 1390 TestUtility::checkEditsIter(*this, u"foldCase(ItanBul)", 1391 edits.getFineIterator(), edits.getFineIterator(), 1392 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), 1393 TRUE, errorCode); 1394 } 1395 1396 void StringCaseTest::TestCaseMapToString() { 1397 // This test function name is parallel with one in UCharacterCaseTest.java. 1398 // It is a bit of a misnomer until we have CaseMap API that writes to 1399 // a UnicodeString, at which point we should change this code here. 1400 IcuTestErrorCode errorCode(*this, "TestCaseMapToString"); 1401 UChar dest[20]; 1402 1403 // Omit unchanged text. 1404 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, 1405 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1406 assertEquals(u"toLower(IstanBul)", 1407 UnicodeString(u"b"), UnicodeString(TRUE, dest, length)); 1408 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, 1409 u"", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1410 assertEquals(u"toUpper()", 1411 UnicodeString(u""), UnicodeString(TRUE, dest, length)); 1412 #if !UCONFIG_NO_BREAK_ITERATION 1413 length = CaseMap::toTitle("nl", 1414 U_OMIT_UNCHANGED_TEXT | 1415 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1416 U_TITLECASE_NO_LOWERCASE, 1417 nullptr, u"IjssEL IglOo", 12, 1418 dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1419 assertEquals(u"toTitle(IjssEL IglOo)", 1420 UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); 1421 #endif 1422 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1423 u"ItanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1424 assertEquals(u"foldCase(ItanBul)", 1425 UnicodeString(u"ssb"), UnicodeString(TRUE, dest, length)); 1426 1427 // Return the whole result string. 1428 length = CaseMap::toLower("tr", 0, 1429 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1430 assertEquals(u"toLower(IstanBul)", 1431 UnicodeString(u"stanbul"), UnicodeString(TRUE, dest, length)); 1432 length = CaseMap::toUpper("el", 0, 1433 u"", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1434 assertEquals(u"toUpper()", 1435 UnicodeString(u""), UnicodeString(TRUE, dest, length)); 1436 #if !UCONFIG_NO_BREAK_ITERATION 1437 length = CaseMap::toTitle("nl", 1438 U_TITLECASE_NO_BREAK_ADJUSTMENT | 1439 U_TITLECASE_NO_LOWERCASE, 1440 nullptr, u"IjssEL IglOo", 12, 1441 dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1442 assertEquals(u"toTitle(IjssEL IglOo)", 1443 UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length)); 1444 #endif 1445 length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1446 u"ItanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); 1447 assertEquals(u"foldCase(ItanBul)", 1448 UnicodeString(u"sstanbul"), UnicodeString(TRUE, dest, length)); 1449 } 1450 1451 void StringCaseTest::TestCaseMapUTF8ToString() { 1452 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString"); 1453 std::string dest; 1454 StringByteSink<std::string> sink(&dest); 1455 1456 // Omit unchanged text. 1457 CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode); 1458 assertEquals(u"toLower(IstanBul)", UnicodeString(u"b"), UnicodeString::fromUTF8(dest)); 1459 dest.clear(); 1460 CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"", sink, nullptr, errorCode); 1461 assertEquals(u"toUpper()", UnicodeString(u""), 1462 UnicodeString::fromUTF8(dest)); 1463 #if !UCONFIG_NO_BREAK_ITERATION 1464 dest.clear(); 1465 CaseMap::utf8ToTitle( 1466 "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, 1467 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode); 1468 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), 1469 UnicodeString::fromUTF8(dest)); 1470 #endif 1471 dest.clear(); 1472 CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, 1473 u8"ItanBul", sink, nullptr, errorCode); 1474 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"ssb"), 1475 UnicodeString::fromUTF8(dest)); 1476 1477 // Return the whole result string. 1478 dest.clear(); 1479 CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode); 1480 assertEquals(u"toLower(IstanBul)", UnicodeString(u"stanbul"), 1481 UnicodeString::fromUTF8(dest)); 1482 dest.clear(); 1483 CaseMap::utf8ToUpper("el", 0, u8"", sink, nullptr, errorCode); 1484 assertEquals(u"toUpper()", UnicodeString(u""), 1485 UnicodeString::fromUTF8(dest)); 1486 #if !UCONFIG_NO_BREAK_ITERATION 1487 dest.clear(); 1488 CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, 1489 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode); 1490 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"), 1491 UnicodeString::fromUTF8(dest)); 1492 #endif 1493 dest.clear(); 1494 CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"ItanBul", sink, nullptr, errorCode); 1495 assertEquals(u"foldCase(ItanBul)", UnicodeString(u"sstanbul"), 1496 UnicodeString::fromUTF8(dest)); 1497 } 1498 1499 void StringCaseTest::TestLongUnicodeString() { 1500 // Code coverage for UnicodeString case mapping code handling 1501 // long strings or many changes in a string. 1502 UnicodeString s(TRUE, 1503 (const UChar *) 1504 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1505 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1506 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1507 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1508 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" 1509 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51); 1510 UnicodeString expected(TRUE, 1511 (const UChar *) 1512 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1513 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1514 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1515 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1516 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" 1517 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51); 1518 s.toUpper(Locale::getRoot()); 1519 assertEquals("string length 306", expected, s); 1520 } 1521 1522 #if !UCONFIG_NO_BREAK_ITERATION 1523 void StringCaseTest::TestBug13127() { 1524 // Test case crashed when the bug was present. 1525 const char16_t *s16 = u""; 1526 UnicodeString s(TRUE, s16, -1); 1527 s.toTitle(0, Locale::getEnglish()); 1528 } 1529 1530 void StringCaseTest::TestInPlaceTitle() { 1531 // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place. 1532 IcuTestErrorCode errorCode(*this, "TestInPlaceTitle"); 1533 char16_t s[32] = u" abcdef"; 1534 const char16_t *expected = u"Ss Ss Ss Abcdef"; 1535 int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode); 1536 assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length); 1537 assertEquals("u_strToTitle(in-place)", expected, s); 1538 } 1539 #endif 1540 1541 void StringCaseTest::TestCaseMapEditsIteratorDocs() { 1542 IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs"); 1543 const char16_t* input = u"abcDeF"; 1544 int32_t inputLength = u_strlen(input); 1545 // output: "abcssdef" 1546 1547 char16_t output[10]; 1548 Edits edits; 1549 CaseMap::fold(0, input, -1, output, 10, &edits, status); 1550 1551 static const char16_t* fineIteratorExpected[] = { 1552 u"{ src[0..3] dest[0..3] (no-change) }", 1553 u"{ src[3..4] dest[3..5], repl[0..2] }", 1554 u"{ src[4..5] dest[5..6], repl[2..3] }", 1555 u"{ src[5..6] dest[6..7] (no-change) }", 1556 u"{ src[6..7] dest[7..8], repl[3..4] }", 1557 }; 1558 static const char16_t* fineChangesIteratorExpected[] = { 1559 u"{ src[3..4] dest[3..5], repl[0..2] }", 1560 u"{ src[4..5] dest[5..6], repl[2..3] }", 1561 u"{ src[6..7] dest[7..8], repl[3..4] }", 1562 }; 1563 static const char16_t* coarseIteratorExpected[] = { 1564 u"{ src[0..3] dest[0..3] (no-change) }", 1565 u"{ src[3..5] dest[3..6], repl[0..3] }", 1566 u"{ src[5..6] dest[6..7] (no-change) }", 1567 u"{ src[6..7] dest[7..8], repl[3..4] }", 1568 }; 1569 static const char16_t* coarseChangesIteratorExpected[] = { 1570 u"{ src[3..5] dest[3..6], repl[0..3] }", 1571 u"{ src[6..7] dest[7..8], repl[3..4] }", 1572 }; 1573 1574 // Expected destination indices when source index is queried 1575 static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7}; 1576 static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7}; 1577 static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7}; 1578 static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7}; 1579 1580 // Expected source indices when destination index is queried 1581 static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 }; 1582 static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 }; 1583 static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 }; 1584 static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 }; 1585 1586 // Demonstrate the iterator next() method: 1587 Edits::Iterator fineIterator = edits.getFineIterator(); 1588 int i = 0; 1589 UnicodeString toString; 1590 while (fineIterator.next(status)) { 1591 UnicodeString expected = fineIteratorExpected[i++]; 1592 assertEquals(UnicodeString(u"Iteration #") + i, 1593 expected, 1594 fineIterator.toString(toString.remove())); 1595 } 1596 Edits::Iterator fineChangesIterator = edits.getFineChangesIterator(); 1597 i = 0; 1598 while (fineChangesIterator.next(status)) { 1599 UnicodeString expected = fineChangesIteratorExpected[i++]; 1600 assertEquals(UnicodeString(u"Iteration #") + i, 1601 expected, 1602 fineChangesIterator.toString(toString.remove())); 1603 } 1604 Edits::Iterator coarseIterator = edits.getCoarseIterator(); 1605 i = 0; 1606 while (coarseIterator.next(status)) { 1607 UnicodeString expected = coarseIteratorExpected[i++]; 1608 assertEquals(UnicodeString(u"Iteration #") + i, 1609 expected, 1610 coarseIterator.toString(toString.remove())); 1611 } 1612 Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator(); 1613 i = 0; 1614 while (coarseChangesIterator.next(status)) { 1615 UnicodeString expected = coarseChangesIteratorExpected[i++]; 1616 assertEquals(UnicodeString(u"Iteration #") + i, 1617 expected, 1618 coarseChangesIterator.toString(toString.remove())); 1619 } 1620 1621 // Demonstrate the iterator indexing methods: 1622 // fineIterator should have the same behavior as fineChangesIterator, and 1623 // coarseIterator should have the same behavior as coarseChangesIterator. 1624 for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) { 1625 fineIterator.findSourceIndex(srcIndex, status); 1626 fineChangesIterator.findSourceIndex(srcIndex, status); 1627 coarseIterator.findSourceIndex(srcIndex, status); 1628 coarseChangesIterator.findSourceIndex(srcIndex, status); 1629 1630 assertEquals(UnicodeString("Source index: ") + srcIndex, 1631 expectedDestFineEditIndices[srcIndex], 1632 fineIterator.destinationIndex()); 1633 assertEquals(UnicodeString("Source index: ") + srcIndex, 1634 expectedDestFineEditIndices[srcIndex], 1635 fineChangesIterator.destinationIndex()); 1636 assertEquals(UnicodeString("Source index: ") + srcIndex, 1637 expectedDestCoarseEditIndices[srcIndex], 1638 coarseIterator.destinationIndex()); 1639 assertEquals(UnicodeString("Source index: ") + srcIndex, 1640 expectedDestCoarseEditIndices[srcIndex], 1641 coarseChangesIterator.destinationIndex()); 1642 1643 assertEquals(UnicodeString("Source index: ") + srcIndex, 1644 expectedDestFineStringIndices[srcIndex], 1645 fineIterator.destinationIndexFromSourceIndex(srcIndex, status)); 1646 assertEquals(UnicodeString("Source index: ") + srcIndex, 1647 expectedDestFineStringIndices[srcIndex], 1648 fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status)); 1649 assertEquals(UnicodeString("Source index: ") + srcIndex, 1650 expectedDestCoarseStringIndices[srcIndex], 1651 coarseIterator.destinationIndexFromSourceIndex(srcIndex, status)); 1652 assertEquals(UnicodeString("Source index: ") + srcIndex, 1653 expectedDestCoarseStringIndices[srcIndex], 1654 coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status)); 1655 } 1656 for (int32_t destIndex=0; destIndex<inputLength; destIndex++) { 1657 fineIterator.findDestinationIndex(destIndex, status); 1658 fineChangesIterator.findDestinationIndex(destIndex, status); 1659 coarseIterator.findDestinationIndex(destIndex, status); 1660 coarseChangesIterator.findDestinationIndex(destIndex, status); 1661 1662 assertEquals(UnicodeString("Destination index: ") + destIndex, 1663 expectedSrcFineEditIndices[destIndex], 1664 fineIterator.sourceIndex()); 1665 assertEquals(UnicodeString("Destination index: ") + destIndex, 1666 expectedSrcFineEditIndices[destIndex], 1667 fineChangesIterator.sourceIndex()); 1668 assertEquals(UnicodeString("Destination index: ") + destIndex, 1669 expectedSrcCoarseEditIndices[destIndex], 1670 coarseIterator.sourceIndex()); 1671 assertEquals(UnicodeString("Destination index: ") + destIndex, 1672 expectedSrcCoarseEditIndices[destIndex], 1673 coarseChangesIterator.sourceIndex()); 1674 1675 assertEquals(UnicodeString("Destination index: ") + destIndex, 1676 expectedSrcFineStringIndices[destIndex], 1677 fineIterator.sourceIndexFromDestinationIndex(destIndex, status)); 1678 assertEquals(UnicodeString("Destination index: ") + destIndex, 1679 expectedSrcFineStringIndices[destIndex], 1680 fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status)); 1681 assertEquals(UnicodeString("Destination index: ") + destIndex, 1682 expectedSrcCoarseStringIndices[destIndex], 1683 coarseIterator.sourceIndexFromDestinationIndex(destIndex, status)); 1684 assertEquals(UnicodeString("Destination index: ") + destIndex, 1685 expectedSrcCoarseStringIndices[destIndex], 1686 coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status)); 1687 } 1688 } 1689 1690 void StringCaseTest::TestCaseMapGreekExtended() { 1691 // Ticket 13851 1692 UnicodeString s(u"\u1F80\u1F88\u1FFC"); 1693 UnicodeString result(s); 1694 result.toLower(Locale::getRoot()); 1695 assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result); 1696 #if !UCONFIG_NO_BREAK_ITERATION 1697 result = s; 1698 result.toTitle(nullptr, Locale::getRoot()); 1699 assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result); 1700 #endif 1701 } 1702 1703 //#endif 1704