1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9 #include "ustrtest.h" 10 #include "unicode/appendable.h" 11 #include "unicode/std_string.h" 12 #include "unicode/unistr.h" 13 #include "unicode/uchar.h" 14 #include "unicode/ustring.h" 15 #include "unicode/locid.h" 16 #include "unicode/strenum.h" 17 #include "unicode/ucnv.h" 18 #include "unicode/uenum.h" 19 #include "unicode/utf16.h" 20 #include "cmemory.h" 21 #include "charstr.h" 22 23 #if 0 24 #include "unicode/ustream.h" 25 26 #include <iostream> 27 using namespace std; 28 29 #endif 30 31 UnicodeStringTest::~UnicodeStringTest() {} 32 33 extern IntlTest *createStringCaseTest(); 34 35 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par) 36 { 37 if (exec) logln("TestSuite UnicodeStringTest: "); 38 TESTCASE_AUTO_BEGIN; 39 TESTCASE_AUTO_CREATE_CLASS(StringCaseTest); 40 TESTCASE_AUTO(TestBasicManipulation); 41 TESTCASE_AUTO(TestCompare); 42 TESTCASE_AUTO(TestExtract); 43 TESTCASE_AUTO(TestRemoveReplace); 44 TESTCASE_AUTO(TestSearching); 45 TESTCASE_AUTO(TestSpacePadding); 46 TESTCASE_AUTO(TestPrefixAndSuffix); 47 TESTCASE_AUTO(TestFindAndReplace); 48 TESTCASE_AUTO(TestBogus); 49 TESTCASE_AUTO(TestReverse); 50 TESTCASE_AUTO(TestMiscellaneous); 51 TESTCASE_AUTO(TestStackAllocation); 52 TESTCASE_AUTO(TestUnescape); 53 TESTCASE_AUTO(TestCountChar32); 54 TESTCASE_AUTO(TestStringEnumeration); 55 TESTCASE_AUTO(TestNameSpace); 56 TESTCASE_AUTO(TestUTF32); 57 TESTCASE_AUTO(TestUTF8); 58 TESTCASE_AUTO(TestReadOnlyAlias); 59 TESTCASE_AUTO(TestAppendable); 60 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable); 61 TESTCASE_AUTO(TestSizeofUnicodeString); 62 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated); 63 TESTCASE_AUTO(TestMoveSwap); 64 TESTCASE_AUTO(TestUInt16Pointers); 65 TESTCASE_AUTO(TestWCharPointers); 66 TESTCASE_AUTO(TestNullPointers); 67 TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf); 68 TESTCASE_AUTO_END; 69 } 70 71 void 72 UnicodeStringTest::TestBasicManipulation() 73 { 74 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n"); 75 UnicodeString expectedValue; 76 UnicodeString *c; 77 78 c=(UnicodeString *)test1.clone(); 79 test1.insert(24, "good "); 80 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n"; 81 if (test1 != expectedValue) 82 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 83 84 c->insert(24, "good "); 85 if(*c != expectedValue) { 86 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\""); 87 } 88 delete c; 89 90 test1.remove(41, 8); 91 expectedValue = "Now is the time for all good men to come to the aid of the party.\n"; 92 if (test1 != expectedValue) 93 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 94 95 test1.replace(58, 6, "ir country"); 96 expectedValue = "Now is the time for all good men to come to the aid of their country.\n"; 97 if (test1 != expectedValue) 98 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 99 100 UChar temp[80]; 101 test1.extract(0, 15, temp); 102 103 UnicodeString test2(temp, 15); 104 105 expectedValue = "Now is the time"; 106 if (test2 != expectedValue) 107 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\""); 108 109 test2 += " for me to go!\n"; 110 expectedValue = "Now is the time for me to go!\n"; 111 if (test2 != expectedValue) 112 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\""); 113 114 if (test1.length() != 70) 115 errln(UnicodeString("length() failed: expected 70, got ") + test1.length()); 116 if (test2.length() != 30) 117 errln(UnicodeString("length() failed: expected 30, got ") + test2.length()); 118 119 UnicodeString test3; 120 test3.append((UChar32)0x20402); 121 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){ 122 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3)); 123 } 124 if(test3.length() != 2){ 125 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length()); 126 } 127 test3.append((UChar32)0x0074); 128 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){ 129 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3)); 130 } 131 if(test3.length() != 3){ 132 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length()); 133 } 134 135 // test some UChar32 overloads 136 if( test3.setTo((UChar32)0x10330).length() != 2 || 137 test3.insert(0, (UChar32)0x20100).length() != 4 || 138 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 || 139 (test3 = (UChar32)0x14001).length() != 2 140 ) { 141 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed"); 142 } 143 144 { 145 // test moveIndex32() 146 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 147 148 if( 149 s.moveIndex32(2, -1)!=0 || 150 s.moveIndex32(2, 1)!=4 || 151 s.moveIndex32(2, 2)!=5 || 152 s.moveIndex32(5, -2)!=2 || 153 s.moveIndex32(0, -1)!=0 || 154 s.moveIndex32(6, 1)!=6 155 ) { 156 errln("UnicodeString::moveIndex32() failed"); 157 } 158 159 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) { 160 errln("UnicodeString::getChar32Start() failed"); 161 } 162 163 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) { 164 errln("UnicodeString::getChar32Limit() failed"); 165 } 166 } 167 168 { 169 // test new 2.2 constructors and setTo function that parallel Java's substring function. 170 UnicodeString src("Hello folks how are you?"); 171 UnicodeString target1("how are you?"); 172 if (target1 != UnicodeString(src, 12)) { 173 errln("UnicodeString(const UnicodeString&, int32_t) failed"); 174 } 175 UnicodeString target2("folks"); 176 if (target2 != UnicodeString(src, 6, 5)) { 177 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed"); 178 } 179 if (target1 != target2.setTo(src, 12)) { 180 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed"); 181 } 182 } 183 184 { 185 // op+ is new in ICU 2.8 186 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", ""); 187 if(s!=UnicodeString("abcdefghi", "")) { 188 errln("operator+(UniStr, UniStr) failed"); 189 } 190 } 191 192 { 193 // tests for Jitterbug 2360 194 // verify that APIs with source pointer + length accept length == -1 195 // mostly test only where modified, only few functions did not already do this 196 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) { 197 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1"); 198 } 199 200 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff }; 201 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer)); 202 203 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) { 204 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1"); 205 } 206 if(t.length()!=u_strlen(buffer)) { 207 errln("UnicodeString(buffer, length, capacity) does not work with length==-1"); 208 } 209 210 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) { 211 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1"); 212 } 213 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) { 214 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work"); 215 } 216 217 buffer[u_strlen(buffer)]=0xe4; 218 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer)); 219 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) { 220 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1"); 221 } 222 if(u.length()!=UPRV_LENGTHOF(buffer)) { 223 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1"); 224 } 225 226 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 }; 227 UConverter *cnv; 228 UErrorCode errorCode=U_ZERO_ERROR; 229 230 cnv=ucnv_open("ISO-8859-1", &errorCode); 231 UnicodeString v(cs, -1, cnv, errorCode); 232 ucnv_close(cnv); 233 if(v!=CharsToUnicodeString("a\\xe4\\x85")) { 234 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1"); 235 } 236 } 237 238 #if U_CHARSET_IS_UTF8 239 { 240 // Test the hardcoded-UTF-8 UnicodeString optimizations. 241 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 }; 242 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 }; 243 UnicodeString from8a = UnicodeString((const char *)utf8); 244 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1); 245 UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16)); 246 if(from8a != from16 || from8b != from16) { 247 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed"); 248 } 249 char buffer[16]; 250 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer)); 251 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) { 252 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed"); 253 } 254 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer)); 255 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) { 256 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed"); 257 } 258 } 259 #endif 260 } 261 262 void 263 UnicodeStringTest::TestCompare() 264 { 265 UnicodeString test1("this is a test"); 266 UnicodeString test2("this is a test"); 267 UnicodeString test3("this is a test of the emergency broadcast system"); 268 UnicodeString test4("never say, \"this is a test\"!!"); 269 270 UnicodeString test5((UChar)0x5000); 271 UnicodeString test6((UChar)0x5100); 272 273 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 274 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 }; 275 char chars[] = "this is a test"; 276 277 // test operator== and operator!= 278 if (test1 != test2 || test1 == test3 || test1 == test4) 279 errln("operator== or operator!= failed"); 280 281 // test operator> and operator< 282 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) || 283 !(test5 < test6) 284 ) { 285 errln("operator> or operator< failed"); 286 } 287 288 // test operator>= and operator<= 289 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4)) 290 errln("operator>= or operator<= failed"); 291 292 // test compare(UnicodeString) 293 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0) 294 errln("compare(UnicodeString) failed"); 295 296 //test compare(offset, length, UnicodeString) 297 if(test1.compare(0, 14, test2) != 0 || 298 test3.compare(0, 14, test2) != 0 || 299 test4.compare(12, 14, test2) != 0 || 300 test3.compare(0, 18, test1) <=0 ) 301 errln("compare(offset, length, UnicodeString) failes"); 302 303 // test compare(UChar*) 304 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0) 305 errln("compare(UChar*) failed"); 306 307 // test compare(char*) 308 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0) 309 errln("compare(char*) failed"); 310 311 // test compare(UChar*, length) 312 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0) 313 errln("compare(UChar*, length) failed"); 314 315 // test compare(thisOffset, thisLength, that, thatOffset, thatLength) 316 if (test1.compare(0, 14, test2, 0, 14) != 0 317 || test1.compare(0, 14, test3, 0, 14) != 0 318 || test1.compare(0, 14, test4, 12, 14) != 0) 319 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed"); 320 321 if (test1.compare(10, 4, test2, 0, 4) >= 0 322 || test1.compare(10, 4, test3, 22, 9) <= 0 323 || test1.compare(10, 4, test4, 22, 4) != 0) 324 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed"); 325 326 // test compareBetween 327 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0 328 || test1.compareBetween(0, 14, test4, 12, 26) != 0) 329 errln("compareBetween failed"); 330 331 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0 332 || test1.compareBetween(10, 14, test4, 22, 26) != 0) 333 errln("compareBetween failed"); 334 335 // test compare() etc. with strings that share a buffer but are not equal 336 test2=test1; // share the buffer, length() too large for the stackBuffer 337 test2.truncate(1); // change only the length, not the buffer 338 if( test1==test2 || test1<=test2 || 339 test1.compare(test2)<=0 || 340 test1.compareCodePointOrder(test2)<=0 || 341 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 || 342 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 || 343 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 || 344 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0 345 ) { 346 errln("UnicodeStrings that share a buffer but have different lengths compare as equal"); 347 } 348 349 /* test compareCodePointOrder() */ 350 { 351 /* these strings are in ascending order */ 352 static const UChar strings[][4]={ 353 { 0x61, 0 }, /* U+0061 */ 354 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ 355 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ 356 { 0xd800, 0 }, /* U+d800 */ 357 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ 358 { 0xdfff, 0 }, /* U+dfff */ 359 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ 360 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ 361 { 0xd800, 0xdc02, 0 }, /* U+10002 */ 362 { 0xd84d, 0xdc56, 0 } /* U+23456 */ 363 }; 364 UnicodeString u[20]; // must be at least as long as strings[] 365 int32_t i; 366 367 for(i=0; i<UPRV_LENGTHOF(strings); ++i) { 368 u[i]=UnicodeString(TRUE, strings[i], -1); 369 } 370 371 for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) { 372 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) { 373 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i); 374 } 375 } 376 } 377 378 /* test caseCompare() */ 379 { 380 static const UChar 381 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 }, 382 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 }, 383 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 }, 384 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 }; 385 386 UnicodeString 387 mixed(TRUE, _mixed, -1), 388 otherDefault(TRUE, _otherDefault, -1), 389 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1), 390 different(TRUE, _different, -1); 391 392 int8_t result; 393 394 /* test caseCompare() */ 395 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT); 396 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) { 397 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result); 398 } 399 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 400 if(result!=0) { 401 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result); 402 } 403 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 404 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { 405 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n"); 406 } 407 408 /* test caseCompare() */ 409 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT); 410 if(result<=0) { 411 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result); 412 } 413 414 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */ 415 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT); 416 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) { 417 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result); 418 } 419 420 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */ 421 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT); 422 if(result<=0) { 423 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result); 424 } 425 } 426 427 // test that srcLength=-1 is handled in functions that 428 // take input const UChar */int32_t srcLength (j785) 429 { 430 static const UChar u[]={ 0x61, 0x308, 0x62, 0 }; 431 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape(); 432 433 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) { 434 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work"); 435 } 436 437 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) { 438 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work"); 439 } 440 441 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) { 442 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work"); 443 } 444 445 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) { 446 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work"); 447 } 448 449 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) { 450 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work"); 451 } 452 453 UnicodeString s2, s3; 454 s2.replace(0, 0, u+1, -1); 455 s3.replace(0, 0, u, 1, -1); 456 if(s.compare(1, 999, s2)!=0 || s2!=s3) { 457 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work"); 458 } 459 } 460 } 461 462 void 463 UnicodeStringTest::TestExtract() 464 { 465 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", ""); 466 UnicodeString test2; 467 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 468 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 469 UnicodeString test5; 470 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 471 472 test1.extract(11, 12, test2); 473 test1.extract(11, 12, test3); 474 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) { 475 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer."); 476 } 477 478 // test proper pinning in extractBetween() 479 test1.extractBetween(-3, 7, test5); 480 if(test5!=UNICODE_STRING("Now is ", 7)) { 481 errln("UnicodeString.extractBetween(-3, 7) did not pin properly."); 482 } 483 484 test1.extractBetween(11, 23, test5); 485 if (test1.extract(60, 71, test6) != 9) { 486 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer."); 487 } 488 if (test1.extract(11, 12, test6) != 12) { 489 errln("UnicodeString.extract() failed to return the correct size of destination buffer."); 490 } 491 492 // convert test4 back to Unicode for comparison 493 UnicodeString test4b(test4, 12); 494 495 if (test1.extract(11, 12, (char *)NULL) != 12) { 496 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer."); 497 } 498 if (test1.extract(11, -1, test6) != 0) { 499 errln("UnicodeString.extract(-1) failed to stop reading the string."); 500 } 501 502 for (int32_t i = 0; i < 12; i++) { 503 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) { 504 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i); 505 break; 506 } 507 if (test1.charAt((int32_t)(11 + i)) != test3[i]) { 508 errln(UnicodeString("extracting into an array of UChar failed at position ") + i); 509 break; 510 } 511 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) { 512 errln(UnicodeString("extracting into an array of char failed at position ") + i); 513 break; 514 } 515 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) { 516 errln(UnicodeString("extracting with extractBetween failed at position ") + i); 517 break; 518 } 519 } 520 521 // test preflighting and overflows with invariant conversion 522 if (test1.extract(0, 10, (char *)NULL, "") != 10) { 523 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10"); 524 } 525 526 test4[2] = (char)0xff; 527 if (test1.extract(0, 10, test4, 2, "") != 10) { 528 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10"); 529 } 530 if (test4[2] != (char)0xff) { 531 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]"); 532 } 533 534 { 535 // test new, NUL-terminating extract() function 536 UnicodeString s("terminate", ""); 537 UChar dest[20]={ 538 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 539 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5 540 }; 541 UErrorCode errorCode; 542 int32_t length; 543 544 errorCode=U_ZERO_ERROR; 545 length=s.extract((UChar *)NULL, 0, errorCode); 546 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) { 547 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode)); 548 } 549 550 errorCode=U_ZERO_ERROR; 551 length=s.extract(dest, s.length()-1, errorCode); 552 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) { 553 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", 554 length, u_errorName(errorCode), s.length()); 555 } 556 557 errorCode=U_ZERO_ERROR; 558 length=s.extract(dest, s.length(), errorCode); 559 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) { 560 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)", 561 length, u_errorName(errorCode), s.length()); 562 } 563 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) { 564 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly"); 565 } 566 567 errorCode=U_ZERO_ERROR; 568 length=s.extract(dest, s.length()+1, errorCode); 569 if(errorCode!=U_ZERO_ERROR || length!=s.length()) { 570 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)", 571 length, u_errorName(errorCode), s.length()); 572 } 573 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) { 574 errln("UnicodeString.extract(dest large enough) did not extract the string correctly"); 575 } 576 } 577 578 { 579 // test new UConverter extract() and constructor 580 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 581 char buffer[32]; 582 static const char expect[]={ 583 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99, 584 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f, 585 (char)0xc3, (char)0x84, 586 (char)0xe1, (char)0xbb, (char)0x90 587 }; 588 UErrorCode errorCode=U_ZERO_ERROR; 589 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 590 int32_t length; 591 592 if(U_SUCCESS(errorCode)) { 593 // test preflighting 594 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 || 595 errorCode!=U_BUFFER_OVERFLOW_ERROR 596 ) { 597 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)", 598 length, u_errorName(errorCode)); 599 } 600 errorCode=U_ZERO_ERROR; 601 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 || 602 errorCode!=U_BUFFER_OVERFLOW_ERROR 603 ) { 604 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)", 605 length, u_errorName(errorCode)); 606 } 607 608 // try error cases 609 errorCode=U_ZERO_ERROR; 610 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) { 611 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination"); 612 } 613 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 614 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) { 615 errln("UnicodeString::extract(UConverter) succeeded with a previous error code"); 616 } 617 errorCode=U_ZERO_ERROR; 618 619 // extract for real 620 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 || 621 uprv_memcmp(buffer, expect, 13)!=0 || 622 buffer[13]!=0 || 623 U_FAILURE(errorCode) 624 ) { 625 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)", 626 length, u_errorName(errorCode)); 627 } 628 // Test again with just the converter name. 629 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 || 630 uprv_memcmp(buffer, expect, 13)!=0 || 631 buffer[13]!=0 || 632 U_FAILURE(errorCode) 633 ) { 634 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)", 635 length, u_errorName(errorCode)); 636 } 637 638 // try the constructor 639 UnicodeString t(expect, sizeof(expect), cnv, errorCode); 640 if(U_FAILURE(errorCode) || s!=t) { 641 errln("UnicodeString(UConverter) conversion failed (%s)", 642 u_errorName(errorCode)); 643 } 644 645 ucnv_close(cnv); 646 } 647 } 648 } 649 650 void 651 UnicodeStringTest::TestRemoveReplace() 652 { 653 UnicodeString test1("The rain in Spain stays mainly on the plain"); 654 UnicodeString test2("eat SPAMburgers!"); 655 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 }; 656 char test4[] = "SPAM"; 657 UnicodeString& test5 = test1; 658 659 test1.replace(4, 4, test2, 4, 4); 660 test1.replace(12, 5, test3, 4); 661 test3[4] = 0; 662 test1.replace(17, 4, test3); 663 test1.replace(23, 4, test4); 664 test1.replaceBetween(37, 42, test2, 4, 8); 665 666 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM") 667 errln("One of the replace methods failed:\n" 668 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n" 669 " got \"" + test1 + "\""); 670 671 test1.remove(21, 1); 672 test1.removeBetween(26, 28); 673 674 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM") 675 errln("One of the remove methods failed:\n" 676 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n" 677 " got \"" + test1 + "\""); 678 679 for (int32_t i = 0; i < test1.length(); i++) { 680 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) { 681 test1.setCharAt(i, 0x78); 682 } 683 } 684 685 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM") 686 errln("One of the remove methods failed:\n" 687 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n" 688 " got \"" + test1 + "\""); 689 690 test1.remove(); 691 if (test1.length() != 0) 692 errln("Remove() failed: expected empty string, got \"" + test1 + "\""); 693 } 694 695 void 696 UnicodeStringTest::TestSearching() 697 { 698 UnicodeString test1("test test ttest tetest testesteststt"); 699 UnicodeString test2("test"); 700 UChar testChar = 0x74; 701 702 UChar32 testChar32 = 0x20402; 703 UChar testData[]={ 704 // 0 1 2 3 4 5 6 7 705 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02, 706 707 // 8 9 10 11 12 13 14 15 708 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071, 709 710 // 16 17 18 19 711 0xdc02, 0xd841, 0x0073, 0x0000 712 }; 713 UnicodeString test3(testData); 714 UnicodeString test4(testChar32); 715 716 uint16_t occurrences = 0; 717 int32_t startPos = 0; 718 for ( ; 719 startPos != -1 && startPos < test1.length(); 720 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 721 ; 722 if (occurrences != 6) 723 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences); 724 725 for ( occurrences = 0, startPos = 10; 726 startPos != -1 && startPos < test1.length(); 727 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 728 ; 729 if (occurrences != 4) 730 errln(UnicodeString("indexOf with starting offset failed: " 731 "expected to find 4 occurrences, found ") + occurrences); 732 733 int32_t endPos = 28; 734 for ( occurrences = 0, startPos = 5; 735 startPos != -1 && startPos < test1.length(); 736 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 737 ; 738 if (occurrences != 4) 739 errln(UnicodeString("indexOf with starting and ending offsets failed: " 740 "expected to find 4 occurrences, found ") + occurrences); 741 742 //using UChar32 string 743 for ( startPos=0, occurrences=0; 744 startPos != -1 && startPos < test3.length(); 745 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0) 746 ; 747 if (occurrences != 4) 748 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences); 749 750 for ( startPos=10, occurrences=0; 751 startPos != -1 && startPos < test3.length(); 752 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0) 753 ; 754 if (occurrences != 2) 755 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences); 756 //--- 757 758 for ( occurrences = 0, startPos = 0; 759 startPos != -1 && startPos < test1.length(); 760 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 761 ; 762 if (occurrences != 16) 763 errln(UnicodeString("indexOf with character failed: " 764 "expected to find 16 occurrences, found ") + occurrences); 765 766 for ( occurrences = 0, startPos = 10; 767 startPos != -1 && startPos < test1.length(); 768 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 769 ; 770 if (occurrences != 12) 771 errln(UnicodeString("indexOf with character & start offset failed: " 772 "expected to find 12 occurrences, found ") + occurrences); 773 774 for ( occurrences = 0, startPos = 5, endPos = 28; 775 startPos != -1 && startPos < test1.length(); 776 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 777 ; 778 if (occurrences != 10) 779 errln(UnicodeString("indexOf with character & start & end offsets failed: " 780 "expected to find 10 occurrences, found ") + occurrences); 781 782 //testing for UChar32 783 UnicodeString subString; 784 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){ 785 subString.append(test3, startPos, test3.length()); 786 if(subString.indexOf(testChar32) != -1 ){ 787 ++occurrences; 788 } 789 subString.remove(); 790 } 791 if (occurrences != 14) 792 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences); 793 794 for ( occurrences = 0, startPos = 0; 795 startPos != -1 && startPos < test3.length(); 796 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 797 ; 798 if (occurrences != 4) 799 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences); 800 801 endPos=test3.length(); 802 for ( occurrences = 0, startPos = 5; 803 startPos != -1 && startPos < test3.length(); 804 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 805 ; 806 if (occurrences != 3) 807 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences); 808 //--- 809 810 if(test1.lastIndexOf(test2)!=29) { 811 errln("test1.lastIndexOf(test2)!=29"); 812 } 813 814 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) { 815 errln("test1.lastIndexOf(test2, start) failed"); 816 } 817 818 for ( occurrences = 0, startPos = 32; 819 startPos != -1; 820 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0) 821 ; 822 if (occurrences != 4) 823 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: " 824 "expected to find 4 occurrences, found ") + occurrences); 825 826 for ( occurrences = 0, startPos = 32; 827 startPos != -1; 828 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0) 829 ; 830 if (occurrences != 11) 831 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: " 832 "expected to find 11 occurrences, found ") + occurrences); 833 834 //testing UChar32 835 startPos=test3.length(); 836 for ( occurrences = 0; 837 startPos != -1; 838 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0) 839 ; 840 if (occurrences != 3) 841 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences); 842 843 844 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){ 845 subString.remove(); 846 subString.append(test3, 0, endPos); 847 if(subString.lastIndexOf(testChar32) != -1 ){ 848 ++occurrences; 849 } 850 } 851 if (occurrences != 18) 852 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences); 853 //--- 854 855 // test that indexOf(UChar32) and lastIndexOf(UChar32) 856 // do not find surrogate code points when they are part of matched pairs 857 // (= part of supplementary code points) 858 // Jitterbug 1542 859 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) { 860 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point"); 861 } 862 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 || 863 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 || 864 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16 865 ) { 866 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point"); 867 } 868 } 869 870 void 871 UnicodeStringTest::TestSpacePadding() 872 { 873 UnicodeString test1("hello"); 874 UnicodeString test2(" there"); 875 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?"); 876 UnicodeString test4; 877 UBool returnVal; 878 UnicodeString expectedValue; 879 880 returnVal = test1.padLeading(15); 881 expectedValue = " hello"; 882 if (returnVal == FALSE || test1 != expectedValue) 883 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 884 885 returnVal = test2.padTrailing(15); 886 expectedValue = " there "; 887 if (returnVal == FALSE || test2 != expectedValue) 888 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 889 890 expectedValue = test3; 891 returnVal = test3.padTrailing(15); 892 if (returnVal == TRUE || test3 != expectedValue) 893 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 894 895 expectedValue = "hello"; 896 test4.setTo(test1).trim(); 897 898 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue) 899 errln("trim(UnicodeString&) failed"); 900 901 test1.trim(); 902 if (test1 != expectedValue) 903 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 904 905 test2.trim(); 906 expectedValue = "there"; 907 if (test2 != expectedValue) 908 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 909 910 test3.trim(); 911 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?"; 912 if (test3 != expectedValue) 913 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 914 915 returnVal = test1.truncate(15); 916 expectedValue = "hello"; 917 if (returnVal == TRUE || test1 != expectedValue) 918 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 919 920 returnVal = test2.truncate(15); 921 expectedValue = "there"; 922 if (returnVal == TRUE || test2 != expectedValue) 923 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 924 925 returnVal = test3.truncate(15); 926 expectedValue = "Hi! How ya doi"; 927 if (returnVal == FALSE || test3 != expectedValue) 928 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 929 } 930 931 void 932 UnicodeStringTest::TestPrefixAndSuffix() 933 { 934 UnicodeString test1("Now is the time for all good men to come to the aid of their country."); 935 UnicodeString test2("Now"); 936 UnicodeString test3("country."); 937 UnicodeString test4("count"); 938 939 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) { 940 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\"."); 941 } 942 943 if (test1.startsWith(test3) || 944 test1.startsWith(test3.getBuffer(), test3.length()) || 945 test1.startsWith(test3.getTerminatedBuffer(), 0, -1) 946 ) { 947 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\"."); 948 } 949 950 if (test1.endsWith(test2)) { 951 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\"."); 952 } 953 954 if (!test1.endsWith(test3)) { 955 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 956 } 957 if (!test1.endsWith(test3, 0, INT32_MAX)) { 958 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 959 } 960 961 if(!test1.endsWith(test3.getBuffer(), test3.length())) { 962 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 963 } 964 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) { 965 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 966 } 967 968 if (!test3.startsWith(test4)) { 969 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\"."); 970 } 971 972 if (test4.startsWith(test3)) { 973 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\"."); 974 } 975 } 976 977 void 978 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() { 979 UnicodeString test("abcde"); 980 const UChar ab[] = { 0x61, 0x62, 0 }; 981 const UChar de[] = { 0x64, 0x65, 0 }; 982 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1)); 983 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1)); 984 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1)); 985 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1)); 986 } 987 988 void 989 UnicodeStringTest::TestFindAndReplace() 990 { 991 UnicodeString test1("One potato, two potato, three potato, four\n"); 992 UnicodeString test2("potato"); 993 UnicodeString test3("MISSISSIPPI"); 994 995 UnicodeString expectedValue; 996 997 test1.findAndReplace(test2, test3); 998 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n"; 999 if (test1 != expectedValue) 1000 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 1001 test1.findAndReplace(2, 32, test3, test2); 1002 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n"; 1003 if (test1 != expectedValue) 1004 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 1005 } 1006 1007 void 1008 UnicodeStringTest::TestReverse() 1009 { 1010 UnicodeString test("backwards words say to used I"); 1011 1012 test.reverse(); 1013 test.reverse(2, 4); 1014 test.reverse(7, 2); 1015 test.reverse(10, 3); 1016 test.reverse(14, 5); 1017 test.reverse(20, 9); 1018 1019 if (test != "I used to say words backwards") 1020 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \"" 1021 + test + "\""); 1022 1023 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 1024 test.reverse(); 1025 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) { 1026 errln("reverse() failed with supplementary characters"); 1027 } 1028 1029 // Test case for ticket #8091: 1030 // UnicodeString::reverse() failed to see a lead surrogate in the middle of 1031 // an odd-length string that contains no other lead surrogates. 1032 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape(); 1033 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape(); 1034 test.reverse(); 1035 if(test!=expected) { 1036 errln("reverse() failed with only lead surrogate in the middle"); 1037 } 1038 } 1039 1040 void 1041 UnicodeStringTest::TestMiscellaneous() 1042 { 1043 UnicodeString test1("This is a test"); 1044 UnicodeString test2("This is a test"); 1045 UnicodeString test3("Me too!"); 1046 1047 // test getBuffer(minCapacity) and releaseBuffer() 1048 test1=UnicodeString(); // make sure that it starts with its stackBuffer 1049 UChar *p=test1.getBuffer(20); 1050 if(test1.getCapacity()<20) { 1051 errln("UnicodeString::getBuffer(20).getCapacity()<20"); 1052 } 1053 1054 test1.append((UChar)7); // must not be able to modify the string here 1055 test1.setCharAt(3, 7); 1056 test1.reverse(); 1057 if( test1.length()!=0 || 1058 test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff || 1059 test1.getBuffer(10)!=0 || test1.getBuffer()!=0 1060 ) { 1061 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString"); 1062 } 1063 1064 p[0]=1; 1065 p[1]=2; 1066 p[2]=3; 1067 test1.releaseBuffer(3); 1068 test1.append((UChar)4); 1069 1070 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) { 1071 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString"); 1072 } 1073 1074 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect 1075 test1.releaseBuffer(1); 1076 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) { 1077 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString"); 1078 } 1079 1080 // test getBuffer(const) 1081 const UChar *q=test1.getBuffer(), *r=test1.getBuffer(); 1082 if( test1.length()!=4 || 1083 q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 || 1084 r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4 1085 ) { 1086 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer"); 1087 } 1088 1089 // test releaseBuffer() with a NUL-terminated buffer 1090 test1.getBuffer(20)[2]=0; 1091 test1.releaseBuffer(); // implicit -1 1092 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) { 1093 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString"); 1094 } 1095 1096 // test releaseBuffer() with a non-NUL-terminated buffer 1097 p=test1.getBuffer(256); 1098 for(int32_t i=0; i<test1.getCapacity(); ++i) { 1099 p[i]=(UChar)1; // fill the buffer with all non-NUL code units 1100 } 1101 test1.releaseBuffer(); // implicit -1 1102 if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) { 1103 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString"); 1104 } 1105 1106 // test getTerminatedBuffer() 1107 test1=UnicodeString("This is another test.", ""); 1108 test2=UnicodeString("This is another test.", ""); 1109 q=test1.getTerminatedBuffer(); 1110 if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) { 1111 errln("getTerminatedBuffer()[length]!=0"); 1112 } 1113 1114 const UChar u[]={ 5, 6, 7, 8, 0 }; 1115 test1.setTo(FALSE, u, 3); 1116 q=test1.getTerminatedBuffer(); 1117 if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) { 1118 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer"); 1119 } 1120 1121 test1.setTo(TRUE, u, -1); 1122 q=test1.getTerminatedBuffer(); 1123 if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) { 1124 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer"); 1125 } 1126 1127 // NOTE: Some compilers will optimize u"la" to point to the same static memory 1128 // as u" lila", offset by 3 code units 1129 test1=UnicodeString(TRUE, u"la", 2); 1130 test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1); 1131 assertEquals("UnicodeString::append(const UChar *, start, length) failed", 1132 u"la lila", test1); 1133 1134 test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX); 1135 assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed", 1136 u"la dudum lila", test1); 1137 1138 static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 }; 1139 test1.insert(9, ucs, -1); 1140 assertEquals("UnicodeString::insert(start, const UChar *, length) failed", 1141 u"la dudum hm lila", test1); 1142 1143 test1.replace(9, 2, (UChar)0x2b); 1144 assertEquals("UnicodeString::replace(start, length, UChar) failed", 1145 u"la dudum + lila", test1); 1146 1147 if(test1.hasMetaData() || UnicodeString().hasMetaData()) { 1148 errln("UnicodeString::hasMetaData() returns TRUE"); 1149 } 1150 1151 // test getTerminatedBuffer() on a truncated, shared, heap-allocated string 1152 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789."); 1153 test1.truncate(36); // ensure length()<getCapacity() 1154 test2=test1; // share the buffer 1155 test1.truncate(5); 1156 if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) { 1157 errln("UnicodeString(shared buffer).truncate() failed"); 1158 } 1159 if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) { 1160 errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() " 1161 "modified another copy of the string!"); 1162 } 1163 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789."); 1164 test1.truncate(36); // ensure length()<getCapacity() 1165 test2=test1; // share the buffer 1166 test1.remove(); 1167 if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) { 1168 errln("UnicodeString(shared buffer).remove() failed"); 1169 } 1170 if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) { 1171 errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() " 1172 "modified another copy of the string!"); 1173 } 1174 1175 // ticket #9740 1176 test1.setTo(TRUE, ucs, 3); 1177 assertEquals("length of read-only alias", 3, test1.length()); 1178 test1.trim(); 1179 assertEquals("length of read-only alias after trim()", 2, test1.length()); 1180 assertEquals("length of terminated buffer of read-only alias + trim()", 1181 2, u_strlen(test1.getTerminatedBuffer())); 1182 } 1183 1184 void 1185 UnicodeStringTest::TestStackAllocation() 1186 { 1187 UChar testString[] ={ 1188 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 }; 1189 UChar guardWord = 0x4DED; 1190 UnicodeString* test = 0; 1191 1192 test = new UnicodeString(testString); 1193 if (*test != "This is a crazy test.") 1194 errln("Test string failed to initialize properly."); 1195 if (guardWord != 0x04DED) 1196 errln("Test string initialization overwrote guard word!"); 1197 1198 test->insert(8, "only "); 1199 test->remove(15, 6); 1200 if (*test != "This is only a test.") 1201 errln("Manipulation of test string failed to work right."); 1202 if (guardWord != 0x4DED) 1203 errln("Manipulation of test string overwrote guard word!"); 1204 1205 // we have to deinitialize and release the backing store by calling the destructor 1206 // explicitly, since we can't overload operator delete 1207 delete test; 1208 1209 UChar workingBuffer[] = { 1210 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20, 1211 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20, 1212 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1213 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 1215 UChar guardWord2 = 0x4DED; 1216 1217 test = new UnicodeString(workingBuffer, 35, 100); 1218 if (*test != "Now is the time for all men to come") 1219 errln("Stack-allocated backing store failed to initialize correctly."); 1220 if (guardWord2 != 0x4DED) 1221 errln("Stack-allocated backing store overwrote guard word!"); 1222 1223 test->insert(24, "good "); 1224 if (*test != "Now is the time for all good men to come") 1225 errln("insert() on stack-allocated UnicodeString didn't work right"); 1226 if (guardWord2 != 0x4DED) 1227 errln("insert() on stack-allocated UnicodeString overwrote guard word!"); 1228 1229 if (workingBuffer[24] != 0x67) 1230 errln("insert() on stack-allocated UnicodeString didn't affect backing store"); 1231 1232 *test += " to the aid of their country."; 1233 if (*test != "Now is the time for all good men to come to the aid of their country.") 1234 errln("Stack-allocated UnicodeString overflow didn't work"); 1235 if (guardWord2 != 0x4DED) 1236 errln("Stack-allocated UnicodeString overflow overwrote guard word!"); 1237 1238 *test = "ha!"; 1239 if (*test != "ha!") 1240 errln("Assignment to stack-allocated UnicodeString didn't work"); 1241 if (workingBuffer[0] != 0x4e) 1242 errln("Change to UnicodeString after overflow are still affecting original buffer"); 1243 if (guardWord2 != 0x4DED) 1244 errln("Change to UnicodeString after overflow overwrote guard word!"); 1245 1246 // test read-only aliasing with setTo() 1247 workingBuffer[0] = 0x20ac; 1248 workingBuffer[1] = 0x125; 1249 workingBuffer[2] = 0; 1250 test->setTo(TRUE, workingBuffer, 2); 1251 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) { 1252 errln("UnicodeString.setTo(readonly alias) does not alias correctly"); 1253 } 1254 1255 UnicodeString *c=(UnicodeString *)test->clone(); 1256 1257 workingBuffer[1] = 0x109; 1258 if(test->charAt(1) != 0x109) { 1259 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer"); 1260 } 1261 1262 if(c->length() != 2 || c->charAt(1) != 0x125) { 1263 errln("clone(alias) did not copy the buffer"); 1264 } 1265 delete c; 1266 1267 test->setTo(TRUE, workingBuffer, -1); 1268 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) { 1269 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly"); 1270 } 1271 1272 test->setTo(FALSE, workingBuffer, -1); 1273 if(!test->isBogus()) { 1274 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()"); 1275 } 1276 1277 delete test; 1278 1279 test=new UnicodeString(); 1280 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000}; 1281 test->setTo(buffer, 4, 10); 1282 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 || 1283 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){ 1284 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test)); 1285 } 1286 delete test; 1287 1288 1289 // test the UChar32 constructor 1290 UnicodeString c32Test((UChar32)0x10ff2a); 1291 if( c32Test.length() != U16_LENGTH(0x10ff2a) || 1292 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a 1293 ) { 1294 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler"); 1295 } 1296 1297 // test the (new) capacity constructor 1298 UnicodeString capTest(5, (UChar32)0x2a, 5); 1299 if( capTest.length() != 5 * U16_LENGTH(0x2a) || 1300 capTest.char32At(0) != 0x2a || 1301 capTest.char32At(4) != 0x2a 1302 ) { 1303 errln("The UnicodeString capacity constructor does not work with an ASCII filler"); 1304 } 1305 1306 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5); 1307 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) || 1308 capTest.char32At(0) != 0x10ff2a || 1309 capTest.char32At(4) != 0x10ff2a 1310 ) { 1311 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler"); 1312 } 1313 1314 capTest = UnicodeString(5, (UChar32)0, 0); 1315 if(capTest.length() != 0) { 1316 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler"); 1317 } 1318 } 1319 1320 /** 1321 * Test the unescape() function. 1322 */ 1323 void UnicodeStringTest::TestUnescape(void) { 1324 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV); 1325 UnicodeString OUT("abc"); 1326 OUT.append((UChar)0x4567); 1327 OUT.append(" "); 1328 OUT.append((UChar)0xA); 1329 OUT.append((UChar)0xD); 1330 OUT.append(" "); 1331 OUT.append((UChar32)0x00101234); 1332 OUT.append("xyz"); 1333 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b); 1334 UnicodeString result = IN.unescape(); 1335 if (result != OUT) { 1336 errln("FAIL: " + prettify(IN) + ".unescape() -> " + 1337 prettify(result) + ", expected " + 1338 prettify(OUT)); 1339 } 1340 1341 // test that an empty string is returned in case of an error 1342 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) { 1343 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string"); 1344 } 1345 } 1346 1347 /* test code point counting functions --------------------------------------- */ 1348 1349 /* reference implementation of UnicodeString::hasMoreChar32Than() */ 1350 static int32_t 1351 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) { 1352 int32_t count=s.countChar32(start, length); 1353 return count>number; 1354 } 1355 1356 /* compare the real function against the reference */ 1357 void 1358 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) { 1359 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) { 1360 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n", 1361 start, length, number, s.hasMoreChar32Than(start, length, number)); 1362 } 1363 } 1364 1365 void 1366 UnicodeStringTest::TestCountChar32(void) { 1367 { 1368 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 1369 1370 // test countChar32() 1371 // note that this also calls and tests u_countChar32(length>=0) 1372 if( 1373 s.countChar32()!=4 || 1374 s.countChar32(1)!=4 || 1375 s.countChar32(2)!=3 || 1376 s.countChar32(2, 3)!=2 || 1377 s.countChar32(2, 0)!=0 1378 ) { 1379 errln("UnicodeString::countChar32() failed"); 1380 } 1381 1382 // NUL-terminate the string buffer and test u_countChar32(length=-1) 1383 const UChar *buffer=s.getTerminatedBuffer(); 1384 if( 1385 u_countChar32(buffer, -1)!=4 || 1386 u_countChar32(buffer+1, -1)!=4 || 1387 u_countChar32(buffer+2, -1)!=3 || 1388 u_countChar32(buffer+3, -1)!=3 || 1389 u_countChar32(buffer+4, -1)!=2 || 1390 u_countChar32(buffer+5, -1)!=1 || 1391 u_countChar32(buffer+6, -1)!=0 1392 ) { 1393 errln("u_countChar32(length=-1) failed"); 1394 } 1395 1396 // test u_countChar32() with bad input 1397 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) { 1398 errln("u_countChar32(bad input) failed (returned non-zero counts)"); 1399 } 1400 } 1401 1402 /* test data and variables for hasMoreChar32Than() */ 1403 static const UChar str[]={ 1404 0x61, 0x62, 0xd800, 0xdc00, 1405 0xd801, 0xdc01, 0x63, 0xd802, 1406 0x64, 0xdc03, 0x65, 0x66, 1407 0xd804, 0xdc04, 0xd805, 0xdc05, 1408 0x67 1409 }; 1410 UnicodeString string(str, UPRV_LENGTHOF(str)); 1411 int32_t start, length, number; 1412 1413 /* test hasMoreChar32Than() */ 1414 for(length=string.length(); length>=0; --length) { 1415 for(start=0; start<=length; ++start) { 1416 for(number=-1; number<=((length-start)+2); ++number) { 1417 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number); 1418 } 1419 } 1420 } 1421 1422 /* test hasMoreChar32Than() with pinning */ 1423 for(start=-1; start<=string.length()+1; ++start) { 1424 for(number=-1; number<=((string.length()-start)+2); ++number) { 1425 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number); 1426 } 1427 } 1428 1429 /* test hasMoreChar32Than() with a bogus string */ 1430 string.setToBogus(); 1431 for(length=-1; length<=1; ++length) { 1432 for(start=-1; start<=length; ++start) { 1433 for(number=-1; number<=((length-start)+2); ++number) { 1434 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number); 1435 } 1436 } 1437 } 1438 } 1439 1440 void 1441 UnicodeStringTest::TestBogus() { 1442 UnicodeString test1("This is a test"); 1443 UnicodeString test2("This is a test"); 1444 UnicodeString test3("Me too!"); 1445 1446 // test isBogus() and setToBogus() 1447 if (test1.isBogus() || test2.isBogus() || test3.isBogus()) { 1448 errln("A string returned TRUE for isBogus()!"); 1449 } 1450 1451 // NULL pointers are treated like empty strings 1452 // use other illegal arguments to make a bogus string 1453 test3.setTo(FALSE, test1.getBuffer(), -2); 1454 if(!test3.isBogus()) { 1455 errln("A bogus string returned FALSE for isBogus()!"); 1456 } 1457 if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) { 1458 errln("hashCode() failed"); 1459 } 1460 if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) { 1461 errln("bogus.getBuffer()!=0"); 1462 } 1463 if (test1.indexOf(test3) != -1) { 1464 errln("bogus.indexOf() != -1"); 1465 } 1466 if (test1.lastIndexOf(test3) != -1) { 1467 errln("bogus.lastIndexOf() != -1"); 1468 } 1469 if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) { 1470 errln("caseCompare() doesn't work with bogus strings"); 1471 } 1472 if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) { 1473 errln("compareCodePointOrder() doesn't work with bogus strings"); 1474 } 1475 1476 // verify that non-assignment modifications fail and do not revive a bogus string 1477 test3.setToBogus(); 1478 test3.append((UChar)0x61); 1479 if(!test3.isBogus() || test3.getBuffer()!=0) { 1480 errln("bogus.append('a') worked but must not"); 1481 } 1482 1483 test3.setToBogus(); 1484 test3.findAndReplace(UnicodeString((UChar)0x61), test2); 1485 if(!test3.isBogus() || test3.getBuffer()!=0) { 1486 errln("bogus.findAndReplace() worked but must not"); 1487 } 1488 1489 test3.setToBogus(); 1490 test3.trim(); 1491 if(!test3.isBogus() || test3.getBuffer()!=0) { 1492 errln("bogus.trim() revived bogus but must not"); 1493 } 1494 1495 test3.setToBogus(); 1496 test3.remove(1); 1497 if(!test3.isBogus() || test3.getBuffer()!=0) { 1498 errln("bogus.remove(1) revived bogus but must not"); 1499 } 1500 1501 test3.setToBogus(); 1502 if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) { 1503 errln("bogus.setCharAt(0, 'b') worked but must not"); 1504 } 1505 1506 test3.setToBogus(); 1507 if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) { 1508 errln("bogus.truncate(1) revived bogus but must not"); 1509 } 1510 1511 // verify that assignments revive a bogus string 1512 test3.setToBogus(); 1513 if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) { 1514 errln("bogus.operator=() failed"); 1515 } 1516 1517 test3.setToBogus(); 1518 if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) { 1519 errln("bogus.fastCopyFrom() failed"); 1520 } 1521 1522 test3.setToBogus(); 1523 if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) { 1524 errln("bogus.setTo(UniStr) failed"); 1525 } 1526 1527 test3.setToBogus(); 1528 if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) { 1529 errln("bogus.setTo(UniStr, 0) failed"); 1530 } 1531 1532 test3.setToBogus(); 1533 if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) { 1534 errln("bogus.setTo(UniStr, 0, len) failed"); 1535 } 1536 1537 test3.setToBogus(); 1538 if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) { 1539 errln("bogus.setTo(const UChar *, len) failed"); 1540 } 1541 1542 test3.setToBogus(); 1543 if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) { 1544 errln("bogus.setTo(UChar) failed"); 1545 } 1546 1547 test3.setToBogus(); 1548 if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) { 1549 errln("bogus.setTo(UChar32) failed"); 1550 } 1551 1552 test3.setToBogus(); 1553 if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) { 1554 errln("bogus.setTo(readonly alias) failed"); 1555 } 1556 1557 // writable alias to another string's buffer: very bad idea, just convenient for this test 1558 test3.setToBogus(); 1559 if(!test3.isBogus() || 1560 test3.setTo(const_cast<UChar *>(test1.getBuffer()), 1561 test1.length(), test1.getCapacity()).isBogus() || 1562 test3!=test1) { 1563 errln("bogus.setTo(writable alias) failed"); 1564 } 1565 1566 // verify simple, documented ways to turn a bogus string into an empty one 1567 test3.setToBogus(); 1568 if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) { 1569 errln("bogus.operator=(UnicodeString()) failed"); 1570 } 1571 1572 test3.setToBogus(); 1573 if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) { 1574 errln("bogus.setTo(UnicodeString()) failed"); 1575 } 1576 1577 test3.setToBogus(); 1578 if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) { 1579 errln("bogus.remove() failed"); 1580 } 1581 1582 test3.setToBogus(); 1583 if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) { 1584 errln("bogus.remove(0, INT32_MAX) failed"); 1585 } 1586 1587 test3.setToBogus(); 1588 if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) { 1589 errln("bogus.truncate(0) failed"); 1590 } 1591 1592 test3.setToBogus(); 1593 if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) { 1594 errln("bogus.setTo((UChar32)-1) failed"); 1595 } 1596 1597 static const UChar nul=0; 1598 1599 test3.setToBogus(); 1600 if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) { 1601 errln("bogus.setTo(&nul, 0) failed"); 1602 } 1603 1604 test3.setToBogus(); 1605 if(!test3.isBogus() || test3.getBuffer()!=0) { 1606 errln("setToBogus() failed to make a string bogus"); 1607 } 1608 1609 test3.setToBogus(); 1610 if(test1.isBogus() || !(test1=test3).isBogus()) { 1611 errln("normal=bogus failed to make the left string bogus"); 1612 } 1613 1614 // test that NULL primitive input string values are treated like 1615 // empty strings, not errors (bogus) 1616 test2.setTo((UChar32)0x10005); 1617 if(test2.insert(1, nullptr, 1).length()!=2) { 1618 errln("UniStr.insert(...nullptr...) should not modify the string but does"); 1619 } 1620 1621 UErrorCode errorCode=U_ZERO_ERROR; 1622 UnicodeString 1623 test4((const UChar *)NULL), 1624 test5(TRUE, (const UChar *)NULL, 1), 1625 test6((UChar *)NULL, 5, 5), 1626 test7((const char *)NULL, 3, NULL, errorCode); 1627 if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) { 1628 errln("a constructor set to bogus for a NULL input string, should be empty"); 1629 } 1630 1631 test4.setTo(NULL, 3); 1632 test5.setTo(TRUE, (const UChar *)NULL, 1); 1633 test6.setTo((UChar *)NULL, 5, 5); 1634 if(test4.isBogus() || test5.isBogus() || test6.isBogus()) { 1635 errln("a setTo() set to bogus for a NULL input string, should be empty"); 1636 } 1637 1638 // test that bogus==bogus<any 1639 if(test1!=test3 || test1.compare(test3)!=0) { 1640 errln("bogus==bogus failed"); 1641 } 1642 1643 test2.remove(); 1644 if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) { 1645 errln("bogus<empty failed"); 1646 } 1647 } 1648 1649 // StringEnumeration ------------------------------------------------------- *** 1650 // most of StringEnumeration is tested elsewhere 1651 // this test improves code coverage 1652 1653 static const char *const 1654 testEnumStrings[]={ 1655 "a", 1656 "b", 1657 "c", 1658 "this is a long string which helps us test some buffer limits", 1659 "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" 1660 }; 1661 1662 class TestEnumeration : public StringEnumeration { 1663 public: 1664 TestEnumeration() : i(0) {} 1665 1666 virtual int32_t count(UErrorCode& /*status*/) const { 1667 return UPRV_LENGTHOF(testEnumStrings); 1668 } 1669 1670 virtual const UnicodeString *snext(UErrorCode &status) { 1671 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) { 1672 unistr=UnicodeString(testEnumStrings[i++], ""); 1673 return &unistr; 1674 } 1675 1676 return NULL; 1677 } 1678 1679 virtual void reset(UErrorCode& /*status*/) { 1680 i=0; 1681 } 1682 1683 static inline UClassID getStaticClassID() { 1684 return (UClassID)&fgClassID; 1685 } 1686 virtual UClassID getDynamicClassID() const { 1687 return getStaticClassID(); 1688 } 1689 1690 private: 1691 static const char fgClassID; 1692 1693 int32_t i; 1694 }; 1695 1696 const char TestEnumeration::fgClassID=0; 1697 1698 void 1699 UnicodeStringTest::TestStringEnumeration() { 1700 UnicodeString s; 1701 TestEnumeration ten; 1702 int32_t i, length; 1703 UErrorCode status; 1704 1705 const UChar *pu; 1706 const char *pc; 1707 1708 // test the next() default implementation and ensureCharsCapacity() 1709 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) { 1710 status=U_ZERO_ERROR; 1711 pc=ten.next(&length, status); 1712 s=UnicodeString(testEnumStrings[i], ""); 1713 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) { 1714 errln("StringEnumeration.next(%d) failed", i); 1715 } 1716 } 1717 status=U_ZERO_ERROR; 1718 if(ten.next(&length, status)!=NULL) { 1719 errln("StringEnumeration.next(done)!=NULL"); 1720 } 1721 1722 // test the unext() default implementation 1723 ten.reset(status); 1724 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) { 1725 status=U_ZERO_ERROR; 1726 pu=ten.unext(&length, status); 1727 s=UnicodeString(testEnumStrings[i], ""); 1728 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) { 1729 errln("StringEnumeration.unext(%d) failed", i); 1730 } 1731 } 1732 status=U_ZERO_ERROR; 1733 if(ten.unext(&length, status)!=NULL) { 1734 errln("StringEnumeration.unext(done)!=NULL"); 1735 } 1736 1737 // test that the default clone() implementation works, and returns NULL 1738 if(ten.clone()!=NULL) { 1739 errln("StringEnumeration.clone()!=NULL"); 1740 } 1741 1742 // test that uenum_openFromStringEnumeration() works 1743 // Need a heap allocated string enumeration because it is adopted by the UEnumeration. 1744 StringEnumeration *newTen = new TestEnumeration; 1745 status=U_ZERO_ERROR; 1746 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status); 1747 if (uten==NULL || U_FAILURE(status)) { 1748 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status)); 1749 return; 1750 } 1751 1752 // test uenum_next() 1753 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) { 1754 status=U_ZERO_ERROR; 1755 pc=uenum_next(uten, &length, &status); 1756 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) { 1757 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i); 1758 } 1759 } 1760 status=U_ZERO_ERROR; 1761 if(uenum_next(uten, &length, &status)!=NULL) { 1762 errln("File %s, line %d, uenum_next(done)!=NULL"); 1763 } 1764 1765 // test the uenum_unext() 1766 uenum_reset(uten, &status); 1767 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) { 1768 status=U_ZERO_ERROR; 1769 pu=uenum_unext(uten, &length, &status); 1770 s=UnicodeString(testEnumStrings[i], ""); 1771 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) { 1772 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i); 1773 } 1774 } 1775 status=U_ZERO_ERROR; 1776 if(uenum_unext(uten, &length, &status)!=NULL) { 1777 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__); 1778 } 1779 1780 uenum_close(uten); 1781 } 1782 1783 /* 1784 * Namespace test, to make sure that macros like UNICODE_STRING include the 1785 * namespace qualifier. 1786 * 1787 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity. 1788 */ 1789 namespace bogus { 1790 class UnicodeString { 1791 public: 1792 enum EInvariant { kInvariant }; 1793 UnicodeString() : i(1) {} 1794 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;} 1795 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/ 1796 ) : i(length) {} 1797 private: 1798 int32_t i; 1799 }; 1800 } 1801 1802 void 1803 UnicodeStringTest::TestNameSpace() { 1804 // Provoke name collision unless the UnicodeString macros properly 1805 // qualify the icu::UnicodeString class. 1806 using namespace bogus; 1807 1808 // Use all UnicodeString macros from unistr.h. 1809 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV); 1810 icu::UnicodeString s2=UNICODE_STRING("def", 3); 1811 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi"); 1812 1813 // Make sure the compiler does not optimize away instantiation of s1, s2, s3. 1814 icu::UnicodeString s4=s1+s2+s3; 1815 if(s4.length()!=9) { 1816 errln("Something wrong with UnicodeString::operator+()."); 1817 } 1818 } 1819 1820 void 1821 UnicodeStringTest::TestUTF32() { 1822 // Input string length US_STACKBUF_SIZE to cause overflow of the 1823 // initially chosen fStackBuffer due to supplementary characters. 1824 static const UChar32 utf32[] = { 1825 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 1826 0x10000, 0x20000, 0xe0000, 0x10ffff 1827 }; 1828 static const UChar expected_utf16[] = { 1829 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 1830 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff 1831 }; 1832 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32)); 1833 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16)); 1834 if(from32 != expected) { 1835 errln("UnicodeString::fromUTF32() did not create the expected string."); 1836 } 1837 1838 static const UChar utf16[] = { 1839 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff 1840 }; 1841 static const UChar32 expected_utf32[] = { 1842 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff 1843 }; 1844 UChar32 result32[16]; 1845 UErrorCode errorCode = U_ZERO_ERROR; 1846 int32_t length32 = 1847 UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)). 1848 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode); 1849 if( length32 != UPRV_LENGTHOF(expected_utf32) || 1850 0 != uprv_memcmp(result32, expected_utf32, length32*4) || 1851 result32[length32] != 0 1852 ) { 1853 errln("UnicodeString::toUTF32() did not create the expected string."); 1854 } 1855 } 1856 1857 class TestCheckedArrayByteSink : public CheckedArrayByteSink { 1858 public: 1859 TestCheckedArrayByteSink(char* outbuf, int32_t capacity) 1860 : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {} 1861 virtual void Flush() { calledFlush = TRUE; } 1862 UBool calledFlush; 1863 }; 1864 1865 void 1866 UnicodeStringTest::TestUTF8() { 1867 static const uint8_t utf8[] = { 1868 // Code points: 1869 // 0x41, 0xd900, 1870 // 0x61, 0xdc00, 1871 // 0x110000, 0x5a, 1872 // 0x50000, 0x7a, 1873 // 0x10000, 0x20000, 1874 // 0xe0000, 0x10ffff 1875 0x41, 0xed, 0xa4, 0x80, 1876 0x61, 0xed, 0xb0, 0x80, 1877 0xf4, 0x90, 0x80, 0x80, 0x5a, 1878 0xf1, 0x90, 0x80, 0x80, 0x7a, 1879 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80, 1880 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf 1881 }; 1882 static const UChar expected_utf16[] = { 1883 0x41, 0xfffd, 0xfffd, 0xfffd, 1884 0x61, 0xfffd, 0xfffd, 0xfffd, 1885 0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a, 1886 0xd900, 0xdc00, 0x7a, 1887 0xd800, 0xdc00, 0xd840, 0xdc00, 1888 0xdb40, 0xdc00, 0xdbff, 0xdfff 1889 }; 1890 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8))); 1891 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16)); 1892 1893 if(from8 != expected) { 1894 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string."); 1895 } 1896 std::string utf8_string((const char *)utf8, sizeof(utf8)); 1897 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string); 1898 if(from8b != expected) { 1899 errln("UnicodeString::fromUTF8(std::string) did not create the expected string."); 1900 } 1901 1902 static const UChar utf16[] = { 1903 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff 1904 }; 1905 static const uint8_t expected_utf8[] = { 1906 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a, 1907 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf 1908 }; 1909 UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16)); 1910 1911 char buffer[64]; 1912 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer)); 1913 us.toUTF8(sink); 1914 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) || 1915 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8)) 1916 ) { 1917 errln("UnicodeString::toUTF8() did not create the expected string."); 1918 } 1919 if(!sink.calledFlush) { 1920 errln("UnicodeString::toUTF8(sink) did not sink.Flush()."); 1921 } 1922 // Initial contents for testing that toUTF8String() appends. 1923 std::string result8 = "-->"; 1924 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8)); 1925 // Use the return value just for testing. 1926 std::string &result8r = us.toUTF8String(result8); 1927 if(result8r != expected8 || &result8r != &result8) { 1928 errln("UnicodeString::toUTF8String() did not create the expected string."); 1929 } 1930 } 1931 1932 // Test if this compiler supports Return Value Optimization of unnamed temporary objects. 1933 static UnicodeString wrapUChars(const UChar *uchars) { 1934 return UnicodeString(TRUE, uchars, -1); 1935 } 1936 1937 void 1938 UnicodeStringTest::TestReadOnlyAlias() { 1939 UChar uchars[]={ 0x61, 0x62, 0 }; 1940 UnicodeString alias(TRUE, uchars, 2); 1941 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) { 1942 errln("UnicodeString read-only-aliasing constructor does not behave as expected."); 1943 return; 1944 } 1945 alias.truncate(1); 1946 if(alias.length()!=1 || alias.getBuffer()!=uchars) { 1947 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected."); 1948 } 1949 if(alias.getTerminatedBuffer()==uchars) { 1950 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() " 1951 "did not allocate and copy as expected."); 1952 } 1953 if(uchars[1]!=0x62) { 1954 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() " 1955 "modified the original buffer."); 1956 } 1957 if(1!=u_strlen(alias.getTerminatedBuffer())) { 1958 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() " 1959 "does not return a buffer terminated at the proper length."); 1960 } 1961 1962 alias.setTo(TRUE, uchars, 2); 1963 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) { 1964 errln("UnicodeString read-only-aliasing setTo() does not behave as expected."); 1965 return; 1966 } 1967 alias.remove(); 1968 if(alias.length()!=0) { 1969 errln("UnicodeString(read-only-alias).remove() did not work."); 1970 } 1971 if(alias.getTerminatedBuffer()==uchars) { 1972 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() " 1973 "did not un-alias as expected."); 1974 } 1975 if(uchars[0]!=0x61) { 1976 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() " 1977 "modified the original buffer."); 1978 } 1979 if(0!=u_strlen(alias.getTerminatedBuffer())) { 1980 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() " 1981 "does not return a buffer terminated at length 0."); 1982 } 1983 1984 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789"); 1985 alias.setTo(FALSE, longString.getBuffer(), longString.length()); 1986 alias.remove(0, 10); 1987 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) { 1988 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected."); 1989 } 1990 alias.setTo(FALSE, longString.getBuffer(), longString.length()); 1991 alias.remove(27, 99); 1992 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) { 1993 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected."); 1994 } 1995 alias.setTo(FALSE, longString.getBuffer(), longString.length()); 1996 alias.retainBetween(6, 30); 1997 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) { 1998 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected."); 1999 } 2000 2001 UChar abc[]={ 0x61, 0x62, 0x63, 0 }; 2002 UBool hasRVO= wrapUChars(abc).getBuffer()==abc; 2003 2004 UnicodeString temp; 2005 temp.fastCopyFrom(longString.tempSubString()); 2006 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) { 2007 errln("UnicodeString.tempSubString() failed"); 2008 } 2009 temp.fastCopyFrom(longString.tempSubString(-3, 5)); 2010 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) { 2011 errln("UnicodeString.tempSubString(-3, 5) failed"); 2012 } 2013 temp.fastCopyFrom(longString.tempSubString(17)); 2014 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) { 2015 errln("UnicodeString.tempSubString(17) failed"); 2016 } 2017 temp.fastCopyFrom(longString.tempSubString(99)); 2018 if(!temp.isEmpty()) { 2019 errln("UnicodeString.tempSubString(99) failed"); 2020 } 2021 temp.fastCopyFrom(longString.tempSubStringBetween(6)); 2022 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) { 2023 errln("UnicodeString.tempSubStringBetween(6) failed"); 2024 } 2025 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18)); 2026 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) { 2027 errln("UnicodeString.tempSubStringBetween(8, 18) failed"); 2028 } 2029 UnicodeString bogusString; 2030 bogusString.setToBogus(); 2031 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18)); 2032 if(!temp.isBogus()) { 2033 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed"); 2034 } 2035 } 2036 2037 void 2038 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) { 2039 static const UChar cde[3]={ 0x63, 0x64, 0x65 }; 2040 static const UChar fg[3]={ 0x66, 0x67, 0 }; 2041 if(!app.reserveAppendCapacity(12)) { 2042 errln("Appendable.reserve(12) failed"); 2043 } 2044 app.appendCodeUnit(0x61); 2045 app.appendCodePoint(0x62); 2046 app.appendCodePoint(0x50000); 2047 app.appendString(cde, 3); 2048 app.appendString(fg, -1); 2049 UChar scratch[3]; 2050 int32_t capacity=-1; 2051 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity); 2052 if(capacity<3) { 2053 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity); 2054 return; 2055 } 2056 static const UChar hij[3]={ 0x68, 0x69, 0x6a }; 2057 u_memcpy(buffer, hij, 3); 2058 app.appendString(buffer, 3); 2059 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) { 2060 errln("Appendable.append(...) failed"); 2061 } 2062 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity); 2063 if(buffer!=NULL || capacity!=0) { 2064 errln("Appendable.getAppendBuffer(min=0) failed"); 2065 } 2066 capacity=1; 2067 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity); 2068 if(buffer!=NULL || capacity!=0) { 2069 errln("Appendable.getAppendBuffer(scratch<min) failed"); 2070 } 2071 } 2072 2073 class SimpleAppendable : public Appendable { 2074 public: 2075 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {} 2076 virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; } 2077 SimpleAppendable &reset() { str.remove(); return *this; } 2078 private: 2079 UnicodeString &str; 2080 }; 2081 2082 void 2083 UnicodeStringTest::TestAppendable() { 2084 UnicodeString dest; 2085 SimpleAppendable app(dest); 2086 doTestAppendable(dest, app); 2087 } 2088 2089 void 2090 UnicodeStringTest::TestUnicodeStringImplementsAppendable() { 2091 UnicodeString dest; 2092 UnicodeStringAppendable app(dest); 2093 doTestAppendable(dest, app); 2094 } 2095 2096 void 2097 UnicodeStringTest::TestSizeofUnicodeString() { 2098 // See the comments in unistr.h near the declaration of UnicodeString's fields. 2099 // See the API comments for UNISTR_OBJECT_SIZE. 2100 size_t sizeofUniStr=sizeof(UnicodeString); 2101 size_t expected=UNISTR_OBJECT_SIZE; 2102 if(expected!=sizeofUniStr) { 2103 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer), 2104 // of the compiler might add more internal padding than expected. 2105 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d", 2106 (int)sizeofUniStr, (int)expected); 2107 } 2108 if(sizeofUniStr<32) { 2109 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr); 2110 } 2111 // We assume that the entire UnicodeString object, 2112 // minus the vtable pointer and 2 bytes for flags and short length, 2113 // is available for internal storage of UChars. 2114 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR; 2115 UnicodeString s; 2116 const UChar *emptyBuffer=s.getBuffer(); 2117 for(int32_t i=0; i<expectedStackBufferLength; ++i) { 2118 s.append((UChar)0x2e); 2119 } 2120 const UChar *fullBuffer=s.getBuffer(); 2121 if(fullBuffer!=emptyBuffer) { 2122 errln("unexpected reallocation when filling with assumed stack buffer size of %d", 2123 expectedStackBufferLength); 2124 } 2125 const UChar *terminatedBuffer=s.getTerminatedBuffer(); 2126 if(terminatedBuffer==emptyBuffer) { 2127 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d", 2128 expectedStackBufferLength); 2129 } 2130 } 2131 2132 void 2133 UnicodeStringTest::TestMoveSwap() { 2134 static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc" 2135 UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc)); // read-only alias 2136 UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap 2137 UnicodeString s3("defg", 4, US_INV); // in stack buffer 2138 const UChar *p = s2.getBuffer(); 2139 s1.swap(s2); 2140 if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) { 2141 errln("UnicodeString.swap() did not swap"); 2142 } 2143 swap(s2, s3); 2144 if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) { 2145 errln("swap(UnicodeString) did not swap back"); 2146 } 2147 UnicodeString s4; 2148 s4.moveFrom(s1); 2149 if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) { 2150 errln("UnicodeString.moveFrom(heap) did not move"); 2151 } 2152 UnicodeString s5; 2153 s5.moveFrom(s2); 2154 if(s5 != UNICODE_STRING_SIMPLE("defg")) { 2155 errln("UnicodeString.moveFrom(stack) did not move"); 2156 } 2157 UnicodeString s6; 2158 s6.moveFrom(s3); 2159 if(s6.getBuffer() != abc || s6.length() != 3) { 2160 errln("UnicodeString.moveFrom(alias) did not move"); 2161 } 2162 infoln("TestMoveSwap() with rvalue references"); 2163 s1 = static_cast<UnicodeString &&>(s6); 2164 if(s1.getBuffer() != abc || s1.length() != 3) { 2165 errln("UnicodeString move assignment operator did not move"); 2166 } 2167 UnicodeString s7(static_cast<UnicodeString &&>(s4)); 2168 if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) { 2169 errln("UnicodeString move constructor did not move"); 2170 } 2171 2172 // Move self assignment leaves the object valid but in an undefined state. 2173 // Do it to make sure there is no crash, 2174 // but do not check for any particular resulting value. 2175 s1.moveFrom(s1); 2176 s2.moveFrom(s2); 2177 s3.moveFrom(s3); 2178 s4.moveFrom(s4); 2179 s5.moveFrom(s5); 2180 s6.moveFrom(s6); 2181 s7.moveFrom(s7); 2182 // Simple copy assignment must work. 2183 UnicodeString simple = UNICODE_STRING_SIMPLE("simple"); 2184 s1 = s6 = s4 = s7 = simple; 2185 if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) { 2186 errln("UnicodeString copy after self-move did not work"); 2187 } 2188 } 2189 2190 void 2191 UnicodeStringTest::TestUInt16Pointers() { 2192 static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 }; 2193 uint16_t arr[4]; 2194 2195 UnicodeString expected(u"abc"); 2196 assertEquals("abc from pointer", expected, UnicodeString(carr)); 2197 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3)); 2198 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3)); 2199 2200 UnicodeString alias(arr, 0, 4); 2201 alias.append(u'a').append(u'b').append(u'c'); 2202 assertEquals("abc from writable alias", expected, alias); 2203 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3)); 2204 2205 UErrorCode errorCode = U_ZERO_ERROR; 2206 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode); 2207 TEST_ASSERT_STATUS(errorCode); 2208 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length)); 2209 } 2210 2211 void 2212 UnicodeStringTest::TestWCharPointers() { 2213 #if U_SIZEOF_WCHAR_T==2 2214 static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 }; 2215 wchar_t arr[4]; 2216 2217 UnicodeString expected(u"abc"); 2218 assertEquals("abc from pointer", expected, UnicodeString(carr)); 2219 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3)); 2220 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3)); 2221 2222 UnicodeString alias(arr, 0, 4); 2223 alias.append(u'a').append(u'b').append(u'c'); 2224 assertEquals("abc from writable alias", expected, alias); 2225 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3)); 2226 2227 UErrorCode errorCode = U_ZERO_ERROR; 2228 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode); 2229 TEST_ASSERT_STATUS(errorCode); 2230 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length)); 2231 #endif 2232 } 2233 2234 void 2235 UnicodeStringTest::TestNullPointers() { 2236 assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty()); 2237 assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty()); 2238 assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty()); 2239 2240 UnicodeString alias(nullptr, 4, 4); // empty, no alias 2241 assertTrue("empty from writable alias", alias.isEmpty()); 2242 alias.append(u'a').append(u'b').append(u'c'); 2243 UnicodeString expected(u"abc"); 2244 assertEquals("abc from writable alias", expected, alias); 2245 2246 UErrorCode errorCode = U_ZERO_ERROR; 2247 UnicodeString(u"def").extract(nullptr, 0, errorCode); 2248 assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode); 2249 } 2250 2251 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() { 2252 IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf"); 2253 2254 // Test append operation 2255 UnicodeString str(u"foo "); 2256 str.append(str); 2257 str.append(str); 2258 str.append(str); 2259 assertEquals("", u"foo foo foo foo foo foo foo foo ", str); 2260 2261 // Test append operation with readonly alias to start 2262 str = UnicodeString(TRUE, u"foo ", 4); 2263 str.append(str); 2264 str.append(str); 2265 str.append(str); 2266 assertEquals("", u"foo foo foo foo foo foo foo foo ", str); 2267 2268 // Test append operation with aliased substring 2269 str = u"abcde"; 2270 UnicodeString sub = str.tempSubString(1, 2); 2271 str.append(sub); 2272 assertEquals("", u"abcdebc", str); 2273 2274 // Test append operation with double-aliased substring 2275 str = UnicodeString(TRUE, u"abcde", 5); 2276 sub = str.tempSubString(1, 2); 2277 str.append(sub); 2278 assertEquals("", u"abcdebc", str); 2279 2280 // Test insert operation 2281 str = u"a-*b"; 2282 str.insert(2, str); 2283 str.insert(4, str); 2284 str.insert(8, str); 2285 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str); 2286 2287 // Test insert operation with readonly alias to start 2288 str = UnicodeString(TRUE, u"a-*b", 4); 2289 str.insert(2, str); 2290 str.insert(4, str); 2291 str.insert(8, str); 2292 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str); 2293 2294 // Test insert operation with aliased substring 2295 str = u"abcde"; 2296 sub = str.tempSubString(1, 3); 2297 str.insert(2, sub); 2298 assertEquals("", u"abbcdcde", str); 2299 2300 // Test insert operation with double-aliased substring 2301 str = UnicodeString(TRUE, u"abcde", 5); 2302 sub = str.tempSubString(1, 3); 2303 str.insert(2, sub); 2304 assertEquals("", u"abbcdcde", str); 2305 } 2306