1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "ustrtest.h" 8 #include "unicode/std_string.h" 9 #include "unicode/unistr.h" 10 #include "unicode/uchar.h" 11 #include "unicode/ustring.h" 12 #include "unicode/locid.h" 13 #include "unicode/ucnv.h" 14 #include "unicode/uenum.h" 15 #include "cmemory.h" 16 #include "charstr.h" 17 18 #if 0 19 #include "unicode/ustream.h" 20 21 #if U_IOSTREAM_SOURCE >= 199711 22 #include <iostream> 23 using namespace std; 24 #elif U_IOSTREAM_SOURCE >= 198506 25 #include <iostream.h> 26 #endif 27 28 #endif 29 30 #define LENGTHOF(array) (int32_t)((sizeof(array)/sizeof((array)[0]))) 31 32 UnicodeStringTest::~UnicodeStringTest() {} 33 34 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par) 35 { 36 if (exec) logln("TestSuite UnicodeStringTest: "); 37 switch (index) { 38 case 0: 39 name = "StringCaseTest"; 40 if (exec) { 41 logln("StringCaseTest---"); logln(""); 42 StringCaseTest test; 43 callTest(test, par); 44 } 45 break; 46 case 1: name = "TestBasicManipulation"; if (exec) TestBasicManipulation(); break; 47 case 2: name = "TestCompare"; if (exec) TestCompare(); break; 48 case 3: name = "TestExtract"; if (exec) TestExtract(); break; 49 case 4: name = "TestRemoveReplace"; if (exec) TestRemoveReplace(); break; 50 case 5: name = "TestSearching"; if (exec) TestSearching(); break; 51 case 6: name = "TestSpacePadding"; if (exec) TestSpacePadding(); break; 52 case 7: name = "TestPrefixAndSuffix"; if (exec) TestPrefixAndSuffix(); break; 53 case 8: name = "TestFindAndReplace"; if (exec) TestFindAndReplace(); break; 54 case 9: name = "TestBogus"; if (exec) TestBogus(); break; 55 case 10: name = "TestReverse"; if (exec) TestReverse(); break; 56 case 11: name = "TestMiscellaneous"; if (exec) TestMiscellaneous(); break; 57 case 12: name = "TestStackAllocation"; if (exec) TestStackAllocation(); break; 58 case 13: name = "TestUnescape"; if (exec) TestUnescape(); break; 59 case 14: name = "TestCountChar32"; if (exec) TestCountChar32(); break; 60 case 15: name = "TestStringEnumeration"; if (exec) TestStringEnumeration(); break; 61 case 16: name = "TestCharString"; if (exec) TestCharString(); break; 62 case 17: name = "TestNameSpace"; if (exec) TestNameSpace(); break; 63 case 18: name = "TestUTF32"; if (exec) TestUTF32(); break; 64 case 19: name = "TestUTF8"; if (exec) TestUTF8(); break; 65 case 20: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break; 66 67 default: name = ""; break; //needed to end loop 68 } 69 } 70 71 void 72 UnicodeStringTest::TestBasicManipulation() 73 { 74 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n"); 75 UnicodeString expectedValue; 76 UnicodeString *c; 77 78 c=(UnicodeString *)test1.clone(); 79 test1.insert(24, "good "); 80 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n"; 81 if (test1 != expectedValue) 82 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 83 84 c->insert(24, "good "); 85 if(*c != expectedValue) { 86 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\""); 87 } 88 delete c; 89 90 test1.remove(41, 8); 91 expectedValue = "Now is the time for all good men to come to the aid of the party.\n"; 92 if (test1 != expectedValue) 93 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 94 95 test1.replace(58, 6, "ir country"); 96 expectedValue = "Now is the time for all good men to come to the aid of their country.\n"; 97 if (test1 != expectedValue) 98 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 99 100 UChar temp[80]; 101 test1.extract(0, 15, temp); 102 103 UnicodeString test2(temp, 15); 104 105 expectedValue = "Now is the time"; 106 if (test2 != expectedValue) 107 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\""); 108 109 test2 += " for me to go!\n"; 110 expectedValue = "Now is the time for me to go!\n"; 111 if (test2 != expectedValue) 112 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\""); 113 114 if (test1.length() != 70) 115 errln("length() failed: expected 70, got " + test1.length()); 116 if (test2.length() != 30) 117 errln("length() failed: expected 30, got " + test2.length()); 118 119 UnicodeString test3; 120 test3.append((UChar32)0x20402); 121 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){ 122 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3)); 123 } 124 if(test3.length() != 2){ 125 errln("append or length failed for UChar32, expected 2, got " + test3.length()); 126 } 127 test3.append((UChar32)0x0074); 128 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){ 129 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3)); 130 } 131 if(test3.length() != 3){ 132 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length()); 133 } 134 135 // test some UChar32 overloads 136 if( test3.setTo((UChar32)0x10330).length() != 2 || 137 test3.insert(0, (UChar32)0x20100).length() != 4 || 138 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 || 139 (test3 = (UChar32)0x14001).length() != 2 140 ) { 141 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed"); 142 } 143 144 { 145 // test moveIndex32() 146 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 147 148 if( 149 s.moveIndex32(2, -1)!=0 || 150 s.moveIndex32(2, 1)!=4 || 151 s.moveIndex32(2, 2)!=5 || 152 s.moveIndex32(5, -2)!=2 || 153 s.moveIndex32(0, -1)!=0 || 154 s.moveIndex32(6, 1)!=6 155 ) { 156 errln("UnicodeString::moveIndex32() failed"); 157 } 158 159 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) { 160 errln("UnicodeString::getChar32Start() failed"); 161 } 162 163 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) { 164 errln("UnicodeString::getChar32Limit() failed"); 165 } 166 } 167 168 { 169 // test new 2.2 constructors and setTo function that parallel Java's substring function. 170 UnicodeString src("Hello folks how are you?"); 171 UnicodeString target1("how are you?"); 172 if (target1 != UnicodeString(src, 12)) { 173 errln("UnicodeString(const UnicodeString&, int32_t) failed"); 174 } 175 UnicodeString target2("folks"); 176 if (target2 != UnicodeString(src, 6, 5)) { 177 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed"); 178 } 179 if (target1 != target2.setTo(src, 12)) { 180 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed"); 181 } 182 } 183 184 { 185 // op+ is new in ICU 2.8 186 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", ""); 187 if(s!=UnicodeString("abcdefghi", "")) { 188 errln("operator+(UniStr, UniStr) failed"); 189 } 190 } 191 192 { 193 // tests for Jitterbug 2360 194 // verify that APIs with source pointer + length accept length == -1 195 // mostly test only where modified, only few functions did not already do this 196 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) { 197 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1"); 198 } 199 200 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff }; 201 UnicodeString s, t(buffer, -1, LENGTHOF(buffer)); 202 203 if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=u_strlen(buffer)) { 204 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1"); 205 } 206 if(t.length()!=u_strlen(buffer)) { 207 errln("UnicodeString(buffer, length, capacity) does not work with length==-1"); 208 } 209 210 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) { 211 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1"); 212 } 213 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) { 214 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work"); 215 } 216 217 buffer[u_strlen(buffer)]=0xe4; 218 UnicodeString u(buffer, -1, LENGTHOF(buffer)); 219 if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=LENGTHOF(buffer)) { 220 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1"); 221 } 222 if(u.length()!=LENGTHOF(buffer)) { 223 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1"); 224 } 225 226 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 }; 227 UConverter *cnv; 228 UErrorCode errorCode=U_ZERO_ERROR; 229 230 cnv=ucnv_open("ISO-8859-1", &errorCode); 231 UnicodeString v(cs, -1, cnv, errorCode); 232 ucnv_close(cnv); 233 if(v!=CharsToUnicodeString("a\\xe4\\x85")) { 234 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1"); 235 } 236 } 237 238 #if U_CHARSET_IS_UTF8 239 { 240 // Test the hardcoded-UTF-8 UnicodeString optimizations. 241 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 }; 242 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 }; 243 UnicodeString from8a = UnicodeString((const char *)utf8); 244 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1); 245 UnicodeString from16(FALSE, utf16, LENGTHOF(utf16)); 246 if(from8a != from16 || from8b != from16) { 247 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed"); 248 } 249 char buffer[16]; 250 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer)); 251 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) { 252 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed"); 253 } 254 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer)); 255 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) { 256 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed"); 257 } 258 } 259 #endif 260 } 261 262 void 263 UnicodeStringTest::TestCompare() 264 { 265 UnicodeString test1("this is a test"); 266 UnicodeString test2("this is a test"); 267 UnicodeString test3("this is a test of the emergency broadcast system"); 268 UnicodeString test4("never say, \"this is a test\"!!"); 269 270 UnicodeString test5((UChar)0x5000); 271 UnicodeString test6((UChar)0x5100); 272 273 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 274 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 }; 275 char chars[] = "this is a test"; 276 277 // test operator== and operator!= 278 if (test1 != test2 || test1 == test3 || test1 == test4) 279 errln("operator== or operator!= failed"); 280 281 // test operator> and operator< 282 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) || 283 !(test5 < test6) 284 ) { 285 errln("operator> or operator< failed"); 286 } 287 288 // test operator>= and operator<= 289 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4)) 290 errln("operator>= or operator<= failed"); 291 292 // test compare(UnicodeString) 293 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0) 294 errln("compare(UnicodeString) failed"); 295 296 //test compare(offset, length, UnicodeString) 297 if(test1.compare(0, 14, test2) != 0 || 298 test3.compare(0, 14, test2) != 0 || 299 test4.compare(12, 14, test2) != 0 || 300 test3.compare(0, 18, test1) <=0 ) 301 errln("compare(offset, length, UnicodeString) failes"); 302 303 // test compare(UChar*) 304 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0) 305 errln("compare(UChar*) failed"); 306 307 // test compare(char*) 308 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0) 309 errln("compare(char*) failed"); 310 311 // test compare(UChar*, length) 312 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0) 313 errln("compare(UChar*, length) failed"); 314 315 // test compare(thisOffset, thisLength, that, thatOffset, thatLength) 316 if (test1.compare(0, 14, test2, 0, 14) != 0 317 || test1.compare(0, 14, test3, 0, 14) != 0 318 || test1.compare(0, 14, test4, 12, 14) != 0) 319 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed"); 320 321 if (test1.compare(10, 4, test2, 0, 4) >= 0 322 || test1.compare(10, 4, test3, 22, 9) <= 0 323 || test1.compare(10, 4, test4, 22, 4) != 0) 324 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed"); 325 326 // test compareBetween 327 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0 328 || test1.compareBetween(0, 14, test4, 12, 26) != 0) 329 errln("compareBetween failed"); 330 331 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0 332 || test1.compareBetween(10, 14, test4, 22, 26) != 0) 333 errln("compareBetween failed"); 334 335 // test compare() etc. with strings that share a buffer but are not equal 336 test2=test1; // share the buffer, length() too large for the stackBuffer 337 test2.truncate(1); // change only the length, not the buffer 338 if( test1==test2 || test1<=test2 || 339 test1.compare(test2)<=0 || 340 test1.compareCodePointOrder(test2)<=0 || 341 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 || 342 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 || 343 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 || 344 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0 345 ) { 346 errln("UnicodeStrings that share a buffer but have different lengths compare as equal"); 347 } 348 349 /* test compareCodePointOrder() */ 350 { 351 /* these strings are in ascending order */ 352 static const UChar strings[][4]={ 353 { 0x61, 0 }, /* U+0061 */ 354 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ 355 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ 356 { 0xd800, 0 }, /* U+d800 */ 357 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ 358 { 0xdfff, 0 }, /* U+dfff */ 359 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ 360 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ 361 { 0xd800, 0xdc02, 0 }, /* U+10002 */ 362 { 0xd84d, 0xdc56, 0 } /* U+23456 */ 363 }; 364 UnicodeString u[20]; // must be at least as long as strings[] 365 int32_t i; 366 367 for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])); ++i) { 368 u[i]=UnicodeString(TRUE, strings[i], -1); 369 } 370 371 for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])-1); ++i) { 372 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) { 373 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i); 374 } 375 } 376 } 377 378 /* test caseCompare() */ 379 { 380 static const UChar 381 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 }, 382 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 }, 383 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 }, 384 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 }; 385 386 UnicodeString 387 mixed(TRUE, _mixed, -1), 388 otherDefault(TRUE, _otherDefault, -1), 389 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1), 390 different(TRUE, _different, -1); 391 392 int8_t result; 393 394 /* test caseCompare() */ 395 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT); 396 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) { 397 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result); 398 } 399 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 400 if(result!=0) { 401 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result); 402 } 403 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 404 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { 405 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n"); 406 } 407 408 /* test caseCompare() */ 409 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT); 410 if(result<=0) { 411 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result); 412 } 413 414 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */ 415 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT); 416 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) { 417 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result); 418 } 419 420 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */ 421 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT); 422 if(result<=0) { 423 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result); 424 } 425 } 426 427 // test that srcLength=-1 is handled in functions that 428 // take input const UChar */int32_t srcLength (j785) 429 { 430 static const UChar u[]={ 0x61, 0x308, 0x62, 0 }; 431 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape(); 432 433 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) { 434 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work"); 435 } 436 437 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) { 438 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work"); 439 } 440 441 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) { 442 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work"); 443 } 444 445 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) { 446 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work"); 447 } 448 449 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) { 450 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work"); 451 } 452 453 UnicodeString s2, s3; 454 s2.replace(0, 0, u+1, -1); 455 s3.replace(0, 0, u, 1, -1); 456 if(s.compare(1, 999, s2)!=0 || s2!=s3) { 457 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work"); 458 } 459 } 460 } 461 462 void 463 UnicodeStringTest::TestExtract() 464 { 465 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", ""); 466 UnicodeString test2; 467 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 468 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 469 UnicodeString test5; 470 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 471 472 test1.extract(11, 12, test2); 473 test1.extract(11, 12, test3); 474 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) { 475 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer."); 476 } 477 478 // test proper pinning in extractBetween() 479 test1.extractBetween(-3, 7, test5); 480 if(test5!=UNICODE_STRING("Now is ", 7)) { 481 errln("UnicodeString.extractBetween(-3, 7) did not pin properly."); 482 } 483 484 test1.extractBetween(11, 23, test5); 485 if (test1.extract(60, 71, test6) != 9) { 486 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer."); 487 } 488 if (test1.extract(11, 12, test6) != 12) { 489 errln("UnicodeString.extract() failed to return the correct size of destination buffer."); 490 } 491 492 // convert test4 back to Unicode for comparison 493 UnicodeString test4b(test4, 12); 494 495 if (test1.extract(11, 12, (char *)NULL) != 12) { 496 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer."); 497 } 498 if (test1.extract(11, -1, test6) != 0) { 499 errln("UnicodeString.extract(-1) failed to stop reading the string."); 500 } 501 502 for (int32_t i = 0; i < 12; i++) { 503 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) { 504 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i); 505 break; 506 } 507 if (test1.charAt((int32_t)(11 + i)) != test3[i]) { 508 errln(UnicodeString("extracting into an array of UChar failed at position ") + i); 509 break; 510 } 511 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) { 512 errln(UnicodeString("extracting into an array of char failed at position ") + i); 513 break; 514 } 515 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) { 516 errln(UnicodeString("extracting with extractBetween failed at position ") + i); 517 break; 518 } 519 } 520 521 // test preflighting and overflows with invariant conversion 522 if (test1.extract(0, 10, (char *)NULL, "") != 10) { 523 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10"); 524 } 525 526 test4[2] = (char)0xff; 527 if (test1.extract(0, 10, test4, 2, "") != 10) { 528 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10"); 529 } 530 if (test4[2] != (char)0xff) { 531 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]"); 532 } 533 534 { 535 // test new, NUL-terminating extract() function 536 UnicodeString s("terminate", ""); 537 UChar dest[20]={ 538 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 539 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5 540 }; 541 UErrorCode errorCode; 542 int32_t length; 543 544 errorCode=U_ZERO_ERROR; 545 length=s.extract((UChar *)NULL, 0, errorCode); 546 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) { 547 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode)); 548 } 549 550 errorCode=U_ZERO_ERROR; 551 length=s.extract(dest, s.length()-1, errorCode); 552 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) { 553 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", 554 length, u_errorName(errorCode), s.length()); 555 } 556 557 errorCode=U_ZERO_ERROR; 558 length=s.extract(dest, s.length(), errorCode); 559 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) { 560 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)", 561 length, u_errorName(errorCode), s.length()); 562 } 563 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) { 564 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly"); 565 } 566 567 errorCode=U_ZERO_ERROR; 568 length=s.extract(dest, s.length()+1, errorCode); 569 if(errorCode!=U_ZERO_ERROR || length!=s.length()) { 570 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)", 571 length, u_errorName(errorCode), s.length()); 572 } 573 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) { 574 errln("UnicodeString.extract(dest large enough) did not extract the string correctly"); 575 } 576 } 577 578 { 579 // test new UConverter extract() and constructor 580 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 581 char buffer[32]; 582 static const char expect[]={ 583 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99, 584 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f, 585 (char)0xc3, (char)0x84, 586 (char)0xe1, (char)0xbb, (char)0x90 587 }; 588 UErrorCode errorCode=U_ZERO_ERROR; 589 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 590 int32_t length; 591 592 if(U_SUCCESS(errorCode)) { 593 // test preflighting 594 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 || 595 errorCode!=U_BUFFER_OVERFLOW_ERROR 596 ) { 597 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)", 598 length, u_errorName(errorCode)); 599 } 600 errorCode=U_ZERO_ERROR; 601 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 || 602 errorCode!=U_BUFFER_OVERFLOW_ERROR 603 ) { 604 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)", 605 length, u_errorName(errorCode)); 606 } 607 608 // try error cases 609 errorCode=U_ZERO_ERROR; 610 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) { 611 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination"); 612 } 613 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 614 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) { 615 errln("UnicodeString::extract(UConverter) succeeded with a previous error code"); 616 } 617 errorCode=U_ZERO_ERROR; 618 619 // extract for real 620 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 || 621 uprv_memcmp(buffer, expect, 13)!=0 || 622 buffer[13]!=0 || 623 U_FAILURE(errorCode) 624 ) { 625 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)", 626 length, u_errorName(errorCode)); 627 } 628 // Test again with just the converter name. 629 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 || 630 uprv_memcmp(buffer, expect, 13)!=0 || 631 buffer[13]!=0 || 632 U_FAILURE(errorCode) 633 ) { 634 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)", 635 length, u_errorName(errorCode)); 636 } 637 638 // try the constructor 639 UnicodeString t(expect, sizeof(expect), cnv, errorCode); 640 if(U_FAILURE(errorCode) || s!=t) { 641 errln("UnicodeString(UConverter) conversion failed (%s)", 642 u_errorName(errorCode)); 643 } 644 645 ucnv_close(cnv); 646 } 647 } 648 } 649 650 void 651 UnicodeStringTest::TestRemoveReplace() 652 { 653 UnicodeString test1("The rain in Spain stays mainly on the plain"); 654 UnicodeString test2("eat SPAMburgers!"); 655 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 }; 656 char test4[] = "SPAM"; 657 UnicodeString& test5 = test1; 658 659 test1.replace(4, 4, test2, 4, 4); 660 test1.replace(12, 5, test3, 4); 661 test3[4] = 0; 662 test1.replace(17, 4, test3); 663 test1.replace(23, 4, test4); 664 test1.replaceBetween(37, 42, test2, 4, 8); 665 666 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM") 667 errln("One of the replace methods failed:\n" 668 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n" 669 " got \"" + test1 + "\""); 670 671 test1.remove(21, 1); 672 test1.removeBetween(26, 28); 673 674 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM") 675 errln("One of the remove methods failed:\n" 676 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n" 677 " got \"" + test1 + "\""); 678 679 for (int32_t i = 0; i < test1.length(); i++) { 680 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) { 681 test1.setCharAt(i, 0x78); 682 } 683 } 684 685 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM") 686 errln("One of the remove methods failed:\n" 687 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n" 688 " got \"" + test1 + "\""); 689 690 test1.remove(); 691 if (test1.length() != 0) 692 errln("Remove() failed: expected empty string, got \"" + test1 + "\""); 693 } 694 695 void 696 UnicodeStringTest::TestSearching() 697 { 698 UnicodeString test1("test test ttest tetest testesteststt"); 699 UnicodeString test2("test"); 700 UChar testChar = 0x74; 701 702 UChar32 testChar32 = 0x20402; 703 UChar testData[]={ 704 // 0 1 2 3 4 5 6 7 705 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02, 706 707 // 8 9 10 11 12 13 14 15 708 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071, 709 710 // 16 17 18 19 711 0xdc02, 0xd841, 0x0073, 0x0000 712 }; 713 UnicodeString test3(testData); 714 UnicodeString test4(testChar32); 715 716 uint16_t occurrences = 0; 717 int32_t startPos = 0; 718 for ( ; 719 startPos != -1 && startPos < test1.length(); 720 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 721 ; 722 if (occurrences != 6) 723 errln("indexOf failed: expected to find 6 occurrences, found " + occurrences); 724 725 for ( occurrences = 0, startPos = 10; 726 startPos != -1 && startPos < test1.length(); 727 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 728 ; 729 if (occurrences != 4) 730 errln("indexOf with starting offset failed: expected to find 4 occurrences, found " + occurrences); 731 732 int32_t endPos = 28; 733 for ( occurrences = 0, startPos = 5; 734 startPos != -1 && startPos < test1.length(); 735 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 736 ; 737 if (occurrences != 4) 738 errln("indexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences); 739 740 //using UChar32 string 741 for ( startPos=0, occurrences=0; 742 startPos != -1 && startPos < test3.length(); 743 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0) 744 ; 745 if (occurrences != 4) 746 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences); 747 748 for ( startPos=10, occurrences=0; 749 startPos != -1 && startPos < test3.length(); 750 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0) 751 ; 752 if (occurrences != 2) 753 errln("indexOf failed: expected to find 2 occurrences, found " + occurrences); 754 //--- 755 756 for ( occurrences = 0, startPos = 0; 757 startPos != -1 && startPos < test1.length(); 758 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 759 ; 760 if (occurrences != 16) 761 errln("indexOf with character failed: expected to find 16 occurrences, found " + occurrences); 762 763 for ( occurrences = 0, startPos = 10; 764 startPos != -1 && startPos < test1.length(); 765 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 766 ; 767 if (occurrences != 12) 768 errln("indexOf with character & start offset failed: expected to find 12 occurrences, found " + occurrences); 769 770 for ( occurrences = 0, startPos = 5, endPos = 28; 771 startPos != -1 && startPos < test1.length(); 772 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 773 ; 774 if (occurrences != 10) 775 errln("indexOf with character & start & end offsets failed: expected to find 10 occurrences, found " + occurrences); 776 777 //testing for UChar32 778 UnicodeString subString; 779 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){ 780 subString.append(test3, startPos, test3.length()); 781 if(subString.indexOf(testChar32) != -1 ){ 782 ++occurrences; 783 } 784 subString.remove(); 785 } 786 if (occurrences != 14) 787 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences); 788 789 for ( occurrences = 0, startPos = 0; 790 startPos != -1 && startPos < test3.length(); 791 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 792 ; 793 if (occurrences != 4) 794 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences); 795 796 endPos=test3.length(); 797 for ( occurrences = 0, startPos = 5; 798 startPos != -1 && startPos < test3.length(); 799 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 800 ; 801 if (occurrences != 3) 802 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences); 803 //--- 804 805 if(test1.lastIndexOf(test2)!=29) { 806 errln("test1.lastIndexOf(test2)!=29"); 807 } 808 809 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) { 810 errln("test1.lastIndexOf(test2, start) failed"); 811 } 812 813 for ( occurrences = 0, startPos = 32; 814 startPos != -1; 815 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0) 816 ; 817 if (occurrences != 4) 818 errln("lastIndexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences); 819 820 for ( occurrences = 0, startPos = 32; 821 startPos != -1; 822 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0) 823 ; 824 if (occurrences != 11) 825 errln("lastIndexOf with character & start & end offsets failed: expected to find 11 occurrences, found " + occurrences); 826 827 //testing UChar32 828 startPos=test3.length(); 829 for ( occurrences = 0; 830 startPos != -1; 831 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0) 832 ; 833 if (occurrences != 3) 834 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences); 835 836 837 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){ 838 subString.remove(); 839 subString.append(test3, 0, endPos); 840 if(subString.lastIndexOf(testChar32) != -1 ){ 841 ++occurrences; 842 } 843 } 844 if (occurrences != 18) 845 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences); 846 //--- 847 848 // test that indexOf(UChar32) and lastIndexOf(UChar32) 849 // do not find surrogate code points when they are part of matched pairs 850 // (= part of supplementary code points) 851 // Jitterbug 1542 852 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) { 853 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point"); 854 } 855 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 || 856 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 || 857 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16 858 ) { 859 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point"); 860 } 861 } 862 863 void 864 UnicodeStringTest::TestSpacePadding() 865 { 866 UnicodeString test1("hello"); 867 UnicodeString test2(" there"); 868 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?"); 869 UnicodeString test4; 870 UBool returnVal; 871 UnicodeString expectedValue; 872 873 returnVal = test1.padLeading(15); 874 expectedValue = " hello"; 875 if (returnVal == FALSE || test1 != expectedValue) 876 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 877 878 returnVal = test2.padTrailing(15); 879 expectedValue = " there "; 880 if (returnVal == FALSE || test2 != expectedValue) 881 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 882 883 expectedValue = test3; 884 returnVal = test3.padTrailing(15); 885 if (returnVal == TRUE || test3 != expectedValue) 886 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 887 888 expectedValue = "hello"; 889 test4.setTo(test1).trim(); 890 891 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue) 892 errln("trim(UnicodeString&) failed"); 893 894 test1.trim(); 895 if (test1 != expectedValue) 896 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 897 898 test2.trim(); 899 expectedValue = "there"; 900 if (test2 != expectedValue) 901 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 902 903 test3.trim(); 904 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?"; 905 if (test3 != expectedValue) 906 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 907 908 returnVal = test1.truncate(15); 909 expectedValue = "hello"; 910 if (returnVal == TRUE || test1 != expectedValue) 911 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 912 913 returnVal = test2.truncate(15); 914 expectedValue = "there"; 915 if (returnVal == TRUE || test2 != expectedValue) 916 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 917 918 returnVal = test3.truncate(15); 919 expectedValue = "Hi! How ya doi"; 920 if (returnVal == FALSE || test3 != expectedValue) 921 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 922 } 923 924 void 925 UnicodeStringTest::TestPrefixAndSuffix() 926 { 927 UnicodeString test1("Now is the time for all good men to come to the aid of their country."); 928 UnicodeString test2("Now"); 929 UnicodeString test3("country."); 930 UnicodeString test4("count"); 931 932 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) { 933 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\"."); 934 } 935 936 if (test1.startsWith(test3) || 937 test1.startsWith(test3.getBuffer(), test3.length()) || 938 test1.startsWith(test3.getTerminatedBuffer(), 0, -1) 939 ) { 940 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\"."); 941 } 942 943 if (test1.endsWith(test2)) { 944 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\"."); 945 } 946 947 if (!test1.endsWith(test3)) { 948 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 949 } 950 if (!test1.endsWith(test3, 0, INT32_MAX)) { 951 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 952 } 953 954 if(!test1.endsWith(test3.getBuffer(), test3.length())) { 955 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 956 } 957 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) { 958 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 959 } 960 961 if (!test3.startsWith(test4)) { 962 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\"."); 963 } 964 965 if (test4.startsWith(test3)) { 966 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\"."); 967 } 968 } 969 970 void 971 UnicodeStringTest::TestFindAndReplace() 972 { 973 UnicodeString test1("One potato, two potato, three potato, four\n"); 974 UnicodeString test2("potato"); 975 UnicodeString test3("MISSISSIPPI"); 976 977 UnicodeString expectedValue; 978 979 test1.findAndReplace(test2, test3); 980 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n"; 981 if (test1 != expectedValue) 982 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 983 test1.findAndReplace(2, 32, test3, test2); 984 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n"; 985 if (test1 != expectedValue) 986 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 987 } 988 989 void 990 UnicodeStringTest::TestReverse() 991 { 992 UnicodeString test("backwards words say to used I"); 993 994 test.reverse(); 995 test.reverse(2, 4); 996 test.reverse(7, 2); 997 test.reverse(10, 3); 998 test.reverse(14, 5); 999 test.reverse(20, 9); 1000 1001 if (test != "I used to say words backwards") 1002 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \"" 1003 + test + "\""); 1004 1005 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 1006 test.reverse(); 1007 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) { 1008 errln("reverse() failed with supplementary characters"); 1009 } 1010 } 1011 1012 void 1013 UnicodeStringTest::TestMiscellaneous() 1014 { 1015 UnicodeString test1("This is a test"); 1016 UnicodeString test2("This is a test"); 1017 UnicodeString test3("Me too!"); 1018 1019 // test getBuffer(minCapacity) and releaseBuffer() 1020 test1=UnicodeString(); // make sure that it starts with its stackBuffer 1021 UChar *p=test1.getBuffer(20); 1022 if(test1.getCapacity()<20) { 1023 errln("UnicodeString::getBuffer(20).getCapacity()<20"); 1024 } 1025 1026 test1.append((UChar)7); // must not be able to modify the string here 1027 test1.setCharAt(3, 7); 1028 test1.reverse(); 1029 if( test1.length()!=0 || 1030 test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff || 1031 test1.getBuffer(10)!=0 || test1.getBuffer()!=0 1032 ) { 1033 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString"); 1034 } 1035 1036 p[0]=1; 1037 p[1]=2; 1038 p[2]=3; 1039 test1.releaseBuffer(3); 1040 test1.append((UChar)4); 1041 1042 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) { 1043 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString"); 1044 } 1045 1046 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect 1047 test1.releaseBuffer(1); 1048 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) { 1049 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString"); 1050 } 1051 1052 // test getBuffer(const) 1053 const UChar *q=test1.getBuffer(), *r=test1.getBuffer(); 1054 if( test1.length()!=4 || 1055 q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 || 1056 r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4 1057 ) { 1058 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer"); 1059 } 1060 1061 // test releaseBuffer() with a NUL-terminated buffer 1062 test1.getBuffer(20)[2]=0; 1063 test1.releaseBuffer(); // implicit -1 1064 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) { 1065 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString"); 1066 } 1067 1068 // test releaseBuffer() with a non-NUL-terminated buffer 1069 p=test1.getBuffer(256); 1070 for(int32_t i=0; i<test1.getCapacity(); ++i) { 1071 p[i]=(UChar)1; // fill the buffer with all non-NUL code units 1072 } 1073 test1.releaseBuffer(); // implicit -1 1074 if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) { 1075 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString"); 1076 } 1077 1078 // test getTerminatedBuffer() 1079 test1=UnicodeString("This is another test.", ""); 1080 test2=UnicodeString("This is another test.", ""); 1081 q=test1.getTerminatedBuffer(); 1082 if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) { 1083 errln("getTerminatedBuffer()[length]!=0"); 1084 } 1085 1086 const UChar u[]={ 5, 6, 7, 8, 0 }; 1087 test1.setTo(FALSE, u, 3); 1088 q=test1.getTerminatedBuffer(); 1089 if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) { 1090 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer"); 1091 } 1092 1093 test1.setTo(TRUE, u, -1); 1094 q=test1.getTerminatedBuffer(); 1095 if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) { 1096 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer"); 1097 } 1098 1099 test1=UNICODE_STRING("la", 2); 1100 test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1); 1101 if(test1!=UNICODE_STRING("la lila", 7)) { 1102 errln("UnicodeString::append(const UChar *, start, length) failed"); 1103 } 1104 1105 test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX); 1106 if(test1!=UNICODE_STRING("la dudum lila", 13)) { 1107 errln("UnicodeString::insert(start, const UniStr &, start, length) failed"); 1108 } 1109 1110 static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 }; 1111 test1.insert(9, ucs, -1); 1112 if(test1!=UNICODE_STRING("la dudum hm lila", 16)) { 1113 errln("UnicodeString::insert(start, const UChar *, length) failed"); 1114 } 1115 1116 test1.replace(9, 2, (UChar)0x2b); 1117 if(test1!=UNICODE_STRING("la dudum + lila", 15)) { 1118 errln("UnicodeString::replace(start, length, UChar) failed"); 1119 } 1120 1121 if(test1.hasMetaData() || UnicodeString().hasMetaData()) { 1122 errln("UnicodeString::hasMetaData() returns TRUE"); 1123 } 1124 1125 // test getTerminatedBuffer() on a truncated, shared, heap-allocated string 1126 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789."); 1127 test1.truncate(36); // ensure length()<getCapacity() 1128 test2=test1; // share the buffer 1129 test1.truncate(5); 1130 if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) { 1131 errln("UnicodeString(shared buffer).truncate() failed"); 1132 } 1133 if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) { 1134 errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() " 1135 "modified another copy of the string!"); 1136 } 1137 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789."); 1138 test1.truncate(36); // ensure length()<getCapacity() 1139 test2=test1; // share the buffer 1140 test1.remove(); 1141 if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) { 1142 errln("UnicodeString(shared buffer).remove() failed"); 1143 } 1144 if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) { 1145 errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() " 1146 "modified another copy of the string!"); 1147 } 1148 } 1149 1150 void 1151 UnicodeStringTest::TestStackAllocation() 1152 { 1153 UChar testString[] ={ 1154 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 }; 1155 UChar guardWord = 0x4DED; 1156 UnicodeString* test = 0; 1157 1158 test = new UnicodeString(testString); 1159 if (*test != "This is a crazy test.") 1160 errln("Test string failed to initialize properly."); 1161 if (guardWord != 0x04DED) 1162 errln("Test string initialization overwrote guard word!"); 1163 1164 test->insert(8, "only "); 1165 test->remove(15, 6); 1166 if (*test != "This is only a test.") 1167 errln("Manipulation of test string failed to work right."); 1168 if (guardWord != 0x4DED) 1169 errln("Manipulation of test string overwrote guard word!"); 1170 1171 // we have to deinitialize and release the backing store by calling the destructor 1172 // explicitly, since we can't overload operator delete 1173 delete test; 1174 1175 UChar workingBuffer[] = { 1176 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20, 1177 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20, 1178 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 1181 UChar guardWord2 = 0x4DED; 1182 1183 test = new UnicodeString(workingBuffer, 35, 100); 1184 if (*test != "Now is the time for all men to come") 1185 errln("Stack-allocated backing store failed to initialize correctly."); 1186 if (guardWord2 != 0x4DED) 1187 errln("Stack-allocated backing store overwrote guard word!"); 1188 1189 test->insert(24, "good "); 1190 if (*test != "Now is the time for all good men to come") 1191 errln("insert() on stack-allocated UnicodeString didn't work right"); 1192 if (guardWord2 != 0x4DED) 1193 errln("insert() on stack-allocated UnicodeString overwrote guard word!"); 1194 1195 if (workingBuffer[24] != 0x67) 1196 errln("insert() on stack-allocated UnicodeString didn't affect backing store"); 1197 1198 *test += " to the aid of their country."; 1199 if (*test != "Now is the time for all good men to come to the aid of their country.") 1200 errln("Stack-allocated UnicodeString overflow didn't work"); 1201 if (guardWord2 != 0x4DED) 1202 errln("Stack-allocated UnicodeString overflow overwrote guard word!"); 1203 1204 *test = "ha!"; 1205 if (*test != "ha!") 1206 errln("Assignment to stack-allocated UnicodeString didn't work"); 1207 if (workingBuffer[0] != 0x4e) 1208 errln("Change to UnicodeString after overflow are still affecting original buffer"); 1209 if (guardWord2 != 0x4DED) 1210 errln("Change to UnicodeString after overflow overwrote guard word!"); 1211 1212 // test read-only aliasing with setTo() 1213 workingBuffer[0] = 0x20ac; 1214 workingBuffer[1] = 0x125; 1215 workingBuffer[2] = 0; 1216 test->setTo(TRUE, workingBuffer, 2); 1217 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) { 1218 errln("UnicodeString.setTo(readonly alias) does not alias correctly"); 1219 } 1220 1221 UnicodeString *c=(UnicodeString *)test->clone(); 1222 1223 workingBuffer[1] = 0x109; 1224 if(test->charAt(1) != 0x109) { 1225 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer"); 1226 } 1227 1228 if(c->length() != 2 || c->charAt(1) != 0x125) { 1229 errln("clone(alias) did not copy the buffer"); 1230 } 1231 delete c; 1232 1233 test->setTo(TRUE, workingBuffer, -1); 1234 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) { 1235 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly"); 1236 } 1237 1238 test->setTo(FALSE, workingBuffer, -1); 1239 if(!test->isBogus()) { 1240 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()"); 1241 } 1242 1243 delete test; 1244 1245 test=new UnicodeString(); 1246 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000}; 1247 test->setTo(buffer, 4, 10); 1248 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 || 1249 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){ 1250 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test)); 1251 } 1252 delete test; 1253 1254 1255 // test the UChar32 constructor 1256 UnicodeString c32Test((UChar32)0x10ff2a); 1257 if( c32Test.length() != UTF_CHAR_LENGTH(0x10ff2a) || 1258 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a 1259 ) { 1260 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler"); 1261 } 1262 1263 // test the (new) capacity constructor 1264 UnicodeString capTest(5, (UChar32)0x2a, 5); 1265 if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x2a) || 1266 capTest.char32At(0) != 0x2a || 1267 capTest.char32At(4) != 0x2a 1268 ) { 1269 errln("The UnicodeString capacity constructor does not work with an ASCII filler"); 1270 } 1271 1272 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5); 1273 if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x10ff2a) || 1274 capTest.char32At(0) != 0x10ff2a || 1275 capTest.char32At(4) != 0x10ff2a 1276 ) { 1277 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler"); 1278 } 1279 1280 capTest = UnicodeString(5, (UChar32)0, 0); 1281 if(capTest.length() != 0) { 1282 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler"); 1283 } 1284 } 1285 1286 /** 1287 * Test the unescape() function. 1288 */ 1289 void UnicodeStringTest::TestUnescape(void) { 1290 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV); 1291 UnicodeString OUT("abc"); 1292 OUT.append((UChar)0x4567); 1293 OUT.append(" "); 1294 OUT.append((UChar)0xA); 1295 OUT.append((UChar)0xD); 1296 OUT.append(" "); 1297 OUT.append((UChar32)0x00101234); 1298 OUT.append("xyz"); 1299 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b); 1300 UnicodeString result = IN.unescape(); 1301 if (result != OUT) { 1302 errln("FAIL: " + prettify(IN) + ".unescape() -> " + 1303 prettify(result) + ", expected " + 1304 prettify(OUT)); 1305 } 1306 1307 // test that an empty string is returned in case of an error 1308 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) { 1309 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string"); 1310 } 1311 } 1312 1313 /* test code point counting functions --------------------------------------- */ 1314 1315 /* reference implementation of UnicodeString::hasMoreChar32Than() */ 1316 static int32_t 1317 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) { 1318 int32_t count=s.countChar32(start, length); 1319 return count>number; 1320 } 1321 1322 /* compare the real function against the reference */ 1323 void 1324 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) { 1325 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) { 1326 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n", 1327 start, length, number, s.hasMoreChar32Than(start, length, number)); 1328 } 1329 } 1330 1331 void 1332 UnicodeStringTest::TestCountChar32(void) { 1333 { 1334 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 1335 1336 // test countChar32() 1337 // note that this also calls and tests u_countChar32(length>=0) 1338 if( 1339 s.countChar32()!=4 || 1340 s.countChar32(1)!=4 || 1341 s.countChar32(2)!=3 || 1342 s.countChar32(2, 3)!=2 || 1343 s.countChar32(2, 0)!=0 1344 ) { 1345 errln("UnicodeString::countChar32() failed"); 1346 } 1347 1348 // NUL-terminate the string buffer and test u_countChar32(length=-1) 1349 const UChar *buffer=s.getTerminatedBuffer(); 1350 if( 1351 u_countChar32(buffer, -1)!=4 || 1352 u_countChar32(buffer+1, -1)!=4 || 1353 u_countChar32(buffer+2, -1)!=3 || 1354 u_countChar32(buffer+3, -1)!=3 || 1355 u_countChar32(buffer+4, -1)!=2 || 1356 u_countChar32(buffer+5, -1)!=1 || 1357 u_countChar32(buffer+6, -1)!=0 1358 ) { 1359 errln("u_countChar32(length=-1) failed"); 1360 } 1361 1362 // test u_countChar32() with bad input 1363 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) { 1364 errln("u_countChar32(bad input) failed (returned non-zero counts)"); 1365 } 1366 } 1367 1368 /* test data and variables for hasMoreChar32Than() */ 1369 static const UChar str[]={ 1370 0x61, 0x62, 0xd800, 0xdc00, 1371 0xd801, 0xdc01, 0x63, 0xd802, 1372 0x64, 0xdc03, 0x65, 0x66, 1373 0xd804, 0xdc04, 0xd805, 0xdc05, 1374 0x67 1375 }; 1376 UnicodeString string(str, LENGTHOF(str)); 1377 int32_t start, length, number; 1378 1379 /* test hasMoreChar32Than() */ 1380 for(length=string.length(); length>=0; --length) { 1381 for(start=0; start<=length; ++start) { 1382 for(number=-1; number<=((length-start)+2); ++number) { 1383 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number); 1384 } 1385 } 1386 } 1387 1388 /* test hasMoreChar32Than() with pinning */ 1389 for(start=-1; start<=string.length()+1; ++start) { 1390 for(number=-1; number<=((string.length()-start)+2); ++number) { 1391 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number); 1392 } 1393 } 1394 1395 /* test hasMoreChar32Than() with a bogus string */ 1396 string.setToBogus(); 1397 for(length=-1; length<=1; ++length) { 1398 for(start=-1; start<=length; ++start) { 1399 for(number=-1; number<=((length-start)+2); ++number) { 1400 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number); 1401 } 1402 } 1403 } 1404 } 1405 1406 void 1407 UnicodeStringTest::TestBogus() { 1408 UnicodeString test1("This is a test"); 1409 UnicodeString test2("This is a test"); 1410 UnicodeString test3("Me too!"); 1411 1412 // test isBogus() and setToBogus() 1413 if (test1.isBogus() || test2.isBogus() || test3.isBogus()) { 1414 errln("A string returned TRUE for isBogus()!"); 1415 } 1416 1417 // NULL pointers are treated like empty strings 1418 // use other illegal arguments to make a bogus string 1419 test3.setTo(FALSE, test1.getBuffer(), -2); 1420 if(!test3.isBogus()) { 1421 errln("A bogus string returned FALSE for isBogus()!"); 1422 } 1423 if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) { 1424 errln("hashCode() failed"); 1425 } 1426 if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) { 1427 errln("bogus.getBuffer()!=0"); 1428 } 1429 if (test1.indexOf(test3) != -1) { 1430 errln("bogus.indexOf() != -1"); 1431 } 1432 if (test1.lastIndexOf(test3) != -1) { 1433 errln("bogus.lastIndexOf() != -1"); 1434 } 1435 if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) { 1436 errln("caseCompare() doesn't work with bogus strings"); 1437 } 1438 if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) { 1439 errln("compareCodePointOrder() doesn't work with bogus strings"); 1440 } 1441 1442 // verify that non-assignment modifications fail and do not revive a bogus string 1443 test3.setToBogus(); 1444 test3.append((UChar)0x61); 1445 if(!test3.isBogus() || test3.getBuffer()!=0) { 1446 errln("bogus.append('a') worked but must not"); 1447 } 1448 1449 test3.setToBogus(); 1450 test3.findAndReplace(UnicodeString((UChar)0x61), test2); 1451 if(!test3.isBogus() || test3.getBuffer()!=0) { 1452 errln("bogus.findAndReplace() worked but must not"); 1453 } 1454 1455 test3.setToBogus(); 1456 test3.trim(); 1457 if(!test3.isBogus() || test3.getBuffer()!=0) { 1458 errln("bogus.trim() revived bogus but must not"); 1459 } 1460 1461 test3.setToBogus(); 1462 test3.remove(1); 1463 if(!test3.isBogus() || test3.getBuffer()!=0) { 1464 errln("bogus.remove(1) revived bogus but must not"); 1465 } 1466 1467 test3.setToBogus(); 1468 if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) { 1469 errln("bogus.setCharAt(0, 'b') worked but must not"); 1470 } 1471 1472 test3.setToBogus(); 1473 if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) { 1474 errln("bogus.truncate(1) revived bogus but must not"); 1475 } 1476 1477 // verify that assignments revive a bogus string 1478 test3.setToBogus(); 1479 if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) { 1480 errln("bogus.operator=() failed"); 1481 } 1482 1483 test3.setToBogus(); 1484 if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) { 1485 errln("bogus.fastCopyFrom() failed"); 1486 } 1487 1488 test3.setToBogus(); 1489 if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) { 1490 errln("bogus.setTo(UniStr) failed"); 1491 } 1492 1493 test3.setToBogus(); 1494 if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) { 1495 errln("bogus.setTo(UniStr, 0) failed"); 1496 } 1497 1498 test3.setToBogus(); 1499 if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) { 1500 errln("bogus.setTo(UniStr, 0, len) failed"); 1501 } 1502 1503 test3.setToBogus(); 1504 if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) { 1505 errln("bogus.setTo(const UChar *, len) failed"); 1506 } 1507 1508 test3.setToBogus(); 1509 if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) { 1510 errln("bogus.setTo(UChar) failed"); 1511 } 1512 1513 test3.setToBogus(); 1514 if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) { 1515 errln("bogus.setTo(UChar32) failed"); 1516 } 1517 1518 test3.setToBogus(); 1519 if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) { 1520 errln("bogus.setTo(readonly alias) failed"); 1521 } 1522 1523 // writable alias to another string's buffer: very bad idea, just convenient for this test 1524 test3.setToBogus(); 1525 if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) { 1526 errln("bogus.setTo(writable alias) failed"); 1527 } 1528 1529 // verify simple, documented ways to turn a bogus string into an empty one 1530 test3.setToBogus(); 1531 if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) { 1532 errln("bogus.operator=(UnicodeString()) failed"); 1533 } 1534 1535 test3.setToBogus(); 1536 if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) { 1537 errln("bogus.setTo(UnicodeString()) failed"); 1538 } 1539 1540 test3.setToBogus(); 1541 if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) { 1542 errln("bogus.remove() failed"); 1543 } 1544 1545 test3.setToBogus(); 1546 if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) { 1547 errln("bogus.remove(0, INT32_MAX) failed"); 1548 } 1549 1550 test3.setToBogus(); 1551 if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) { 1552 errln("bogus.truncate(0) failed"); 1553 } 1554 1555 test3.setToBogus(); 1556 if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) { 1557 errln("bogus.setTo((UChar32)-1) failed"); 1558 } 1559 1560 static const UChar nul=0; 1561 1562 test3.setToBogus(); 1563 if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) { 1564 errln("bogus.setTo(&nul, 0) failed"); 1565 } 1566 1567 test3.setToBogus(); 1568 if(!test3.isBogus() || test3.getBuffer()!=0) { 1569 errln("setToBogus() failed to make a string bogus"); 1570 } 1571 1572 test3.setToBogus(); 1573 if(test1.isBogus() || !(test1=test3).isBogus()) { 1574 errln("normal=bogus failed to make the left string bogus"); 1575 } 1576 1577 // test that NULL primitive input string values are treated like 1578 // empty strings, not errors (bogus) 1579 test2.setTo((UChar32)0x10005); 1580 if(test2.insert(1, NULL, 1).length()!=2) { 1581 errln("UniStr.insert(...NULL...) should not modify the string but does"); 1582 } 1583 1584 UErrorCode errorCode=U_ZERO_ERROR; 1585 UnicodeString 1586 test4((const UChar *)NULL), 1587 test5(TRUE, (const UChar *)NULL, 1), 1588 test6((UChar *)NULL, 5, 5), 1589 test7((const char *)NULL, 3, NULL, errorCode); 1590 if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) { 1591 errln("a constructor set to bogus for a NULL input string, should be empty"); 1592 } 1593 1594 test4.setTo(NULL, 3); 1595 test5.setTo(TRUE, (const UChar *)NULL, 1); 1596 test6.setTo((UChar *)NULL, 5, 5); 1597 if(test4.isBogus() || test5.isBogus() || test6.isBogus()) { 1598 errln("a setTo() set to bogus for a NULL input string, should be empty"); 1599 } 1600 1601 // test that bogus==bogus<any 1602 if(test1!=test3 || test1.compare(test3)!=0) { 1603 errln("bogus==bogus failed"); 1604 } 1605 1606 test2.remove(); 1607 if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) { 1608 errln("bogus<empty failed"); 1609 } 1610 } 1611 1612 // StringEnumeration ------------------------------------------------------- *** 1613 // most of StringEnumeration is tested elsewhere 1614 // this test improves code coverage 1615 1616 static const char *const 1617 testEnumStrings[]={ 1618 "a", 1619 "b", 1620 "c", 1621 "this is a long string which helps us test some buffer limits", 1622 "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" 1623 }; 1624 1625 class TestEnumeration : public StringEnumeration { 1626 public: 1627 TestEnumeration() : i(0) {} 1628 1629 virtual int32_t count(UErrorCode& /*status*/) const { 1630 return LENGTHOF(testEnumStrings); 1631 } 1632 1633 virtual const UnicodeString *snext(UErrorCode &status) { 1634 if(U_SUCCESS(status) && i<LENGTHOF(testEnumStrings)) { 1635 unistr=UnicodeString(testEnumStrings[i++], ""); 1636 return &unistr; 1637 } 1638 1639 return NULL; 1640 } 1641 1642 virtual void reset(UErrorCode& /*status*/) { 1643 i=0; 1644 } 1645 1646 static inline UClassID getStaticClassID() { 1647 return (UClassID)&fgClassID; 1648 } 1649 virtual UClassID getDynamicClassID() const { 1650 return getStaticClassID(); 1651 } 1652 1653 private: 1654 static const char fgClassID; 1655 1656 int32_t i, length; 1657 }; 1658 1659 const char TestEnumeration::fgClassID=0; 1660 1661 void 1662 UnicodeStringTest::TestStringEnumeration() { 1663 UnicodeString s; 1664 TestEnumeration ten; 1665 int32_t i, length; 1666 UErrorCode status; 1667 1668 const UChar *pu; 1669 const char *pc; 1670 1671 // test the next() default implementation and ensureCharsCapacity() 1672 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1673 status=U_ZERO_ERROR; 1674 pc=ten.next(&length, status); 1675 s=UnicodeString(testEnumStrings[i], ""); 1676 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) { 1677 errln("StringEnumeration.next(%d) failed", i); 1678 } 1679 } 1680 status=U_ZERO_ERROR; 1681 if(ten.next(&length, status)!=NULL) { 1682 errln("StringEnumeration.next(done)!=NULL"); 1683 } 1684 1685 // test the unext() default implementation 1686 ten.reset(status); 1687 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1688 status=U_ZERO_ERROR; 1689 pu=ten.unext(&length, status); 1690 s=UnicodeString(testEnumStrings[i], ""); 1691 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) { 1692 errln("StringEnumeration.unext(%d) failed", i); 1693 } 1694 } 1695 status=U_ZERO_ERROR; 1696 if(ten.unext(&length, status)!=NULL) { 1697 errln("StringEnumeration.unext(done)!=NULL"); 1698 } 1699 1700 // test that the default clone() implementation works, and returns NULL 1701 if(ten.clone()!=NULL) { 1702 errln("StringEnumeration.clone()!=NULL"); 1703 } 1704 1705 // test that uenum_openFromStringEnumeration() works 1706 // Need a heap allocated string enumeration because it is adopted by the UEnumeration. 1707 StringEnumeration *newTen = new TestEnumeration; 1708 status=U_ZERO_ERROR; 1709 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status); 1710 if (uten==NULL || U_FAILURE(status)) { 1711 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status)); 1712 return; 1713 } 1714 1715 // test uenum_next() 1716 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1717 status=U_ZERO_ERROR; 1718 pc=uenum_next(uten, &length, &status); 1719 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) { 1720 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i); 1721 } 1722 } 1723 status=U_ZERO_ERROR; 1724 if(uenum_next(uten, &length, &status)!=NULL) { 1725 errln("File %s, line %d, uenum_next(done)!=NULL"); 1726 } 1727 1728 // test the uenum_unext() 1729 uenum_reset(uten, &status); 1730 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1731 status=U_ZERO_ERROR; 1732 pu=uenum_unext(uten, &length, &status); 1733 s=UnicodeString(testEnumStrings[i], ""); 1734 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) { 1735 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i); 1736 } 1737 } 1738 status=U_ZERO_ERROR; 1739 if(uenum_unext(uten, &length, &status)!=NULL) { 1740 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__); 1741 } 1742 1743 uenum_close(uten); 1744 } 1745 1746 void 1747 UnicodeStringTest::TestCharString() { 1748 static const char originalCStr[] = 1749 "This is a large string that is meant to over flow the internal buffer of CharString. At the time of writing this test, the internal buffer is 128 bytes."; 1750 CharString chStr(originalCStr); 1751 if (strcmp(originalCStr, chStr) != 0) { 1752 errln("CharString doesn't work with large strings."); 1753 } 1754 } 1755 1756 /* 1757 * Namespace test, to make sure that macros like UNICODE_STRING include the 1758 * namespace qualifier. 1759 * 1760 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity. 1761 */ 1762 #if U_HAVE_NAMESPACE 1763 namespace bogus { 1764 class UnicodeString { 1765 public: 1766 enum EInvariant { kInvariant }; 1767 UnicodeString() : i(1) {} 1768 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {} 1769 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/ 1770 ) : i(length) {} 1771 private: 1772 int32_t i; 1773 }; 1774 } 1775 #endif 1776 1777 void 1778 UnicodeStringTest::TestNameSpace() { 1779 #if U_HAVE_NAMESPACE 1780 // Provoke name collision unless the UnicodeString macros properly 1781 // qualify the icu::UnicodeString class. 1782 using namespace bogus; 1783 1784 // Use all UnicodeString macros from unistr.h. 1785 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV); 1786 icu::UnicodeString s2=UNICODE_STRING("def", 3); 1787 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi"); 1788 1789 // Make sure the compiler does not optimize away instantiation of s1, s2, s3. 1790 icu::UnicodeString s4=s1+s2+s3; 1791 if(s4.length()!=9) { 1792 errln("Something wrong with UnicodeString::operator+()."); 1793 } 1794 #endif 1795 } 1796 1797 void 1798 UnicodeStringTest::TestUTF32() { 1799 // Input string length US_STACKBUF_SIZE to cause overflow of the 1800 // initially chosen fStackBuffer due to supplementary characters. 1801 static const UChar32 utf32[] = { 1802 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 1803 0x10000, 0x20000, 0xe0000, 0x10ffff 1804 }; 1805 static const UChar expected_utf16[] = { 1806 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 1807 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff 1808 }; 1809 UnicodeString from32 = UnicodeString::fromUTF32(utf32, LENGTHOF(utf32)); 1810 UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16)); 1811 if(from32 != expected) { 1812 errln("UnicodeString::fromUTF32() did not create the expected string."); 1813 } 1814 1815 static const UChar utf16[] = { 1816 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff 1817 }; 1818 static const UChar32 expected_utf32[] = { 1819 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff 1820 }; 1821 UChar32 result32[16]; 1822 UErrorCode errorCode = U_ZERO_ERROR; 1823 int32_t length32 = 1824 UnicodeString(FALSE, utf16, LENGTHOF(utf16)). 1825 toUTF32(result32, LENGTHOF(result32), errorCode); 1826 if( length32 != LENGTHOF(expected_utf32) || 1827 0 != uprv_memcmp(result32, expected_utf32, length32*4) || 1828 result32[length32] != 0 1829 ) { 1830 errln("UnicodeString::toUTF32() did not create the expected string."); 1831 } 1832 } 1833 1834 void 1835 UnicodeStringTest::TestUTF8() { 1836 static const uint8_t utf8[] = { 1837 // Code points: 1838 // 0x41, 0xd900, 1839 // 0x61, 0xdc00, 1840 // 0x110000, 0x5a, 1841 // 0x50000, 0x7a, 1842 // 0x10000, 0x20000, 1843 // 0xe0000, 0x10ffff 1844 0x41, 0xed, 0xa4, 0x80, 1845 0x61, 0xed, 0xb0, 0x80, 1846 0xf4, 0x90, 0x80, 0x80, 0x5a, 1847 0xf1, 0x90, 0x80, 0x80, 0x7a, 1848 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80, 1849 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf 1850 }; 1851 static const UChar expected_utf16[] = { 1852 0x41, 0xfffd, 1853 0x61, 0xfffd, 1854 0xfffd, 0x5a, 1855 0xd900, 0xdc00, 0x7a, 1856 0xd800, 0xdc00, 0xd840, 0xdc00, 1857 0xdb40, 0xdc00, 0xdbff, 0xdfff 1858 }; 1859 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8))); 1860 UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16)); 1861 1862 if(from8 != expected) { 1863 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string."); 1864 } 1865 #if U_HAVE_STD_STRING 1866 U_STD_NSQ string utf8_string((const char *)utf8, sizeof(utf8)); 1867 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string); 1868 if(from8b != expected) { 1869 errln("UnicodeString::fromUTF8(std::string) did not create the expected string."); 1870 } 1871 #endif 1872 1873 static const UChar utf16[] = { 1874 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff 1875 }; 1876 static const uint8_t expected_utf8[] = { 1877 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a, 1878 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf 1879 }; 1880 UnicodeString us(FALSE, utf16, LENGTHOF(utf16)); 1881 1882 char buffer[64]; 1883 CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer)); 1884 us.toUTF8(sink); 1885 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) || 1886 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8)) 1887 ) { 1888 errln("UnicodeString::toUTF8() did not create the expected string."); 1889 } 1890 #if U_HAVE_STD_STRING 1891 // Initial contents for testing that toUTF8String() appends. 1892 U_STD_NSQ string result8 = "-->"; 1893 U_STD_NSQ string expected8 = "-->" + U_STD_NSQ string((const char *)expected_utf8, sizeof(expected_utf8)); 1894 // Use the return value just for testing. 1895 U_STD_NSQ string &result8r = us.toUTF8String(result8); 1896 if(result8r != expected8 || &result8r != &result8) { 1897 errln("UnicodeString::toUTF8String() did not create the expected string."); 1898 } 1899 #endif 1900 } 1901 1902 // Test if this compiler supports Return Value Optimization of unnamed temporary objects. 1903 static UnicodeString wrapUChars(const UChar *uchars) { 1904 return UnicodeString(TRUE, uchars, -1); 1905 } 1906 1907 void 1908 UnicodeStringTest::TestReadOnlyAlias() { 1909 UChar uchars[]={ 0x61, 0x62, 0 }; 1910 UnicodeString alias(TRUE, uchars, 2); 1911 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) { 1912 errln("UnicodeString read-only-aliasing constructor does not behave as expected."); 1913 return; 1914 } 1915 alias.truncate(1); 1916 if(alias.length()!=1 || alias.getBuffer()!=uchars) { 1917 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected."); 1918 } 1919 if(alias.getTerminatedBuffer()==uchars) { 1920 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() " 1921 "did not allocate and copy as expected."); 1922 } 1923 if(uchars[1]!=0x62) { 1924 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() " 1925 "modified the original buffer."); 1926 } 1927 if(1!=u_strlen(alias.getTerminatedBuffer())) { 1928 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() " 1929 "does not return a buffer terminated at the proper length."); 1930 } 1931 1932 alias.setTo(TRUE, uchars, 2); 1933 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) { 1934 errln("UnicodeString read-only-aliasing setTo() does not behave as expected."); 1935 return; 1936 } 1937 alias.remove(); 1938 if(alias.length()!=0) { 1939 errln("UnicodeString(read-only-alias).remove() did not work."); 1940 } 1941 if(alias.getTerminatedBuffer()==uchars) { 1942 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() " 1943 "did not un-alias as expected."); 1944 } 1945 if(uchars[0]!=0x61) { 1946 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() " 1947 "modified the original buffer."); 1948 } 1949 if(0!=u_strlen(alias.getTerminatedBuffer())) { 1950 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() " 1951 "does not return a buffer terminated at length 0."); 1952 } 1953 1954 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789"); 1955 alias.setTo(FALSE, longString.getBuffer(), longString.length()); 1956 alias.remove(0, 10); 1957 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) { 1958 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected."); 1959 } 1960 alias.setTo(FALSE, longString.getBuffer(), longString.length()); 1961 alias.remove(27, 99); 1962 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) { 1963 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected."); 1964 } 1965 alias.setTo(FALSE, longString.getBuffer(), longString.length()); 1966 alias.retainBetween(6, 30); 1967 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) { 1968 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected."); 1969 } 1970 1971 UChar abc[]={ 0x61, 0x62, 0x63, 0 }; 1972 UBool hasRVO= wrapUChars(abc).getBuffer()==abc; 1973 1974 UnicodeString temp; 1975 temp.fastCopyFrom(longString.tempSubString()); 1976 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) { 1977 errln("UnicodeString.tempSubString() failed"); 1978 } 1979 temp.fastCopyFrom(longString.tempSubString(-3, 5)); 1980 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) { 1981 errln("UnicodeString.tempSubString(-3, 5) failed"); 1982 } 1983 temp.fastCopyFrom(longString.tempSubString(17)); 1984 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) { 1985 errln("UnicodeString.tempSubString(17) failed"); 1986 } 1987 temp.fastCopyFrom(longString.tempSubString(99)); 1988 if(!temp.isEmpty()) { 1989 errln("UnicodeString.tempSubString(99) failed"); 1990 } 1991 temp.fastCopyFrom(longString.tempSubStringBetween(6)); 1992 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) { 1993 errln("UnicodeString.tempSubStringBetween(6) failed"); 1994 } 1995 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18)); 1996 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) { 1997 errln("UnicodeString.tempSubStringBetween(8, 18) failed"); 1998 } 1999 UnicodeString bogusString; 2000 bogusString.setToBogus(); 2001 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18)); 2002 if(!temp.isBogus()) { 2003 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed"); 2004 } 2005 } 2006