1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "ustrtest.h" 8 #include "unicode/std_string.h" 9 #include "unicode/unistr.h" 10 #include "unicode/uchar.h" 11 #include "unicode/ustring.h" 12 #include "unicode/locid.h" 13 #include "unicode/ucnv.h" 14 #include "unicode/uenum.h" 15 #include "cmemory.h" 16 #include "charstr.h" 17 18 #if 0 19 #include "unicode/ustream.h" 20 21 #if U_IOSTREAM_SOURCE >= 199711 22 #include <iostream> 23 using namespace std; 24 #elif U_IOSTREAM_SOURCE >= 198506 25 #include <iostream.h> 26 #endif 27 28 #endif 29 30 #define LENGTHOF(array) (int32_t)((sizeof(array)/sizeof((array)[0]))) 31 32 UnicodeStringTest::~UnicodeStringTest() {} 33 34 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par) 35 { 36 if (exec) logln("TestSuite UnicodeStringTest: "); 37 switch (index) { 38 case 0: 39 name = "StringCaseTest"; 40 if (exec) { 41 logln("StringCaseTest---"); logln(""); 42 StringCaseTest test; 43 callTest(test, par); 44 } 45 break; 46 case 1: name = "TestBasicManipulation"; if (exec) TestBasicManipulation(); break; 47 case 2: name = "TestCompare"; if (exec) TestCompare(); break; 48 case 3: name = "TestExtract"; if (exec) TestExtract(); break; 49 case 4: name = "TestRemoveReplace"; if (exec) TestRemoveReplace(); break; 50 case 5: name = "TestSearching"; if (exec) TestSearching(); break; 51 case 6: name = "TestSpacePadding"; if (exec) TestSpacePadding(); break; 52 case 7: name = "TestPrefixAndSuffix"; if (exec) TestPrefixAndSuffix(); break; 53 case 8: name = "TestFindAndReplace"; if (exec) TestFindAndReplace(); break; 54 case 9: name = "TestBogus"; if (exec) TestBogus(); break; 55 case 10: name = "TestReverse"; if (exec) TestReverse(); break; 56 case 11: name = "TestMiscellaneous"; if (exec) TestMiscellaneous(); break; 57 case 12: name = "TestStackAllocation"; if (exec) TestStackAllocation(); break; 58 case 13: name = "TestUnescape"; if (exec) TestUnescape(); break; 59 case 14: name = "TestCountChar32"; if (exec) TestCountChar32(); break; 60 case 15: name = "TestStringEnumeration"; if (exec) TestStringEnumeration(); break; 61 case 16: name = "TestCharString"; if (exec) TestCharString(); break; 62 case 17: name = "TestNameSpace"; if (exec) TestNameSpace(); break; 63 case 18: name = "TestUTF32"; if (exec) TestUTF32(); break; 64 case 19: name = "TestUTF8"; if (exec) TestUTF8(); break; 65 66 default: name = ""; break; //needed to end loop 67 } 68 } 69 70 void 71 UnicodeStringTest::TestBasicManipulation() 72 { 73 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n"); 74 UnicodeString expectedValue; 75 UnicodeString *c; 76 77 c=(UnicodeString *)test1.clone(); 78 test1.insert(24, "good "); 79 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n"; 80 if (test1 != expectedValue) 81 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 82 83 c->insert(24, "good "); 84 if(*c != expectedValue) { 85 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\""); 86 } 87 delete c; 88 89 test1.remove(41, 8); 90 expectedValue = "Now is the time for all good men to come to the aid of the party.\n"; 91 if (test1 != expectedValue) 92 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 93 94 test1.replace(58, 6, "ir country"); 95 expectedValue = "Now is the time for all good men to come to the aid of their country.\n"; 96 if (test1 != expectedValue) 97 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\""); 98 99 UChar temp[80]; 100 test1.extract(0, 15, temp); 101 102 UnicodeString test2(temp, 15); 103 104 expectedValue = "Now is the time"; 105 if (test2 != expectedValue) 106 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\""); 107 108 test2 += " for me to go!\n"; 109 expectedValue = "Now is the time for me to go!\n"; 110 if (test2 != expectedValue) 111 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\""); 112 113 if (test1.length() != 70) 114 errln("length() failed: expected 70, got " + test1.length()); 115 if (test2.length() != 30) 116 errln("length() failed: expected 30, got " + test2.length()); 117 118 UnicodeString test3; 119 test3.append((UChar32)0x20402); 120 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){ 121 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3)); 122 } 123 if(test3.length() != 2){ 124 errln("append or length failed for UChar32, expected 2, got " + test3.length()); 125 } 126 test3.append((UChar32)0x0074); 127 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){ 128 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3)); 129 } 130 if(test3.length() != 3){ 131 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length()); 132 } 133 134 // test some UChar32 overloads 135 if( test3.setTo((UChar32)0x10330).length() != 2 || 136 test3.insert(0, (UChar32)0x20100).length() != 4 || 137 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 || 138 (test3 = (UChar32)0x14001).length() != 2 139 ) { 140 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed"); 141 } 142 143 { 144 // test moveIndex32() 145 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 146 147 if( 148 s.moveIndex32(2, -1)!=0 || 149 s.moveIndex32(2, 1)!=4 || 150 s.moveIndex32(2, 2)!=5 || 151 s.moveIndex32(5, -2)!=2 || 152 s.moveIndex32(0, -1)!=0 || 153 s.moveIndex32(6, 1)!=6 154 ) { 155 errln("UnicodeString::moveIndex32() failed"); 156 } 157 158 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) { 159 errln("UnicodeString::getChar32Start() failed"); 160 } 161 162 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) { 163 errln("UnicodeString::getChar32Limit() failed"); 164 } 165 } 166 167 { 168 // test new 2.2 constructors and setTo function that parallel Java's substring function. 169 UnicodeString src("Hello folks how are you?"); 170 UnicodeString target1("how are you?"); 171 if (target1 != UnicodeString(src, 12)) { 172 errln("UnicodeString(const UnicodeString&, int32_t) failed"); 173 } 174 UnicodeString target2("folks"); 175 if (target2 != UnicodeString(src, 6, 5)) { 176 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed"); 177 } 178 if (target1 != target2.setTo(src, 12)) { 179 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed"); 180 } 181 } 182 183 { 184 // op+ is new in ICU 2.8 185 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", ""); 186 if(s!=UnicodeString("abcdefghi", "")) { 187 errln("operator+(UniStr, UniStr) failed"); 188 } 189 } 190 191 { 192 // tests for Jitterbug 2360 193 // verify that APIs with source pointer + length accept length == -1 194 // mostly test only where modified, only few functions did not already do this 195 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) { 196 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1"); 197 } 198 199 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff }; 200 UnicodeString s, t(buffer, -1, LENGTHOF(buffer)); 201 202 if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=u_strlen(buffer)) { 203 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1"); 204 } 205 if(t.length()!=u_strlen(buffer)) { 206 errln("UnicodeString(buffer, length, capacity) does not work with length==-1"); 207 } 208 209 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) { 210 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1"); 211 } 212 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) { 213 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work"); 214 } 215 216 buffer[u_strlen(buffer)]=0xe4; 217 UnicodeString u(buffer, -1, LENGTHOF(buffer)); 218 if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=LENGTHOF(buffer)) { 219 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1"); 220 } 221 if(u.length()!=LENGTHOF(buffer)) { 222 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1"); 223 } 224 225 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 }; 226 UConverter *cnv; 227 UErrorCode errorCode=U_ZERO_ERROR; 228 229 cnv=ucnv_open("ISO-8859-1", &errorCode); 230 UnicodeString v(cs, -1, cnv, errorCode); 231 ucnv_close(cnv); 232 if(v!=CharsToUnicodeString("a\\xe4\\x85")) { 233 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1"); 234 } 235 } 236 237 #if U_CHARSET_IS_UTF8 238 { 239 // Test the hardcoded-UTF-8 UnicodeString optimizations. 240 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 }; 241 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 }; 242 UnicodeString from8a = UnicodeString((const char *)utf8); 243 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1); 244 UnicodeString from16(FALSE, utf16, LENGTHOF(utf16)); 245 if(from8a != from16 || from8b != from16) { 246 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed"); 247 } 248 char buffer[16]; 249 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer)); 250 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) { 251 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed"); 252 } 253 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer)); 254 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) { 255 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed"); 256 } 257 } 258 #endif 259 } 260 261 void 262 UnicodeStringTest::TestCompare() 263 { 264 UnicodeString test1("this is a test"); 265 UnicodeString test2("this is a test"); 266 UnicodeString test3("this is a test of the emergency broadcast system"); 267 UnicodeString test4("never say, \"this is a test\"!!"); 268 269 UnicodeString test5((UChar)0x5000); 270 UnicodeString test6((UChar)0x5100); 271 272 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 273 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 }; 274 char chars[] = "this is a test"; 275 276 // test operator== and operator!= 277 if (test1 != test2 || test1 == test3 || test1 == test4) 278 errln("operator== or operator!= failed"); 279 280 // test operator> and operator< 281 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) || 282 !(test5 < test6) 283 ) { 284 errln("operator> or operator< failed"); 285 } 286 287 // test operator>= and operator<= 288 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4)) 289 errln("operator>= or operator<= failed"); 290 291 // test compare(UnicodeString) 292 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0) 293 errln("compare(UnicodeString) failed"); 294 295 //test compare(offset, length, UnicodeString) 296 if(test1.compare(0, 14, test2) != 0 || 297 test3.compare(0, 14, test2) != 0 || 298 test4.compare(12, 14, test2) != 0 || 299 test3.compare(0, 18, test1) <=0 ) 300 errln("compare(offset, length, UnicodeString) failes"); 301 302 // test compare(UChar*) 303 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0) 304 errln("compare(UChar*) failed"); 305 306 // test compare(char*) 307 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0) 308 errln("compare(char*) failed"); 309 310 // test compare(UChar*, length) 311 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0) 312 errln("compare(UChar*, length) failed"); 313 314 // test compare(thisOffset, thisLength, that, thatOffset, thatLength) 315 if (test1.compare(0, 14, test2, 0, 14) != 0 316 || test1.compare(0, 14, test3, 0, 14) != 0 317 || test1.compare(0, 14, test4, 12, 14) != 0) 318 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed"); 319 320 if (test1.compare(10, 4, test2, 0, 4) >= 0 321 || test1.compare(10, 4, test3, 22, 9) <= 0 322 || test1.compare(10, 4, test4, 22, 4) != 0) 323 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed"); 324 325 // test compareBetween 326 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0 327 || test1.compareBetween(0, 14, test4, 12, 26) != 0) 328 errln("compareBetween failed"); 329 330 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0 331 || test1.compareBetween(10, 14, test4, 22, 26) != 0) 332 errln("compareBetween failed"); 333 334 // test compare() etc. with strings that share a buffer but are not equal 335 test2=test1; // share the buffer, length() too large for the stackBuffer 336 test2.truncate(1); // change only the length, not the buffer 337 if( test1==test2 || test1<=test2 || 338 test1.compare(test2)<=0 || 339 test1.compareCodePointOrder(test2)<=0 || 340 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 || 341 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 || 342 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 || 343 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0 344 ) { 345 errln("UnicodeStrings that share a buffer but have different lengths compare as equal"); 346 } 347 348 /* test compareCodePointOrder() */ 349 { 350 /* these strings are in ascending order */ 351 static const UChar strings[][4]={ 352 { 0x61, 0 }, /* U+0061 */ 353 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ 354 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ 355 { 0xd800, 0 }, /* U+d800 */ 356 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ 357 { 0xdfff, 0 }, /* U+dfff */ 358 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ 359 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ 360 { 0xd800, 0xdc02, 0 }, /* U+10002 */ 361 { 0xd84d, 0xdc56, 0 } /* U+23456 */ 362 }; 363 UnicodeString u[20]; // must be at least as long as strings[] 364 int32_t i; 365 366 for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])); ++i) { 367 u[i]=UnicodeString(TRUE, strings[i], -1); 368 } 369 370 for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])-1); ++i) { 371 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) { 372 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i); 373 } 374 } 375 } 376 377 /* test caseCompare() */ 378 { 379 static const UChar 380 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 }, 381 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 }, 382 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 }, 383 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 }; 384 385 UnicodeString 386 mixed(TRUE, _mixed, -1), 387 otherDefault(TRUE, _otherDefault, -1), 388 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1), 389 different(TRUE, _different, -1); 390 391 int8_t result; 392 393 /* test caseCompare() */ 394 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT); 395 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) { 396 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result); 397 } 398 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 399 if(result!=0) { 400 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result); 401 } 402 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 403 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { 404 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n"); 405 } 406 407 /* test caseCompare() */ 408 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT); 409 if(result<=0) { 410 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result); 411 } 412 413 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */ 414 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT); 415 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) { 416 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result); 417 } 418 419 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */ 420 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT); 421 if(result<=0) { 422 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result); 423 } 424 } 425 426 // test that srcLength=-1 is handled in functions that 427 // take input const UChar */int32_t srcLength (j785) 428 { 429 static const UChar u[]={ 0x61, 0x308, 0x62, 0 }; 430 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape(); 431 432 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) { 433 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work"); 434 } 435 436 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) { 437 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work"); 438 } 439 440 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) { 441 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work"); 442 } 443 444 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) { 445 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work"); 446 } 447 448 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) { 449 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work"); 450 } 451 452 UnicodeString s2, s3; 453 s2.replace(0, 0, u+1, -1); 454 s3.replace(0, 0, u, 1, -1); 455 if(s.compare(1, 999, s2)!=0 || s2!=s3) { 456 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work"); 457 } 458 } 459 } 460 461 void 462 UnicodeStringTest::TestExtract() 463 { 464 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", ""); 465 UnicodeString test2; 466 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 467 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 468 UnicodeString test5; 469 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13}; 470 471 test1.extract(11, 12, test2); 472 test1.extract(11, 12, test3); 473 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) { 474 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer."); 475 } 476 477 // test proper pinning in extractBetween() 478 test1.extractBetween(-3, 7, test5); 479 if(test5!=UNICODE_STRING("Now is ", 7)) { 480 errln("UnicodeString.extractBetween(-3, 7) did not pin properly."); 481 } 482 483 test1.extractBetween(11, 23, test5); 484 if (test1.extract(60, 71, test6) != 9) { 485 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer."); 486 } 487 if (test1.extract(11, 12, test6) != 12) { 488 errln("UnicodeString.extract() failed to return the correct size of destination buffer."); 489 } 490 491 // convert test4 back to Unicode for comparison 492 UnicodeString test4b(test4, 12); 493 494 if (test1.extract(11, 12, (char *)NULL) != 12) { 495 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer."); 496 } 497 if (test1.extract(11, -1, test6) != 0) { 498 errln("UnicodeString.extract(-1) failed to stop reading the string."); 499 } 500 501 for (int32_t i = 0; i < 12; i++) { 502 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) { 503 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i); 504 break; 505 } 506 if (test1.charAt((int32_t)(11 + i)) != test3[i]) { 507 errln(UnicodeString("extracting into an array of UChar failed at position ") + i); 508 break; 509 } 510 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) { 511 errln(UnicodeString("extracting into an array of char failed at position ") + i); 512 break; 513 } 514 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) { 515 errln(UnicodeString("extracting with extractBetween failed at position ") + i); 516 break; 517 } 518 } 519 520 // test preflighting and overflows with invariant conversion 521 if (test1.extract(0, 10, (char *)NULL, "") != 10) { 522 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10"); 523 } 524 525 test4[2] = (char)0xff; 526 if (test1.extract(0, 10, test4, 2, "") != 10) { 527 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10"); 528 } 529 if (test4[2] != (char)0xff) { 530 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]"); 531 } 532 533 { 534 // test new, NUL-terminating extract() function 535 UnicodeString s("terminate", ""); 536 UChar dest[20]={ 537 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 538 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5 539 }; 540 UErrorCode errorCode; 541 int32_t length; 542 543 errorCode=U_ZERO_ERROR; 544 length=s.extract((UChar *)NULL, 0, errorCode); 545 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) { 546 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode)); 547 } 548 549 errorCode=U_ZERO_ERROR; 550 length=s.extract(dest, s.length()-1, errorCode); 551 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) { 552 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", 553 length, u_errorName(errorCode), s.length()); 554 } 555 556 errorCode=U_ZERO_ERROR; 557 length=s.extract(dest, s.length(), errorCode); 558 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) { 559 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)", 560 length, u_errorName(errorCode), s.length()); 561 } 562 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) { 563 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly"); 564 } 565 566 errorCode=U_ZERO_ERROR; 567 length=s.extract(dest, s.length()+1, errorCode); 568 if(errorCode!=U_ZERO_ERROR || length!=s.length()) { 569 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)", 570 length, u_errorName(errorCode), s.length()); 571 } 572 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) { 573 errln("UnicodeString.extract(dest large enough) did not extract the string correctly"); 574 } 575 } 576 577 { 578 // test new UConverter extract() and constructor 579 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 580 char buffer[32]; 581 static const char expect[]={ 582 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99, 583 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f, 584 (char)0xc3, (char)0x84, 585 (char)0xe1, (char)0xbb, (char)0x90 586 }; 587 UErrorCode errorCode=U_ZERO_ERROR; 588 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 589 int32_t length; 590 591 if(U_SUCCESS(errorCode)) { 592 // test preflighting 593 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 || 594 errorCode!=U_BUFFER_OVERFLOW_ERROR 595 ) { 596 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)", 597 length, u_errorName(errorCode)); 598 } 599 errorCode=U_ZERO_ERROR; 600 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 || 601 errorCode!=U_BUFFER_OVERFLOW_ERROR 602 ) { 603 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)", 604 length, u_errorName(errorCode)); 605 } 606 607 // try error cases 608 errorCode=U_ZERO_ERROR; 609 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) { 610 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination"); 611 } 612 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 613 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) { 614 errln("UnicodeString::extract(UConverter) succeeded with a previous error code"); 615 } 616 errorCode=U_ZERO_ERROR; 617 618 // extract for real 619 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 || 620 uprv_memcmp(buffer, expect, 13)!=0 || 621 buffer[13]!=0 || 622 U_FAILURE(errorCode) 623 ) { 624 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)", 625 length, u_errorName(errorCode)); 626 } 627 // Test again with just the converter name. 628 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 || 629 uprv_memcmp(buffer, expect, 13)!=0 || 630 buffer[13]!=0 || 631 U_FAILURE(errorCode) 632 ) { 633 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)", 634 length, u_errorName(errorCode)); 635 } 636 637 // try the constructor 638 UnicodeString t(expect, sizeof(expect), cnv, errorCode); 639 if(U_FAILURE(errorCode) || s!=t) { 640 errln("UnicodeString(UConverter) conversion failed (%s)", 641 u_errorName(errorCode)); 642 } 643 644 ucnv_close(cnv); 645 } 646 } 647 } 648 649 void 650 UnicodeStringTest::TestRemoveReplace() 651 { 652 UnicodeString test1("The rain in Spain stays mainly on the plain"); 653 UnicodeString test2("eat SPAMburgers!"); 654 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 }; 655 char test4[] = "SPAM"; 656 UnicodeString& test5 = test1; 657 658 test1.replace(4, 4, test2, 4, 4); 659 test1.replace(12, 5, test3, 4); 660 test3[4] = 0; 661 test1.replace(17, 4, test3); 662 test1.replace(23, 4, test4); 663 test1.replaceBetween(37, 42, test2, 4, 8); 664 665 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM") 666 errln("One of the replace methods failed:\n" 667 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n" 668 " got \"" + test1 + "\""); 669 670 test1.remove(21, 1); 671 test1.removeBetween(26, 28); 672 673 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM") 674 errln("One of the remove methods failed:\n" 675 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n" 676 " got \"" + test1 + "\""); 677 678 for (int32_t i = 0; i < test1.length(); i++) { 679 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) { 680 test1.setCharAt(i, 0x78); 681 } 682 } 683 684 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM") 685 errln("One of the remove methods failed:\n" 686 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n" 687 " got \"" + test1 + "\""); 688 689 test1.remove(); 690 if (test1.length() != 0) 691 errln("Remove() failed: expected empty string, got \"" + test1 + "\""); 692 } 693 694 void 695 UnicodeStringTest::TestSearching() 696 { 697 UnicodeString test1("test test ttest tetest testesteststt"); 698 UnicodeString test2("test"); 699 UChar testChar = 0x74; 700 701 UChar32 testChar32 = 0x20402; 702 UChar testData[]={ 703 // 0 1 2 3 4 5 6 7 704 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02, 705 706 // 8 9 10 11 12 13 14 15 707 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071, 708 709 // 16 17 18 19 710 0xdc02, 0xd841, 0x0073, 0x0000 711 }; 712 UnicodeString test3(testData); 713 UnicodeString test4(testChar32); 714 715 uint16_t occurrences = 0; 716 int32_t startPos = 0; 717 for ( ; 718 startPos != -1 && startPos < test1.length(); 719 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 720 ; 721 if (occurrences != 6) 722 errln("indexOf failed: expected to find 6 occurrences, found " + occurrences); 723 724 for ( occurrences = 0, startPos = 10; 725 startPos != -1 && startPos < test1.length(); 726 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 727 ; 728 if (occurrences != 4) 729 errln("indexOf with starting offset failed: expected to find 4 occurrences, found " + occurrences); 730 731 int32_t endPos = 28; 732 for ( occurrences = 0, startPos = 5; 733 startPos != -1 && startPos < test1.length(); 734 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0) 735 ; 736 if (occurrences != 4) 737 errln("indexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences); 738 739 //using UChar32 string 740 for ( startPos=0, occurrences=0; 741 startPos != -1 && startPos < test3.length(); 742 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0) 743 ; 744 if (occurrences != 4) 745 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences); 746 747 for ( startPos=10, occurrences=0; 748 startPos != -1 && startPos < test3.length(); 749 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0) 750 ; 751 if (occurrences != 2) 752 errln("indexOf failed: expected to find 2 occurrences, found " + occurrences); 753 //--- 754 755 for ( occurrences = 0, startPos = 0; 756 startPos != -1 && startPos < test1.length(); 757 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 758 ; 759 if (occurrences != 16) 760 errln("indexOf with character failed: expected to find 16 occurrences, found " + occurrences); 761 762 for ( occurrences = 0, startPos = 10; 763 startPos != -1 && startPos < test1.length(); 764 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 765 ; 766 if (occurrences != 12) 767 errln("indexOf with character & start offset failed: expected to find 12 occurrences, found " + occurrences); 768 769 for ( occurrences = 0, startPos = 5, endPos = 28; 770 startPos != -1 && startPos < test1.length(); 771 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 772 ; 773 if (occurrences != 10) 774 errln("indexOf with character & start & end offsets failed: expected to find 10 occurrences, found " + occurrences); 775 776 //testing for UChar32 777 UnicodeString subString; 778 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){ 779 subString.append(test3, startPos, test3.length()); 780 if(subString.indexOf(testChar32) != -1 ){ 781 ++occurrences; 782 } 783 subString.remove(); 784 } 785 if (occurrences != 14) 786 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences); 787 788 for ( occurrences = 0, startPos = 0; 789 startPos != -1 && startPos < test3.length(); 790 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 791 ; 792 if (occurrences != 4) 793 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences); 794 795 endPos=test3.length(); 796 for ( occurrences = 0, startPos = 5; 797 startPos != -1 && startPos < test3.length(); 798 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0) 799 ; 800 if (occurrences != 3) 801 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences); 802 //--- 803 804 if(test1.lastIndexOf(test2)!=29) { 805 errln("test1.lastIndexOf(test2)!=29"); 806 } 807 808 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) { 809 errln("test1.lastIndexOf(test2, start) failed"); 810 } 811 812 for ( occurrences = 0, startPos = 32; 813 startPos != -1; 814 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0) 815 ; 816 if (occurrences != 4) 817 errln("lastIndexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences); 818 819 for ( occurrences = 0, startPos = 32; 820 startPos != -1; 821 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0) 822 ; 823 if (occurrences != 11) 824 errln("lastIndexOf with character & start & end offsets failed: expected to find 11 occurrences, found " + occurrences); 825 826 //testing UChar32 827 startPos=test3.length(); 828 for ( occurrences = 0; 829 startPos != -1; 830 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0) 831 ; 832 if (occurrences != 3) 833 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences); 834 835 836 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){ 837 subString.remove(); 838 subString.append(test3, 0, endPos); 839 if(subString.lastIndexOf(testChar32) != -1 ){ 840 ++occurrences; 841 } 842 } 843 if (occurrences != 18) 844 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences); 845 //--- 846 847 // test that indexOf(UChar32) and lastIndexOf(UChar32) 848 // do not find surrogate code points when they are part of matched pairs 849 // (= part of supplementary code points) 850 // Jitterbug 1542 851 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) { 852 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point"); 853 } 854 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 || 855 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 || 856 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16 857 ) { 858 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point"); 859 } 860 } 861 862 void 863 UnicodeStringTest::TestSpacePadding() 864 { 865 UnicodeString test1("hello"); 866 UnicodeString test2(" there"); 867 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?"); 868 UnicodeString test4; 869 UBool returnVal; 870 UnicodeString expectedValue; 871 872 returnVal = test1.padLeading(15); 873 expectedValue = " hello"; 874 if (returnVal == FALSE || test1 != expectedValue) 875 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 876 877 returnVal = test2.padTrailing(15); 878 expectedValue = " there "; 879 if (returnVal == FALSE || test2 != expectedValue) 880 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 881 882 expectedValue = test3; 883 returnVal = test3.padTrailing(15); 884 if (returnVal == TRUE || test3 != expectedValue) 885 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 886 887 expectedValue = "hello"; 888 test4.setTo(test1).trim(); 889 890 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue) 891 errln("trim(UnicodeString&) failed"); 892 893 test1.trim(); 894 if (test1 != expectedValue) 895 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 896 897 test2.trim(); 898 expectedValue = "there"; 899 if (test2 != expectedValue) 900 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 901 902 test3.trim(); 903 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?"; 904 if (test3 != expectedValue) 905 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 906 907 returnVal = test1.truncate(15); 908 expectedValue = "hello"; 909 if (returnVal == TRUE || test1 != expectedValue) 910 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 911 912 returnVal = test2.truncate(15); 913 expectedValue = "there"; 914 if (returnVal == TRUE || test2 != expectedValue) 915 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\"."); 916 917 returnVal = test3.truncate(15); 918 expectedValue = "Hi! How ya doi"; 919 if (returnVal == FALSE || test3 != expectedValue) 920 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\"."); 921 } 922 923 void 924 UnicodeStringTest::TestPrefixAndSuffix() 925 { 926 UnicodeString test1("Now is the time for all good men to come to the aid of their country."); 927 UnicodeString test2("Now"); 928 UnicodeString test3("country."); 929 UnicodeString test4("count"); 930 931 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) { 932 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\"."); 933 } 934 935 if (test1.startsWith(test3) || 936 test1.startsWith(test3.getBuffer(), test3.length()) || 937 test1.startsWith(test3.getTerminatedBuffer(), 0, -1) 938 ) { 939 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\"."); 940 } 941 942 if (test1.endsWith(test2)) { 943 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\"."); 944 } 945 946 if (!test1.endsWith(test3)) { 947 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 948 } 949 if (!test1.endsWith(test3, 0, INT32_MAX)) { 950 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 951 } 952 953 if(!test1.endsWith(test3.getBuffer(), test3.length())) { 954 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 955 } 956 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) { 957 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\"."); 958 } 959 960 if (!test3.startsWith(test4)) { 961 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\"."); 962 } 963 964 if (test4.startsWith(test3)) { 965 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\"."); 966 } 967 } 968 969 void 970 UnicodeStringTest::TestFindAndReplace() 971 { 972 UnicodeString test1("One potato, two potato, three potato, four\n"); 973 UnicodeString test2("potato"); 974 UnicodeString test3("MISSISSIPPI"); 975 976 UnicodeString expectedValue; 977 978 test1.findAndReplace(test2, test3); 979 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n"; 980 if (test1 != expectedValue) 981 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 982 test1.findAndReplace(2, 32, test3, test2); 983 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n"; 984 if (test1 != expectedValue) 985 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\"."); 986 } 987 988 void 989 UnicodeStringTest::TestReverse() 990 { 991 UnicodeString test("backwards words say to used I"); 992 993 test.reverse(); 994 test.reverse(2, 4); 995 test.reverse(7, 2); 996 test.reverse(10, 3); 997 test.reverse(14, 5); 998 test.reverse(20, 9); 999 1000 if (test != "I used to say words backwards") 1001 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \"" 1002 + test + "\""); 1003 1004 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 1005 test.reverse(); 1006 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) { 1007 errln("reverse() failed with supplementary characters"); 1008 } 1009 } 1010 1011 void 1012 UnicodeStringTest::TestMiscellaneous() 1013 { 1014 UnicodeString test1("This is a test"); 1015 UnicodeString test2("This is a test"); 1016 UnicodeString test3("Me too!"); 1017 1018 // test getBuffer(minCapacity) and releaseBuffer() 1019 test1=UnicodeString(); // make sure that it starts with its stackBuffer 1020 UChar *p=test1.getBuffer(20); 1021 if(test1.getCapacity()<20) { 1022 errln("UnicodeString::getBuffer(20).getCapacity()<20"); 1023 } 1024 1025 test1.append((UChar)7); // must not be able to modify the string here 1026 test1.setCharAt(3, 7); 1027 test1.reverse(); 1028 if( test1.length()!=0 || 1029 test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff || 1030 test1.getBuffer(10)!=0 || test1.getBuffer()!=0 1031 ) { 1032 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString"); 1033 } 1034 1035 p[0]=1; 1036 p[1]=2; 1037 p[2]=3; 1038 test1.releaseBuffer(3); 1039 test1.append((UChar)4); 1040 1041 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) { 1042 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString"); 1043 } 1044 1045 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect 1046 test1.releaseBuffer(1); 1047 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) { 1048 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString"); 1049 } 1050 1051 // test getBuffer(const) 1052 const UChar *q=test1.getBuffer(), *r=test1.getBuffer(); 1053 if( test1.length()!=4 || 1054 q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 || 1055 r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4 1056 ) { 1057 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer"); 1058 } 1059 1060 // test releaseBuffer() with a NUL-terminated buffer 1061 test1.getBuffer(20)[2]=0; 1062 test1.releaseBuffer(); // implicit -1 1063 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) { 1064 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString"); 1065 } 1066 1067 // test releaseBuffer() with a non-NUL-terminated buffer 1068 p=test1.getBuffer(256); 1069 for(int32_t i=0; i<test1.getCapacity(); ++i) { 1070 p[i]=(UChar)1; // fill the buffer with all non-NUL code units 1071 } 1072 test1.releaseBuffer(); // implicit -1 1073 if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) { 1074 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString"); 1075 } 1076 1077 // test getTerminatedBuffer() 1078 test1=UnicodeString("This is another test.", ""); 1079 test2=UnicodeString("This is another test.", ""); 1080 q=test1.getTerminatedBuffer(); 1081 if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) { 1082 errln("getTerminatedBuffer()[length]!=0"); 1083 } 1084 1085 const UChar u[]={ 5, 6, 7, 8, 0 }; 1086 test1.setTo(FALSE, u, 3); 1087 q=test1.getTerminatedBuffer(); 1088 if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) { 1089 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer"); 1090 } 1091 1092 test1.setTo(TRUE, u, -1); 1093 q=test1.getTerminatedBuffer(); 1094 #ifndef U_VALGRIND 1095 // The VALGRIND option always copies the buffer for getTerminatedBuffer(), 1096 // to avoid reading uninitialized memory when checking for the termination. 1097 if(q!=u) { 1098 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer"); 1099 } 1100 #endif 1101 1102 if(test1.length()!=4 || q[3]!=8 || q[4]!=0) { 1103 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer"); 1104 } 1105 1106 test1=UNICODE_STRING("la", 2); 1107 test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1); 1108 if(test1!=UNICODE_STRING("la lila", 7)) { 1109 errln("UnicodeString::append(const UChar *, start, length) failed"); 1110 } 1111 1112 test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX); 1113 if(test1!=UNICODE_STRING("la dudum lila", 13)) { 1114 errln("UnicodeString::insert(start, const UniStr &, start, length) failed"); 1115 } 1116 1117 static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 }; 1118 test1.insert(9, ucs, -1); 1119 if(test1!=UNICODE_STRING("la dudum hm lila", 16)) { 1120 errln("UnicodeString::insert(start, const UChar *, length) failed"); 1121 } 1122 1123 test1.replace(9, 2, (UChar)0x2b); 1124 if(test1!=UNICODE_STRING("la dudum + lila", 15)) { 1125 errln("UnicodeString::replace(start, length, UChar) failed"); 1126 } 1127 1128 if(test1.hasMetaData() || UnicodeString().hasMetaData()) { 1129 errln("UnicodeString::hasMetaData() returns TRUE"); 1130 } 1131 } 1132 1133 void 1134 UnicodeStringTest::TestStackAllocation() 1135 { 1136 UChar testString[] ={ 1137 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 }; 1138 UChar guardWord = 0x4DED; 1139 UnicodeString* test = 0; 1140 1141 test = new UnicodeString(testString); 1142 if (*test != "This is a crazy test.") 1143 errln("Test string failed to initialize properly."); 1144 if (guardWord != 0x04DED) 1145 errln("Test string initialization overwrote guard word!"); 1146 1147 test->insert(8, "only "); 1148 test->remove(15, 6); 1149 if (*test != "This is only a test.") 1150 errln("Manipulation of test string failed to work right."); 1151 if (guardWord != 0x4DED) 1152 errln("Manipulation of test string overwrote guard word!"); 1153 1154 // we have to deinitialize and release the backing store by calling the destructor 1155 // explicitly, since we can't overload operator delete 1156 delete test; 1157 1158 UChar workingBuffer[] = { 1159 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20, 1160 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20, 1161 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1162 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1163 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 1164 UChar guardWord2 = 0x4DED; 1165 1166 test = new UnicodeString(workingBuffer, 35, 100); 1167 if (*test != "Now is the time for all men to come") 1168 errln("Stack-allocated backing store failed to initialize correctly."); 1169 if (guardWord2 != 0x4DED) 1170 errln("Stack-allocated backing store overwrote guard word!"); 1171 1172 test->insert(24, "good "); 1173 if (*test != "Now is the time for all good men to come") 1174 errln("insert() on stack-allocated UnicodeString didn't work right"); 1175 if (guardWord2 != 0x4DED) 1176 errln("insert() on stack-allocated UnicodeString overwrote guard word!"); 1177 1178 if (workingBuffer[24] != 0x67) 1179 errln("insert() on stack-allocated UnicodeString didn't affect backing store"); 1180 1181 *test += " to the aid of their country."; 1182 if (*test != "Now is the time for all good men to come to the aid of their country.") 1183 errln("Stack-allocated UnicodeString overflow didn't work"); 1184 if (guardWord2 != 0x4DED) 1185 errln("Stack-allocated UnicodeString overflow overwrote guard word!"); 1186 1187 *test = "ha!"; 1188 if (*test != "ha!") 1189 errln("Assignment to stack-allocated UnicodeString didn't work"); 1190 if (workingBuffer[0] != 0x4e) 1191 errln("Change to UnicodeString after overflow are still affecting original buffer"); 1192 if (guardWord2 != 0x4DED) 1193 errln("Change to UnicodeString after overflow overwrote guard word!"); 1194 1195 // test read-only aliasing with setTo() 1196 workingBuffer[0] = 0x20ac; 1197 workingBuffer[1] = 0x125; 1198 workingBuffer[2] = 0; 1199 test->setTo(TRUE, workingBuffer, 2); 1200 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) { 1201 errln("UnicodeString.setTo(readonly alias) does not alias correctly"); 1202 } 1203 1204 UnicodeString *c=(UnicodeString *)test->clone(); 1205 1206 workingBuffer[1] = 0x109; 1207 if(test->charAt(1) != 0x109) { 1208 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer"); 1209 } 1210 1211 if(c->length() != 2 || c->charAt(1) != 0x125) { 1212 errln("clone(alias) did not copy the buffer"); 1213 } 1214 delete c; 1215 1216 test->setTo(TRUE, workingBuffer, -1); 1217 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) { 1218 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly"); 1219 } 1220 1221 test->setTo(FALSE, workingBuffer, -1); 1222 if(!test->isBogus()) { 1223 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()"); 1224 } 1225 1226 delete test; 1227 1228 test=new UnicodeString(); 1229 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000}; 1230 test->setTo(buffer, 4, 10); 1231 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 || 1232 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){ 1233 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test)); 1234 } 1235 delete test; 1236 1237 1238 // test the UChar32 constructor 1239 UnicodeString c32Test((UChar32)0x10ff2a); 1240 if( c32Test.length() != UTF_CHAR_LENGTH(0x10ff2a) || 1241 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a 1242 ) { 1243 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler"); 1244 } 1245 1246 // test the (new) capacity constructor 1247 UnicodeString capTest(5, (UChar32)0x2a, 5); 1248 if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x2a) || 1249 capTest.char32At(0) != 0x2a || 1250 capTest.char32At(4) != 0x2a 1251 ) { 1252 errln("The UnicodeString capacity constructor does not work with an ASCII filler"); 1253 } 1254 1255 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5); 1256 if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x10ff2a) || 1257 capTest.char32At(0) != 0x10ff2a || 1258 capTest.char32At(4) != 0x10ff2a 1259 ) { 1260 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler"); 1261 } 1262 1263 capTest = UnicodeString(5, (UChar32)0, 0); 1264 if(capTest.length() != 0) { 1265 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler"); 1266 } 1267 } 1268 1269 /** 1270 * Test the unescape() function. 1271 */ 1272 void UnicodeStringTest::TestUnescape(void) { 1273 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV); 1274 UnicodeString OUT("abc"); 1275 OUT.append((UChar)0x4567); 1276 OUT.append(" "); 1277 OUT.append((UChar)0xA); 1278 OUT.append((UChar)0xD); 1279 OUT.append(" "); 1280 OUT.append((UChar32)0x00101234); 1281 OUT.append("xyz"); 1282 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b); 1283 UnicodeString result = IN.unescape(); 1284 if (result != OUT) { 1285 errln("FAIL: " + prettify(IN) + ".unescape() -> " + 1286 prettify(result) + ", expected " + 1287 prettify(OUT)); 1288 } 1289 1290 // test that an empty string is returned in case of an error 1291 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) { 1292 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string"); 1293 } 1294 } 1295 1296 /* test code point counting functions --------------------------------------- */ 1297 1298 /* reference implementation of UnicodeString::hasMoreChar32Than() */ 1299 static int32_t 1300 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) { 1301 int32_t count=s.countChar32(start, length); 1302 return count>number; 1303 } 1304 1305 /* compare the real function against the reference */ 1306 void 1307 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) { 1308 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) { 1309 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n", 1310 start, length, number, s.hasMoreChar32Than(start, length, number)); 1311 } 1312 } 1313 1314 void 1315 UnicodeStringTest::TestCountChar32(void) { 1316 { 1317 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape(); 1318 1319 // test countChar32() 1320 // note that this also calls and tests u_countChar32(length>=0) 1321 if( 1322 s.countChar32()!=4 || 1323 s.countChar32(1)!=4 || 1324 s.countChar32(2)!=3 || 1325 s.countChar32(2, 3)!=2 || 1326 s.countChar32(2, 0)!=0 1327 ) { 1328 errln("UnicodeString::countChar32() failed"); 1329 } 1330 1331 // NUL-terminate the string buffer and test u_countChar32(length=-1) 1332 const UChar *buffer=s.getTerminatedBuffer(); 1333 if( 1334 u_countChar32(buffer, -1)!=4 || 1335 u_countChar32(buffer+1, -1)!=4 || 1336 u_countChar32(buffer+2, -1)!=3 || 1337 u_countChar32(buffer+3, -1)!=3 || 1338 u_countChar32(buffer+4, -1)!=2 || 1339 u_countChar32(buffer+5, -1)!=1 || 1340 u_countChar32(buffer+6, -1)!=0 1341 ) { 1342 errln("u_countChar32(length=-1) failed"); 1343 } 1344 1345 // test u_countChar32() with bad input 1346 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) { 1347 errln("u_countChar32(bad input) failed (returned non-zero counts)"); 1348 } 1349 } 1350 1351 /* test data and variables for hasMoreChar32Than() */ 1352 static const UChar str[]={ 1353 0x61, 0x62, 0xd800, 0xdc00, 1354 0xd801, 0xdc01, 0x63, 0xd802, 1355 0x64, 0xdc03, 0x65, 0x66, 1356 0xd804, 0xdc04, 0xd805, 0xdc05, 1357 0x67 1358 }; 1359 UnicodeString string(str, LENGTHOF(str)); 1360 int32_t start, length, number; 1361 1362 /* test hasMoreChar32Than() */ 1363 for(length=string.length(); length>=0; --length) { 1364 for(start=0; start<=length; ++start) { 1365 for(number=-1; number<=((length-start)+2); ++number) { 1366 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number); 1367 } 1368 } 1369 } 1370 1371 /* test hasMoreChar32Than() with pinning */ 1372 for(start=-1; start<=string.length()+1; ++start) { 1373 for(number=-1; number<=((string.length()-start)+2); ++number) { 1374 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number); 1375 } 1376 } 1377 1378 /* test hasMoreChar32Than() with a bogus string */ 1379 string.setToBogus(); 1380 for(length=-1; length<=1; ++length) { 1381 for(start=-1; start<=length; ++start) { 1382 for(number=-1; number<=((length-start)+2); ++number) { 1383 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number); 1384 } 1385 } 1386 } 1387 } 1388 1389 void 1390 UnicodeStringTest::TestBogus() { 1391 UnicodeString test1("This is a test"); 1392 UnicodeString test2("This is a test"); 1393 UnicodeString test3("Me too!"); 1394 1395 // test isBogus() and setToBogus() 1396 if (test1.isBogus() || test2.isBogus() || test3.isBogus()) { 1397 errln("A string returned TRUE for isBogus()!"); 1398 } 1399 1400 // NULL pointers are treated like empty strings 1401 // use other illegal arguments to make a bogus string 1402 test3.setTo(FALSE, test1.getBuffer(), -2); 1403 if(!test3.isBogus()) { 1404 errln("A bogus string returned FALSE for isBogus()!"); 1405 } 1406 if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) { 1407 errln("hashCode() failed"); 1408 } 1409 if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) { 1410 errln("bogus.getBuffer()!=0"); 1411 } 1412 if (test1.indexOf(test3) != -1) { 1413 errln("bogus.indexOf() != -1"); 1414 } 1415 if (test1.lastIndexOf(test3) != -1) { 1416 errln("bogus.lastIndexOf() != -1"); 1417 } 1418 if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) { 1419 errln("caseCompare() doesn't work with bogus strings"); 1420 } 1421 if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) { 1422 errln("compareCodePointOrder() doesn't work with bogus strings"); 1423 } 1424 1425 // verify that non-assignment modifications fail and do not revive a bogus string 1426 test3.setToBogus(); 1427 test3.append((UChar)0x61); 1428 if(!test3.isBogus() || test3.getBuffer()!=0) { 1429 errln("bogus.append('a') worked but must not"); 1430 } 1431 1432 test3.setToBogus(); 1433 test3.findAndReplace(UnicodeString((UChar)0x61), test2); 1434 if(!test3.isBogus() || test3.getBuffer()!=0) { 1435 errln("bogus.findAndReplace() worked but must not"); 1436 } 1437 1438 test3.setToBogus(); 1439 test3.trim(); 1440 if(!test3.isBogus() || test3.getBuffer()!=0) { 1441 errln("bogus.trim() revived bogus but must not"); 1442 } 1443 1444 test3.setToBogus(); 1445 test3.remove(1); 1446 if(!test3.isBogus() || test3.getBuffer()!=0) { 1447 errln("bogus.remove(1) revived bogus but must not"); 1448 } 1449 1450 test3.setToBogus(); 1451 if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) { 1452 errln("bogus.setCharAt(0, 'b') worked but must not"); 1453 } 1454 1455 test3.setToBogus(); 1456 if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) { 1457 errln("bogus.truncate(1) revived bogus but must not"); 1458 } 1459 1460 // verify that assignments revive a bogus string 1461 test3.setToBogus(); 1462 if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) { 1463 errln("bogus.operator=() failed"); 1464 } 1465 1466 test3.setToBogus(); 1467 if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) { 1468 errln("bogus.fastCopyFrom() failed"); 1469 } 1470 1471 test3.setToBogus(); 1472 if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) { 1473 errln("bogus.setTo(UniStr) failed"); 1474 } 1475 1476 test3.setToBogus(); 1477 if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) { 1478 errln("bogus.setTo(UniStr, 0) failed"); 1479 } 1480 1481 test3.setToBogus(); 1482 if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) { 1483 errln("bogus.setTo(UniStr, 0, len) failed"); 1484 } 1485 1486 test3.setToBogus(); 1487 if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) { 1488 errln("bogus.setTo(const UChar *, len) failed"); 1489 } 1490 1491 test3.setToBogus(); 1492 if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) { 1493 errln("bogus.setTo(UChar) failed"); 1494 } 1495 1496 test3.setToBogus(); 1497 if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) { 1498 errln("bogus.setTo(UChar32) failed"); 1499 } 1500 1501 test3.setToBogus(); 1502 if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) { 1503 errln("bogus.setTo(readonly alias) failed"); 1504 } 1505 1506 // writable alias to another string's buffer: very bad idea, just convenient for this test 1507 test3.setToBogus(); 1508 if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) { 1509 errln("bogus.setTo(writable alias) failed"); 1510 } 1511 1512 // verify simple, documented ways to turn a bogus string into an empty one 1513 test3.setToBogus(); 1514 if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) { 1515 errln("bogus.operator=(UnicodeString()) failed"); 1516 } 1517 1518 test3.setToBogus(); 1519 if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) { 1520 errln("bogus.setTo(UnicodeString()) failed"); 1521 } 1522 1523 test3.setToBogus(); 1524 if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) { 1525 errln("bogus.remove() failed"); 1526 } 1527 1528 test3.setToBogus(); 1529 if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) { 1530 errln("bogus.remove(0, INT32_MAX) failed"); 1531 } 1532 1533 test3.setToBogus(); 1534 if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) { 1535 errln("bogus.truncate(0) failed"); 1536 } 1537 1538 test3.setToBogus(); 1539 if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) { 1540 errln("bogus.setTo((UChar32)-1) failed"); 1541 } 1542 1543 static const UChar nul=0; 1544 1545 test3.setToBogus(); 1546 if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) { 1547 errln("bogus.setTo(&nul, 0) failed"); 1548 } 1549 1550 test3.setToBogus(); 1551 if(!test3.isBogus() || test3.getBuffer()!=0) { 1552 errln("setToBogus() failed to make a string bogus"); 1553 } 1554 1555 test3.setToBogus(); 1556 if(test1.isBogus() || !(test1=test3).isBogus()) { 1557 errln("normal=bogus failed to make the left string bogus"); 1558 } 1559 1560 // test that NULL primitive input string values are treated like 1561 // empty strings, not errors (bogus) 1562 test2.setTo((UChar32)0x10005); 1563 if(test2.insert(1, NULL, 1).length()!=2) { 1564 errln("UniStr.insert(...NULL...) should not modify the string but does"); 1565 } 1566 1567 UErrorCode errorCode=U_ZERO_ERROR; 1568 UnicodeString 1569 test4((const UChar *)NULL), 1570 test5(TRUE, (const UChar *)NULL, 1), 1571 test6((UChar *)NULL, 5, 5), 1572 test7((const char *)NULL, 3, NULL, errorCode); 1573 if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) { 1574 errln("a constructor set to bogus for a NULL input string, should be empty"); 1575 } 1576 1577 test4.setTo(NULL, 3); 1578 test5.setTo(TRUE, (const UChar *)NULL, 1); 1579 test6.setTo((UChar *)NULL, 5, 5); 1580 if(test4.isBogus() || test5.isBogus() || test6.isBogus()) { 1581 errln("a setTo() set to bogus for a NULL input string, should be empty"); 1582 } 1583 1584 // test that bogus==bogus<any 1585 if(test1!=test3 || test1.compare(test3)!=0) { 1586 errln("bogus==bogus failed"); 1587 } 1588 1589 test2.remove(); 1590 if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) { 1591 errln("bogus<empty failed"); 1592 } 1593 } 1594 1595 // StringEnumeration ------------------------------------------------------- *** 1596 // most of StringEnumeration is tested elsewhere 1597 // this test improves code coverage 1598 1599 static const char *const 1600 testEnumStrings[]={ 1601 "a", 1602 "b", 1603 "c", 1604 "this is a long string which helps us test some buffer limits", 1605 "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" 1606 }; 1607 1608 class TestEnumeration : public StringEnumeration { 1609 public: 1610 TestEnumeration() : i(0) {} 1611 1612 virtual int32_t count(UErrorCode& /*status*/) const { 1613 return LENGTHOF(testEnumStrings); 1614 } 1615 1616 virtual const UnicodeString *snext(UErrorCode &status) { 1617 if(U_SUCCESS(status) && i<LENGTHOF(testEnumStrings)) { 1618 unistr=UnicodeString(testEnumStrings[i++], ""); 1619 return &unistr; 1620 } 1621 1622 return NULL; 1623 } 1624 1625 virtual void reset(UErrorCode& /*status*/) { 1626 i=0; 1627 } 1628 1629 static inline UClassID getStaticClassID() { 1630 return (UClassID)&fgClassID; 1631 } 1632 virtual UClassID getDynamicClassID() const { 1633 return getStaticClassID(); 1634 } 1635 1636 private: 1637 static const char fgClassID; 1638 1639 int32_t i, length; 1640 }; 1641 1642 const char TestEnumeration::fgClassID=0; 1643 1644 void 1645 UnicodeStringTest::TestStringEnumeration() { 1646 UnicodeString s; 1647 TestEnumeration ten; 1648 int32_t i, length; 1649 UErrorCode status; 1650 1651 const UChar *pu; 1652 const char *pc; 1653 1654 // test the next() default implementation and ensureCharsCapacity() 1655 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1656 status=U_ZERO_ERROR; 1657 pc=ten.next(&length, status); 1658 s=UnicodeString(testEnumStrings[i], ""); 1659 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) { 1660 errln("StringEnumeration.next(%d) failed", i); 1661 } 1662 } 1663 status=U_ZERO_ERROR; 1664 if(ten.next(&length, status)!=NULL) { 1665 errln("StringEnumeration.next(done)!=NULL"); 1666 } 1667 1668 // test the unext() default implementation 1669 ten.reset(status); 1670 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1671 status=U_ZERO_ERROR; 1672 pu=ten.unext(&length, status); 1673 s=UnicodeString(testEnumStrings[i], ""); 1674 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) { 1675 errln("StringEnumeration.unext(%d) failed", i); 1676 } 1677 } 1678 status=U_ZERO_ERROR; 1679 if(ten.unext(&length, status)!=NULL) { 1680 errln("StringEnumeration.unext(done)!=NULL"); 1681 } 1682 1683 // test that the default clone() implementation works, and returns NULL 1684 if(ten.clone()!=NULL) { 1685 errln("StringEnumeration.clone()!=NULL"); 1686 } 1687 1688 // test that uenum_openFromStringEnumeration() works 1689 // Need a heap allocated string enumeration because it is adopted by the UEnumeration. 1690 StringEnumeration *newTen = new TestEnumeration; 1691 status=U_ZERO_ERROR; 1692 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status); 1693 if (uten==NULL || U_FAILURE(status)) { 1694 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status)); 1695 return; 1696 } 1697 1698 // test uenum_next() 1699 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1700 status=U_ZERO_ERROR; 1701 pc=uenum_next(uten, &length, &status); 1702 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) { 1703 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i); 1704 } 1705 } 1706 status=U_ZERO_ERROR; 1707 if(uenum_next(uten, &length, &status)!=NULL) { 1708 errln("File %s, line %d, uenum_next(done)!=NULL"); 1709 } 1710 1711 // test the uenum_unext() 1712 uenum_reset(uten, &status); 1713 for(i=0; i<LENGTHOF(testEnumStrings); ++i) { 1714 status=U_ZERO_ERROR; 1715 pu=uenum_unext(uten, &length, &status); 1716 s=UnicodeString(testEnumStrings[i], ""); 1717 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) { 1718 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i); 1719 } 1720 } 1721 status=U_ZERO_ERROR; 1722 if(uenum_unext(uten, &length, &status)!=NULL) { 1723 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__); 1724 } 1725 1726 uenum_close(uten); 1727 } 1728 1729 void 1730 UnicodeStringTest::TestCharString() { 1731 static const char originalCStr[] = 1732 "This is a large string that is meant to over flow the internal buffer of CharString. At the time of writing this test, the internal buffer is 128 bytes."; 1733 CharString chStr(originalCStr); 1734 if (strcmp(originalCStr, chStr) != 0) { 1735 errln("CharString doesn't work with large strings."); 1736 } 1737 } 1738 1739 /* 1740 * Namespace test, to make sure that macros like UNICODE_STRING include the 1741 * namespace qualifier. 1742 * 1743 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity. 1744 */ 1745 #if U_HAVE_NAMESPACE 1746 namespace bogus { 1747 class UnicodeString { 1748 public: 1749 enum EInvariant { kInvariant }; 1750 UnicodeString() : i(1) {} 1751 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {} 1752 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/ 1753 ) : i(length) {} 1754 private: 1755 int32_t i; 1756 }; 1757 } 1758 #endif 1759 1760 void 1761 UnicodeStringTest::TestNameSpace() { 1762 #if U_HAVE_NAMESPACE 1763 // Provoke name collision unless the UnicodeString macros properly 1764 // qualify the icu::UnicodeString class. 1765 using namespace bogus; 1766 1767 // Use all UnicodeString macros from unistr.h. 1768 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV); 1769 icu::UnicodeString s2=UNICODE_STRING("def", 3); 1770 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi"); 1771 1772 // Make sure the compiler does not optimize away instantiation of s1, s2, s3. 1773 icu::UnicodeString s4=s1+s2+s3; 1774 if(s4.length()!=9) { 1775 errln("Something wrong with UnicodeString::operator+()."); 1776 } 1777 #endif 1778 } 1779 1780 void 1781 UnicodeStringTest::TestUTF32() { 1782 // Input string length US_STACKBUF_SIZE to cause overflow of the 1783 // initially chosen fStackBuffer due to supplementary characters. 1784 static const UChar32 utf32[] = { 1785 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 1786 0x10000, 0x20000, 0xe0000, 0x10ffff 1787 }; 1788 static const UChar expected_utf16[] = { 1789 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 1790 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff 1791 }; 1792 UnicodeString from32 = UnicodeString::fromUTF32(utf32, LENGTHOF(utf32)); 1793 UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16)); 1794 if(from32 != expected) { 1795 errln("UnicodeString::fromUTF32() did not create the expected string."); 1796 } 1797 1798 static const UChar utf16[] = { 1799 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff 1800 }; 1801 static const UChar32 expected_utf32[] = { 1802 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff 1803 }; 1804 UChar32 result32[16]; 1805 UErrorCode errorCode = U_ZERO_ERROR; 1806 int32_t length32 = 1807 UnicodeString(FALSE, utf16, LENGTHOF(utf16)). 1808 toUTF32(result32, LENGTHOF(result32), errorCode); 1809 if( length32 != LENGTHOF(expected_utf32) || 1810 0 != uprv_memcmp(result32, expected_utf32, length32*4) || 1811 result32[length32] != 0 1812 ) { 1813 errln("UnicodeString::toUTF32() did not create the expected string."); 1814 } 1815 } 1816 1817 void 1818 UnicodeStringTest::TestUTF8() { 1819 static const uint8_t utf8[] = { 1820 // Code points: 1821 // 0x41, 0xd900, 1822 // 0x61, 0xdc00, 1823 // 0x110000, 0x5a, 1824 // 0x50000, 0x7a, 1825 // 0x10000, 0x20000, 1826 // 0xe0000, 0x10ffff 1827 0x41, 0xed, 0xa4, 0x80, 1828 0x61, 0xed, 0xb0, 0x80, 1829 0xf4, 0x90, 0x80, 0x80, 0x5a, 1830 0xf1, 0x90, 0x80, 0x80, 0x7a, 1831 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80, 1832 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf 1833 }; 1834 static const UChar expected_utf16[] = { 1835 0x41, 0xfffd, 1836 0x61, 0xfffd, 1837 0xfffd, 0x5a, 1838 0xd900, 0xdc00, 0x7a, 1839 0xd800, 0xdc00, 0xd840, 0xdc00, 1840 0xdb40, 0xdc00, 0xdbff, 0xdfff 1841 }; 1842 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8))); 1843 UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16)); 1844 1845 if(from8 != expected) { 1846 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string."); 1847 } 1848 #if U_HAVE_STD_STRING 1849 U_STD_NSQ string utf8_string((const char *)utf8, sizeof(utf8)); 1850 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string); 1851 if(from8b != expected) { 1852 errln("UnicodeString::fromUTF8(std::string) did not create the expected string."); 1853 } 1854 #endif 1855 1856 static const UChar utf16[] = { 1857 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff 1858 }; 1859 static const uint8_t expected_utf8[] = { 1860 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a, 1861 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf 1862 }; 1863 UnicodeString us(FALSE, utf16, LENGTHOF(utf16)); 1864 1865 char buffer[64]; 1866 CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer)); 1867 us.toUTF8(sink); 1868 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) || 1869 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8)) 1870 ) { 1871 errln("UnicodeString::toUTF8() did not create the expected string."); 1872 } 1873 #if U_HAVE_STD_STRING 1874 // Initial contents for testing that toUTF8String() appends. 1875 U_STD_NSQ string result8 = "-->"; 1876 U_STD_NSQ string expected8 = "-->" + U_STD_NSQ string((const char *)expected_utf8, sizeof(expected_utf8)); 1877 // Use the return value just for testing. 1878 U_STD_NSQ string &result8r = us.toUTF8String(result8); 1879 if(result8r != expected8 || &result8r != &result8) { 1880 errln("UnicodeString::toUTF8String() did not create the expected string."); 1881 } 1882 #endif 1883 } 1884