1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2002-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: custrtst.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002oct09 14 * created by: Markus W. Scherer 15 * 16 * Tests of ustring.h Unicode string API functions. 17 */ 18 19 #include "unicode/ustring.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/uiter.h" 22 #include "cintltst.h" 23 #include <string.h> 24 25 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 26 27 /* get the sign of an integer */ 28 #define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)>>31)|1) 29 30 /* test setup --------------------------------------------------------------- */ 31 32 static void setUpDataTable(void); 33 static void TestStringCopy(void); 34 static void TestStringFunctions(void); 35 static void TestStringSearching(void); 36 static void TestSurrogateSearching(void); 37 static void TestUnescape(void); 38 static void TestCountChar32(void); 39 static void TestUCharIterator(void); 40 static void TestUNormIterator(void); 41 static void TestBadUNormIterator(void); 42 43 void addUStringTest(TestNode** root); 44 45 void addUStringTest(TestNode** root) 46 { 47 addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy"); 48 addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions"); 49 addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching"); 50 addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching"); 51 addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape"); 52 addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32"); 53 addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator"); 54 addTest(root, &TestUNormIterator, "tsutil/custrtst/TestUNormIterator"); 55 addTest(root, &TestBadUNormIterator, "tsutil/custrtst/TestBadUNormIterator"); 56 } 57 58 /* test data for TestStringFunctions ---------------------------------------- */ 59 60 UChar*** dataTable = NULL; 61 62 static const char* raw[3][4] = { 63 64 /* First String */ 65 { "English_", "French_", "Croatian_", "English_"}, 66 /* Second String */ 67 { "United States", "France", "Croatia", "Unites States"}, 68 69 /* Concatenated string */ 70 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"} 71 }; 72 73 static void setUpDataTable() 74 { 75 int32_t i,j; 76 if(dataTable == NULL) { 77 dataTable = (UChar***)calloc(sizeof(UChar**),3); 78 79 for (i = 0; i < 3; i++) { 80 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4); 81 for (j = 0; j < 4; j++){ 82 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1)); 83 u_uastrcpy(dataTable[i][j],raw[i][j]); 84 } 85 } 86 } 87 } 88 89 static void cleanUpDataTable() 90 { 91 int32_t i,j; 92 if(dataTable != NULL) { 93 for (i=0; i<3; i++) { 94 for(j = 0; j<4; j++) { 95 free(dataTable[i][j]); 96 } 97 free(dataTable[i]); 98 } 99 free(dataTable); 100 } 101 dataTable = NULL; 102 } 103 104 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */ 105 static void TestStringFunctions() 106 { 107 int32_t i,j,k; 108 UChar temp[512]; 109 UChar nullTemp[512]; 110 char test[512]; 111 char tempOut[512]; 112 113 setUpDataTable(); 114 115 log_verbose("Testing u_strlen()\n"); 116 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2])) 117 log_err("There is an error in u_strlen()"); 118 119 log_verbose("Testing u_memcpy() and u_memcmp()\n"); 120 121 for(i=0;i<3;++i) 122 { 123 for(j=0;j<4;++j) 124 { 125 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); 126 temp[0] = 0; 127 temp[7] = 0xA4; /* Mark the end */ 128 u_memcpy(temp,dataTable[i][j], 7); 129 130 if(temp[7] != 0xA4) 131 log_err("an error occured in u_memcpy()\n"); 132 if(u_memcmp(temp, dataTable[i][j], 7)!=0) 133 log_err("an error occured in u_memcpy() or u_memcmp()\n"); 134 } 135 } 136 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0) 137 log_err("an error occured in u_memcmp()\n"); 138 139 log_verbose("Testing u_memset()\n"); 140 nullTemp[0] = 0; 141 nullTemp[7] = 0; 142 u_memset(nullTemp, 0xa4, 7); 143 for (i = 0; i < 7; i++) { 144 if(nullTemp[i] != 0xa4) { 145 log_err("an error occured in u_memset()\n"); 146 } 147 } 148 if(nullTemp[7] != 0) { 149 log_err("u_memset() went too far\n"); 150 } 151 152 u_memset(nullTemp, 0, 7); 153 nullTemp[7] = 0xa4; 154 temp[7] = 0; 155 u_memcpy(temp,nullTemp, 7); 156 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0) 157 log_err("an error occured in u_memcpy() or u_memcmp()\n"); 158 159 160 log_verbose("Testing u_memmove()\n"); 161 for (i = 0; i < 7; i++) { 162 temp[i] = (UChar)i; 163 } 164 u_memmove(temp + 1, temp, 7); 165 if(temp[0] != 0) { 166 log_err("an error occured in u_memmove()\n"); 167 } 168 for (i = 1; i <= 7; i++) { 169 if(temp[i] != (i - 1)) { 170 log_err("an error occured in u_memmove()\n"); 171 } 172 } 173 174 log_verbose("Testing u_strcpy() and u_strcmp()\n"); 175 176 for(i=0;i<3;++i) 177 { 178 for(j=0;j<4;++j) 179 { 180 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); 181 temp[0] = 0; 182 u_strcpy(temp,dataTable[i][j]); 183 184 if(u_strcmp(temp,dataTable[i][j])!=0) 185 log_err("something threw an error in u_strcpy() or u_strcmp()\n"); 186 } 187 } 188 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0) 189 log_err("an error occured in u_memcmp()\n"); 190 191 log_verbose("testing u_strcat()\n"); 192 i=0; 193 for(j=0; j<2;++j) 194 { 195 u_uastrcpy(temp, ""); 196 u_strcpy(temp,dataTable[i][j]); 197 u_strcat(temp,dataTable[i+1][j]); 198 if(u_strcmp(temp,dataTable[i+2][j])!=0) 199 log_err("something threw an error in u_strcat()\n"); 200 201 } 202 log_verbose("Testing u_strncmp()\n"); 203 for(i=0,j=0;j<4; ++j) 204 { 205 k=u_strlen(dataTable[i][j]); 206 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0) 207 log_err("Something threw an error in u_strncmp\n"); 208 } 209 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0) 210 log_err("an error occured in u_memcmp()\n"); 211 212 213 log_verbose("Testing u_strncat\n"); 214 for(i=0,j=0;j<4; ++j) 215 { 216 k=u_strlen(dataTable[i][j]); 217 218 u_uastrcpy(temp,""); 219 220 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0) 221 log_err("something threw an error in u_strncat or u_uastrcpy()\n"); 222 223 } 224 225 log_verbose("Testing u_strncpy() and u_uastrcpy()\n"); 226 for(i=2,j=0;j<4; ++j) 227 { 228 k=u_strlen(dataTable[i][j]); 229 u_strncpy(temp, dataTable[i][j],k); 230 temp[k] = 0xa4; 231 232 if(u_strncmp(temp, dataTable[i][j],k)!=0) 233 log_err("something threw an error in u_strncpy()\n"); 234 235 if(temp[k] != 0xa4) 236 log_err("something threw an error in u_strncpy()\n"); 237 238 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 239 u_uastrncpy(temp, raw[i][j], k-1); 240 if(u_strncmp(temp, dataTable[i][j],k-1)!=0) 241 log_err("something threw an error in u_uastrncpy(k-1)\n"); 242 243 if(temp[k-1] != 0x3F) 244 log_err("something threw an error in u_uastrncpy(k-1)\n"); 245 246 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 247 u_uastrncpy(temp, raw[i][j], k+1); 248 if(u_strcmp(temp, dataTable[i][j])!=0) 249 log_err("something threw an error in u_uastrncpy(k+1)\n"); 250 251 if(temp[k] != 0) 252 log_err("something threw an error in u_uastrncpy(k+1)\n"); 253 254 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 255 u_uastrncpy(temp, raw[i][j], k); 256 if(u_strncmp(temp, dataTable[i][j], k)!=0) 257 log_err("something threw an error in u_uastrncpy(k)\n"); 258 259 if(temp[k] != 0x3F) 260 log_err("something threw an error in u_uastrncpy(k)\n"); 261 } 262 263 log_verbose("Testing u_strchr() and u_memchr()\n"); 264 265 for(i=2,j=0;j<4;j++) 266 { 267 UChar saveVal = dataTable[i][j][0]; 268 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F); 269 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1); 270 271 log_verbose("%s ", u_austrcpy(tempOut, findPtr)); 272 273 if (findPtr == NULL || *findPtr != 0x005F) { 274 log_err("u_strchr can't find '_' in the string\n"); 275 } 276 277 findPtr = u_strchr32(dataTable[i][j], 0x005F); 278 if (findPtr == NULL || *findPtr != 0x005F) { 279 log_err("u_strchr32 can't find '_' in the string\n"); 280 } 281 282 findPtr = u_strchr(dataTable[i][j], 0); 283 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 284 log_err("u_strchr can't find NULL in the string\n"); 285 } 286 287 findPtr = u_strchr32(dataTable[i][j], 0); 288 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 289 log_err("u_strchr32 can't find NULL in the string\n"); 290 } 291 292 findPtr = u_memchr(dataTable[i][j], 0, dataSize); 293 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 294 log_err("u_memchr can't find NULL in the string\n"); 295 } 296 297 findPtr = u_memchr32(dataTable[i][j], 0, dataSize); 298 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 299 log_err("u_memchr32 can't find NULL in the string\n"); 300 } 301 302 dataTable[i][j][0] = 0; 303 /* Make sure we skip over the NULL termination */ 304 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize); 305 if (findPtr == NULL || *findPtr != 0x005F) { 306 log_err("u_memchr can't find '_' in the string\n"); 307 } 308 309 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize); 310 if (findPtr == NULL || *findPtr != 0x005F) { 311 log_err("u_memchr32 can't find '_' in the string\n"); 312 } 313 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize); 314 if (findPtr != NULL) { 315 log_err("Should have found NULL when the character is not there.\n"); 316 } 317 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */ 318 } 319 320 /* 321 * test that u_strchr32() 322 * does not find surrogate code points when they are part of matched pairs 323 * (= part of supplementary code points) 324 * Jitterbug 1542 325 */ 326 { 327 static const UChar s[]={ 328 /* 0 1 2 3 4 5 6 7 8 9 */ 329 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0 330 }; 331 332 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) { 333 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n"); 334 } 335 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) { 336 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n"); 337 } 338 } 339 340 log_verbose("Testing u_austrcpy()"); 341 u_austrcpy(test,dataTable[0][0]); 342 if(strcmp(test,raw[0][0])!=0) 343 log_err("There is an error in u_austrcpy()"); 344 345 346 log_verbose("Testing u_strtok_r()"); 347 { 348 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n"; 349 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"}; 350 UChar delimBuf[sizeof(test)]; 351 UChar currTokenBuf[sizeof(tokString)]; 352 UChar *state; 353 uint32_t currToken = 0; 354 UChar *ptr; 355 356 u_uastrcpy(temp, tokString); 357 u_uastrcpy(delimBuf, " "); 358 359 ptr = u_strtok_r(temp, delimBuf, &state); 360 u_uastrcpy(delimBuf, " ,"); 361 while (ptr != NULL) { 362 u_uastrcpy(currTokenBuf, tokens[currToken]); 363 if (u_strcmp(ptr, currTokenBuf) != 0) { 364 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]); 365 } 366 ptr = u_strtok_r(NULL, delimBuf, &state); 367 currToken++; 368 } 369 370 if (currToken != sizeof(tokens)/sizeof(tokens[0])) { 371 log_err("Didn't get correct number of tokens\n"); 372 } 373 state = delimBuf; /* Give it an "invalid" saveState */ 374 u_uastrcpy(currTokenBuf, ""); 375 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { 376 log_err("Didn't get NULL for empty string\n"); 377 } 378 if (state != NULL) { 379 log_err("State should be NULL for empty string\n"); 380 } 381 state = delimBuf; /* Give it an "invalid" saveState */ 382 u_uastrcpy(currTokenBuf, ", ,"); 383 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { 384 log_err("Didn't get NULL for a string of delimiters\n"); 385 } 386 if (state != NULL) { 387 log_err("State should be NULL for a string of delimiters\n"); 388 } 389 390 state = delimBuf; /* Give it an "invalid" saveState */ 391 u_uastrcpy(currTokenBuf, "q, ,"); 392 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) { 393 log_err("Got NULL for a string that does not begin with delimiters\n"); 394 } 395 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { 396 log_err("Didn't get NULL for a string that ends in delimiters\n"); 397 } 398 if (state != NULL) { 399 log_err("State should be NULL for empty string\n"); 400 } 401 402 state = delimBuf; /* Give it an "invalid" saveState */ 403 u_uastrcpy(currTokenBuf, tokString); 404 u_uastrcpy(temp, tokString); 405 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */ 406 ptr = u_strtok_r(currTokenBuf, delimBuf, &state); 407 if (ptr == NULL || u_strcmp(ptr, temp) != 0) { 408 log_err("Should have recieved the same string when there are no delimiters\n"); 409 } 410 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { 411 log_err("Should not have found another token in a one token string\n"); 412 } 413 } 414 415 /* test u_strcmpCodePointOrder() */ 416 { 417 /* these strings are in ascending order */ 418 static const UChar strings[][4]={ 419 { 0x61, 0 }, /* U+0061 */ 420 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ 421 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ 422 { 0xd800, 0 }, /* U+d800 */ 423 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ 424 { 0xdfff, 0 }, /* U+dfff */ 425 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ 426 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ 427 { 0xd800, 0xdc02, 0 }, /* U+10002 */ 428 { 0xd84d, 0xdc56, 0 } /* U+23456 */ 429 }; 430 431 UCharIterator iter1, iter2; 432 int32_t len1, len2, r1, r2; 433 434 for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) { 435 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) { 436 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i); 437 } 438 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) { 439 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i); 440 } 441 442 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */ 443 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) { 444 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i); 445 } 446 447 /* test u_strCompare(TRUE) */ 448 len1=u_strlen(strings[i]); 449 len2=u_strlen(strings[i+1]); 450 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 || 451 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 || 452 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 || 453 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0 454 ) { 455 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i); 456 } 457 458 /* test u_strCompare(FALSE) */ 459 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE); 460 r2=u_strcmp(strings[i], strings[i+1]); 461 if(_SIGN(r1)!=_SIGN(r2)) { 462 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i); 463 } 464 465 /* test u_strCompareIter() */ 466 uiter_setString(&iter1, strings[i], len1); 467 uiter_setString(&iter2, strings[i+1], len2); 468 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) { 469 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i); 470 } 471 r1=u_strCompareIter(&iter1, &iter2, FALSE); 472 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) { 473 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i); 474 } 475 } 476 } 477 478 cleanUpDataTable(); 479 } 480 481 static void TestStringSearching() 482 { 483 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0}; 484 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0}; 485 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0}; 486 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0}; 487 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0}; 488 const UChar surrMatchSet4[] = {0x0000}; 489 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0}; 490 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0}; 491 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */ 492 const UChar 493 empty[] = { 0 }, 494 a[] = { 0x61, 0 }, 495 ab[] = { 0x61, 0x62, 0 }, 496 ba[] = { 0x62, 0x61, 0 }, 497 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 }, 498 cd[] = { 0x63, 0x64, 0 }, 499 dc[] = { 0x64, 0x63, 0 }, 500 cdh[] = { 0x63, 0x64, 0x68, 0 }, 501 f[] = { 0x66, 0 }, 502 fg[] = { 0x66, 0x67, 0 }, 503 gf[] = { 0x67, 0x66, 0 }; 504 505 log_verbose("Testing u_strpbrk()"); 506 507 if (u_strpbrk(testString, a) != &testString[0]) { 508 log_err("u_strpbrk couldn't find first letter a.\n"); 509 } 510 if (u_strpbrk(testString, dc) != &testString[2]) { 511 log_err("u_strpbrk couldn't find d or c.\n"); 512 } 513 if (u_strpbrk(testString, cd) != &testString[2]) { 514 log_err("u_strpbrk couldn't find c or d.\n"); 515 } 516 if (u_strpbrk(testString, cdh) != &testString[2]) { 517 log_err("u_strpbrk couldn't find c, d or h.\n"); 518 } 519 if (u_strpbrk(testString, f) != NULL) { 520 log_err("u_strpbrk didn't return NULL for \"f\".\n"); 521 } 522 if (u_strpbrk(testString, fg) != NULL) { 523 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); 524 } 525 if (u_strpbrk(testString, gf) != NULL) { 526 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); 527 } 528 if (u_strpbrk(testString, empty) != NULL) { 529 log_err("u_strpbrk didn't return NULL for \"\".\n"); 530 } 531 532 log_verbose("Testing u_strpbrk() with surrogates"); 533 534 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) { 535 log_err("u_strpbrk couldn't find first letter a.\n"); 536 } 537 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) { 538 log_err("u_strpbrk couldn't find d or c.\n"); 539 } 540 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) { 541 log_err("u_strpbrk couldn't find c or d.\n"); 542 } 543 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) { 544 log_err("u_strpbrk couldn't find c, d or h.\n"); 545 } 546 if (u_strpbrk(testSurrogateString, f) != NULL) { 547 log_err("u_strpbrk didn't return NULL for \"f\".\n"); 548 } 549 if (u_strpbrk(testSurrogateString, fg) != NULL) { 550 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); 551 } 552 if (u_strpbrk(testSurrogateString, gf) != NULL) { 553 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); 554 } 555 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) { 556 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n"); 557 } 558 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) { 559 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); 560 } 561 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) { 562 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n"); 563 } 564 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) { 565 log_err("u_strpbrk should have returned NULL for empty string.\n"); 566 } 567 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) { 568 log_err("u_strpbrk should have found bad surrogate.\n"); 569 } 570 571 log_verbose("Testing u_strcspn()"); 572 573 if (u_strcspn(testString, a) != 0) { 574 log_err("u_strcspn couldn't find first letter a.\n"); 575 } 576 if (u_strcspn(testString, dc) != 2) { 577 log_err("u_strcspn couldn't find d or c.\n"); 578 } 579 if (u_strcspn(testString, cd) != 2) { 580 log_err("u_strcspn couldn't find c or d.\n"); 581 } 582 if (u_strcspn(testString, cdh) != 2) { 583 log_err("u_strcspn couldn't find c, d or h.\n"); 584 } 585 if (u_strcspn(testString, f) != u_strlen(testString)) { 586 log_err("u_strcspn didn't return NULL for \"f\".\n"); 587 } 588 if (u_strcspn(testString, fg) != u_strlen(testString)) { 589 log_err("u_strcspn didn't return NULL for \"fg\".\n"); 590 } 591 if (u_strcspn(testString, gf) != u_strlen(testString)) { 592 log_err("u_strcspn didn't return NULL for \"gf\".\n"); 593 } 594 595 log_verbose("Testing u_strcspn() with surrogates"); 596 597 if (u_strcspn(testSurrogateString, a) != 1) { 598 log_err("u_strcspn couldn't find first letter a.\n"); 599 } 600 if (u_strcspn(testSurrogateString, dc) != 5) { 601 log_err("u_strcspn couldn't find d or c.\n"); 602 } 603 if (u_strcspn(testSurrogateString, cd) != 5) { 604 log_err("u_strcspn couldn't find c or d.\n"); 605 } 606 if (u_strcspn(testSurrogateString, cdh) != 5) { 607 log_err("u_strcspn couldn't find c, d or h.\n"); 608 } 609 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) { 610 log_err("u_strcspn didn't return NULL for \"f\".\n"); 611 } 612 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) { 613 log_err("u_strcspn didn't return NULL for \"fg\".\n"); 614 } 615 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) { 616 log_err("u_strcspn didn't return NULL for \"gf\".\n"); 617 } 618 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) { 619 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n"); 620 } 621 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) { 622 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n"); 623 } 624 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) { 625 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n"); 626 } 627 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) { 628 log_err("u_strcspn should have returned strlen for empty string.\n"); 629 } 630 631 632 log_verbose("Testing u_strspn()"); 633 634 if (u_strspn(testString, a) != 1) { 635 log_err("u_strspn couldn't skip first letter a.\n"); 636 } 637 if (u_strspn(testString, ab) != 2) { 638 log_err("u_strspn couldn't skip a or b.\n"); 639 } 640 if (u_strspn(testString, ba) != 2) { 641 log_err("u_strspn couldn't skip a or b.\n"); 642 } 643 if (u_strspn(testString, f) != 0) { 644 log_err("u_strspn didn't return 0 for \"f\".\n"); 645 } 646 if (u_strspn(testString, dc) != 0) { 647 log_err("u_strspn couldn't find first letter a (skip d or c).\n"); 648 } 649 if (u_strspn(testString, abcd) != u_strlen(testString)) { 650 log_err("u_strspn couldn't skip over the whole string.\n"); 651 } 652 if (u_strspn(testString, empty) != 0) { 653 log_err("u_strspn should have returned 0 for empty string.\n"); 654 } 655 656 log_verbose("Testing u_strspn() with surrogates"); 657 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) { 658 log_err("u_strspn couldn't skip 0xdbff or a.\n"); 659 } 660 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) { 661 log_err("u_strspn couldn't skip 0xdbff or a.\n"); 662 } 663 if (u_strspn(testSurrogateString, f) != 0) { 664 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 665 } 666 if (u_strspn(testSurrogateString, dc) != 0) { 667 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 668 } 669 if (u_strspn(testSurrogateString, cd) != 0) { 670 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 671 } 672 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) { 673 log_err("u_strspn couldn't skip whole string.\n"); 674 } 675 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) { 676 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n"); 677 } 678 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) { 679 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); 680 } 681 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) { 682 log_err("u_strspn should have returned 0 for empty string.\n"); 683 } 684 } 685 686 /* 687 * All binary Unicode string searches should behave the same for equivalent input. 688 * See Jitterbug 2145. 689 * There are some new functions, too - just test them all. 690 */ 691 static void 692 TestSurrogateSearching() { 693 static const UChar s[]={ 694 /* 0 1 2 3 4 5 6 7 8 9 10 11 */ 695 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0 696 }, sub_a[]={ 697 0x61, 0 698 }, sub_b[]={ 699 0x62, 0 700 }, sub_lead[]={ 701 0xd801, 0 702 }, sub_trail[]={ 703 0xdc02, 0 704 }, sub_supp[]={ 705 0xd801, 0xdc02, 0 706 }, sub_supp2[]={ 707 0xd801, 0xdc03, 0 708 }, sub_a_lead[]={ 709 0x61, 0xd801, 0 710 }, sub_trail_a[]={ 711 0xdc02, 0x61, 0 712 }, sub_aba[]={ 713 0x61, 0x62, 0x61, 0 714 }; 715 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0; 716 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456; 717 718 const UChar *first, *last; 719 720 /* search for NUL code point: find end of string */ 721 first=s+u_strlen(s); 722 723 if( 724 first!=u_strchr(s, nul) || 725 first!=u_strchr32(s, nul) || 726 first!=u_memchr(s, nul, LENGTHOF(s)) || 727 first!=u_memchr32(s, nul, LENGTHOF(s)) || 728 first!=u_strrchr(s, nul) || 729 first!=u_strrchr32(s, nul) || 730 first!=u_memrchr(s, nul, LENGTHOF(s)) || 731 first!=u_memrchr32(s, nul, LENGTHOF(s)) 732 ) { 733 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n"); 734 } 735 736 /* search for empty substring: find beginning of string */ 737 if( 738 s!=u_strstr(s, &nul) || 739 s!=u_strFindFirst(s, -1, &nul, -1) || 740 s!=u_strFindFirst(s, -1, &nul, 0) || 741 s!=u_strFindFirst(s, LENGTHOF(s), &nul, -1) || 742 s!=u_strFindFirst(s, LENGTHOF(s), &nul, 0) || 743 s!=u_strrstr(s, &nul) || 744 s!=u_strFindLast(s, -1, &nul, -1) || 745 s!=u_strFindLast(s, -1, &nul, 0) || 746 s!=u_strFindLast(s, LENGTHOF(s), &nul, -1) || 747 s!=u_strFindLast(s, LENGTHOF(s), &nul, 0) 748 ) { 749 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n"); 750 } 751 752 /* find 'a' in s[1..10[ */ 753 first=s+3; 754 last=s+7; 755 if( 756 first!=u_strchr(s+1, a) || 757 first!=u_strchr32(s+1, a) || 758 first!=u_memchr(s+1, a, 9) || 759 first!=u_memchr32(s+1, a, 9) || 760 first!=u_strstr(s+1, sub_a) || 761 first!=u_strFindFirst(s+1, -1, sub_a, -1) || 762 first!=u_strFindFirst(s+1, -1, &a, 1) || 763 first!=u_strFindFirst(s+1, 9, sub_a, -1) || 764 first!=u_strFindFirst(s+1, 9, &a, 1) || 765 (s+10)!=u_strrchr(s+1, a) || 766 (s+10)!=u_strrchr32(s+1, a) || 767 last!=u_memrchr(s+1, a, 9) || 768 last!=u_memrchr32(s+1, a, 9) || 769 (s+10)!=u_strrstr(s+1, sub_a) || 770 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) || 771 (s+10)!=u_strFindLast(s+1, -1, &a, 1) || 772 last!=u_strFindLast(s+1, 9, sub_a, -1) || 773 last!=u_strFindLast(s+1, 9, &a, 1) 774 ) { 775 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n"); 776 } 777 778 /* do not find 'b' in s[1..10[ */ 779 if( 780 NULL!=u_strchr(s+1, b) || 781 NULL!=u_strchr32(s+1, b) || 782 NULL!=u_memchr(s+1, b, 9) || 783 NULL!=u_memchr32(s+1, b, 9) || 784 NULL!=u_strstr(s+1, sub_b) || 785 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) || 786 NULL!=u_strFindFirst(s+1, -1, &b, 1) || 787 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) || 788 NULL!=u_strFindFirst(s+1, 9, &b, 1) || 789 NULL!=u_strrchr(s+1, b) || 790 NULL!=u_strrchr32(s+1, b) || 791 NULL!=u_memrchr(s+1, b, 9) || 792 NULL!=u_memrchr32(s+1, b, 9) || 793 NULL!=u_strrstr(s+1, sub_b) || 794 NULL!=u_strFindLast(s+1, -1, sub_b, -1) || 795 NULL!=u_strFindLast(s+1, -1, &b, 1) || 796 NULL!=u_strFindLast(s+1, 9, sub_b, -1) || 797 NULL!=u_strFindLast(s+1, 9, &b, 1) 798 ) { 799 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n"); 800 } 801 802 /* do not find a non-code point in s[1..10[ */ 803 if( 804 NULL!=u_strchr32(s+1, ill) || 805 NULL!=u_memchr32(s+1, ill, 9) || 806 NULL!=u_strrchr32(s+1, ill) || 807 NULL!=u_memrchr32(s+1, ill, 9) 808 ) { 809 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n"); 810 } 811 812 /* find U+d801 in s[1..10[ */ 813 first=s+6; 814 if( 815 first!=u_strchr(s+1, lead) || 816 first!=u_strchr32(s+1, lead) || 817 first!=u_memchr(s+1, lead, 9) || 818 first!=u_memchr32(s+1, lead, 9) || 819 first!=u_strstr(s+1, sub_lead) || 820 first!=u_strFindFirst(s+1, -1, sub_lead, -1) || 821 first!=u_strFindFirst(s+1, -1, &lead, 1) || 822 first!=u_strFindFirst(s+1, 9, sub_lead, -1) || 823 first!=u_strFindFirst(s+1, 9, &lead, 1) || 824 first!=u_strrchr(s+1, lead) || 825 first!=u_strrchr32(s+1, lead) || 826 first!=u_memrchr(s+1, lead, 9) || 827 first!=u_memrchr32(s+1, lead, 9) || 828 first!=u_strrstr(s+1, sub_lead) || 829 first!=u_strFindLast(s+1, -1, sub_lead, -1) || 830 first!=u_strFindLast(s+1, -1, &lead, 1) || 831 first!=u_strFindLast(s+1, 9, sub_lead, -1) || 832 first!=u_strFindLast(s+1, 9, &lead, 1) 833 ) { 834 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n"); 835 } 836 837 /* find U+dc02 in s[1..10[ */ 838 first=s+4; 839 if( 840 first!=u_strchr(s+1, trail) || 841 first!=u_strchr32(s+1, trail) || 842 first!=u_memchr(s+1, trail, 9) || 843 first!=u_memchr32(s+1, trail, 9) || 844 first!=u_strstr(s+1, sub_trail) || 845 first!=u_strFindFirst(s+1, -1, sub_trail, -1) || 846 first!=u_strFindFirst(s+1, -1, &trail, 1) || 847 first!=u_strFindFirst(s+1, 9, sub_trail, -1) || 848 first!=u_strFindFirst(s+1, 9, &trail, 1) || 849 first!=u_strrchr(s+1, trail) || 850 first!=u_strrchr32(s+1, trail) || 851 first!=u_memrchr(s+1, trail, 9) || 852 first!=u_memrchr32(s+1, trail, 9) || 853 first!=u_strrstr(s+1, sub_trail) || 854 first!=u_strFindLast(s+1, -1, sub_trail, -1) || 855 first!=u_strFindLast(s+1, -1, &trail, 1) || 856 first!=u_strFindLast(s+1, 9, sub_trail, -1) || 857 first!=u_strFindLast(s+1, 9, &trail, 1) 858 ) { 859 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n"); 860 } 861 862 /* find U+10402 in s[1..10[ */ 863 first=s+1; 864 last=s+8; 865 if( 866 first!=u_strchr32(s+1, supp) || 867 first!=u_memchr32(s+1, supp, 9) || 868 first!=u_strstr(s+1, sub_supp) || 869 first!=u_strFindFirst(s+1, -1, sub_supp, -1) || 870 first!=u_strFindFirst(s+1, -1, sub_supp, 2) || 871 first!=u_strFindFirst(s+1, 9, sub_supp, -1) || 872 first!=u_strFindFirst(s+1, 9, sub_supp, 2) || 873 last!=u_strrchr32(s+1, supp) || 874 last!=u_memrchr32(s+1, supp, 9) || 875 last!=u_strrstr(s+1, sub_supp) || 876 last!=u_strFindLast(s+1, -1, sub_supp, -1) || 877 last!=u_strFindLast(s+1, -1, sub_supp, 2) || 878 last!=u_strFindLast(s+1, 9, sub_supp, -1) || 879 last!=u_strFindLast(s+1, 9, sub_supp, 2) 880 ) { 881 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n"); 882 } 883 884 /* do not find U+10402 in a single UChar */ 885 if( 886 NULL!=u_memchr32(s+1, supp, 1) || 887 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) || 888 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) || 889 NULL!=u_memrchr32(s+1, supp, 1) || 890 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) || 891 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) || 892 NULL!=u_memrchr32(s+2, supp, 1) || 893 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) || 894 NULL!=u_strFindLast(s+2, 1, sub_supp, 2) 895 ) { 896 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n"); 897 } 898 899 /* do not find U+10403 in s[1..10[ */ 900 if( 901 NULL!=u_strchr32(s+1, supp2) || 902 NULL!=u_memchr32(s+1, supp2, 9) || 903 NULL!=u_strstr(s+1, sub_supp2) || 904 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) || 905 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) || 906 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) || 907 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) || 908 NULL!=u_strrchr32(s+1, supp2) || 909 NULL!=u_memrchr32(s+1, supp2, 9) || 910 NULL!=u_strrstr(s+1, sub_supp2) || 911 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) || 912 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) || 913 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) || 914 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2) 915 ) { 916 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n"); 917 } 918 919 /* find <0061 d801> in s[1..10[ */ 920 first=s+5; 921 if( 922 first!=u_strstr(s+1, sub_a_lead) || 923 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) || 924 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) || 925 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) || 926 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) || 927 first!=u_strrstr(s+1, sub_a_lead) || 928 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) || 929 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) || 930 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) || 931 first!=u_strFindLast(s+1, 9, sub_a_lead, 2) 932 ) { 933 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n"); 934 } 935 936 /* find <dc02 0061> in s[1..10[ */ 937 first=s+4; 938 if( 939 first!=u_strstr(s+1, sub_trail_a) || 940 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) || 941 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) || 942 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) || 943 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) || 944 first!=u_strrstr(s+1, sub_trail_a) || 945 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) || 946 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) || 947 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) || 948 first!=u_strFindLast(s+1, 9, sub_trail_a, 2) 949 ) { 950 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n"); 951 } 952 953 /* do not find "aba" in s[1..10[ */ 954 if( 955 NULL!=u_strstr(s+1, sub_aba) || 956 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) || 957 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) || 958 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) || 959 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) || 960 NULL!=u_strrstr(s+1, sub_aba) || 961 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) || 962 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) || 963 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) || 964 NULL!=u_strFindLast(s+1, 9, sub_aba, 3) 965 ) { 966 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n"); 967 } 968 } 969 970 static void TestStringCopy() 971 { 972 UChar temp[40]; 973 UChar *result=0; 974 UChar subString[5]; 975 UChar uchars[]={0x61, 0x62, 0x63, 0x00}; 976 char charOut[40]; 977 char chars[]="abc"; /* needs default codepage */ 978 979 log_verbose("Testing u_uastrncpy() and u_uastrcpy()"); 980 981 u_uastrcpy(temp, "abc"); 982 if(u_strcmp(temp, uchars) != 0) { 983 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 984 } 985 986 temp[0] = 0xFB; /* load garbage into it */ 987 temp[1] = 0xFB; 988 temp[2] = 0xFB; 989 temp[3] = 0xFB; 990 991 u_uastrncpy(temp, "abcabcabc", 3); 992 if(u_strncmp(uchars, temp, 3) != 0){ 993 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 994 } 995 if(temp[3] != 0xFB) { 996 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n"); 997 } 998 999 charOut[0] = (char)0x7B; /* load garbage into it */ 1000 charOut[1] = (char)0x7B; 1001 charOut[2] = (char)0x7B; 1002 charOut[3] = (char)0x7B; 1003 1004 temp[0] = 0x0061; 1005 temp[1] = 0x0062; 1006 temp[2] = 0x0063; 1007 temp[3] = 0x0061; 1008 temp[4] = 0x0062; 1009 temp[5] = 0x0063; 1010 temp[6] = 0x0000; 1011 1012 u_austrncpy(charOut, temp, 3); 1013 if(strncmp(chars, charOut, 3) != 0){ 1014 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 1015 } 1016 if(charOut[3] != (char)0x7B) { 1017 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n"); 1018 } 1019 1020 /*Testing u_strchr()*/ 1021 log_verbose("Testing u_strchr\n"); 1022 temp[0]=0x42; 1023 temp[1]=0x62; 1024 temp[2]=0x62; 1025 temp[3]=0x63; 1026 temp[4]=0xd841; 1027 temp[5]=0xd841; 1028 temp[6]=0xdc02; 1029 temp[7]=0; 1030 result=u_strchr(temp, (UChar)0x62); 1031 if(result != temp+1){ 1032 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result); 1033 } 1034 /*Testing u_strstr()*/ 1035 log_verbose("Testing u_strstr\n"); 1036 subString[0]=0x62; 1037 subString[1]=0x63; 1038 subString[2]=0; 1039 result=u_strstr(temp, subString); 1040 if(result != temp+2){ 1041 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result); 1042 } 1043 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */ 1044 if(result != temp){ 1045 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result); 1046 } 1047 result=u_strstr(subString, temp); 1048 if(result != NULL){ 1049 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n"); 1050 } 1051 1052 /*Testing u_strchr32*/ 1053 log_verbose("Testing u_strchr32\n"); 1054 result=u_strchr32(temp, (UChar32)0x62); 1055 if(result != temp+1){ 1056 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result); 1057 } 1058 result=u_strchr32(temp, (UChar32)0xfb); 1059 if(result != NULL){ 1060 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n"); 1061 } 1062 result=u_strchr32(temp, (UChar32)0x20402); 1063 if(result != temp+5){ 1064 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result); 1065 } 1066 1067 temp[7]=0xfc00; 1068 result=u_memchr32(temp, (UChar32)0x20402, 7); 1069 if(result != temp+5){ 1070 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result); 1071 } 1072 result=u_memchr32(temp, (UChar32)0x20402, 6); 1073 if(result != NULL){ 1074 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result); 1075 } 1076 result=u_memchr32(temp, (UChar32)0x20402, 1); 1077 if(result != NULL){ 1078 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result); 1079 } 1080 result=u_memchr32(temp, (UChar32)0xfc00, 8); 1081 if(result != temp+7){ 1082 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result); 1083 } 1084 } 1085 1086 /* test u_unescape() and u_unescapeAt() ------------------------------------- */ 1087 1088 static void 1089 TestUnescape() { 1090 static UChar buffer[200]; 1091 1092 static const char* input = 1093 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}"; 1094 1095 static const UChar expect[]={ 1096 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20, 1097 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c, 1098 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20, 1099 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0 1100 }; 1101 static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1; 1102 int32_t length; 1103 1104 /* test u_unescape() */ 1105 length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0])); 1106 if(length!=explength || u_strcmp(buffer, expect)!=0) { 1107 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length, 1108 explength); 1109 } 1110 1111 /* try preflighting */ 1112 length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0])); 1113 if(length!=explength || u_strcmp(buffer, expect)!=0) { 1114 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength); 1115 } 1116 1117 /* ### TODO: test u_unescapeAt() */ 1118 } 1119 1120 /* test code point counting functions --------------------------------------- */ 1121 1122 /* reference implementation of u_strHasMoreChar32Than() */ 1123 static int32_t 1124 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { 1125 int32_t count=u_countChar32(s, length); 1126 return count>number; 1127 } 1128 1129 /* compare the real function against the reference */ 1130 static void 1131 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) { 1132 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) { 1133 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n", 1134 i, length, number, u_strHasMoreChar32Than(s, length, number)); 1135 } 1136 } 1137 1138 static void 1139 TestCountChar32() { 1140 static const UChar string[]={ 1141 0x61, 0x62, 0xd800, 0xdc00, 1142 0xd801, 0xdc01, 0x63, 0xd802, 1143 0x64, 0xdc03, 0x65, 0x66, 1144 0xd804, 0xdc04, 0xd805, 0xdc05, 1145 0x67 1146 }; 1147 UChar buffer[100]; 1148 int32_t i, length, number; 1149 1150 /* test u_strHasMoreChar32Than() with length>=0 */ 1151 length=LENGTHOF(string); 1152 while(length>=0) { 1153 for(i=0; i<=length; ++i) { 1154 for(number=-1; number<=((length-i)+2); ++number) { 1155 _testStrHasMoreChar32Than(string+i, i, length-i, number); 1156 } 1157 } 1158 --length; 1159 } 1160 1161 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */ 1162 length=LENGTHOF(string); 1163 u_memcpy(buffer, string, length); 1164 while(length>=0) { 1165 buffer[length]=0; 1166 for(i=0; i<=length; ++i) { 1167 for(number=-1; number<=((length-i)+2); ++number) { 1168 _testStrHasMoreChar32Than(string+i, i, -1, number); 1169 } 1170 } 1171 --length; 1172 } 1173 1174 /* test u_strHasMoreChar32Than() with NULL string (bad input) */ 1175 for(length=-1; length<=1; ++length) { 1176 for(i=0; i<=length; ++i) { 1177 for(number=-2; number<=2; ++number) { 1178 _testStrHasMoreChar32Than(NULL, 0, length, number); 1179 } 1180 } 1181 } 1182 } 1183 1184 /* UCharIterator ------------------------------------------------------------ */ 1185 1186 /* 1187 * Compare results from two iterators, should be same. 1188 * Assume that the text is not empty and that 1189 * iteration start==0 and iteration limit==length. 1190 */ 1191 static void 1192 compareIterators(UCharIterator *iter1, const char *n1, 1193 UCharIterator *iter2, const char *n2) { 1194 int32_t i, pos1, pos2, middle, length; 1195 UChar32 c1, c2; 1196 1197 /* compare lengths */ 1198 length=iter1->getIndex(iter1, UITER_LENGTH); 1199 pos2=iter2->getIndex(iter2, UITER_LENGTH); 1200 if(length!=pos2) { 1201 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2); 1202 return; 1203 } 1204 1205 /* set into the middle */ 1206 middle=length/2; 1207 1208 pos1=iter1->move(iter1, middle, UITER_ZERO); 1209 if(pos1!=middle) { 1210 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1); 1211 return; 1212 } 1213 1214 pos2=iter2->move(iter2, middle, UITER_ZERO); 1215 if(pos2!=middle) { 1216 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2); 1217 return; 1218 } 1219 1220 /* test current() */ 1221 c1=iter1->current(iter1); 1222 c2=iter2->current(iter2); 1223 if(c1!=c2) { 1224 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle); 1225 return; 1226 } 1227 1228 /* move forward 3 UChars */ 1229 for(i=0; i<3; ++i) { 1230 c1=iter1->next(iter1); 1231 c2=iter2->next(iter2); 1232 if(c1!=c2) { 1233 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1234 return; 1235 } 1236 } 1237 1238 /* move backward 5 UChars */ 1239 for(i=0; i<5; ++i) { 1240 c1=iter1->previous(iter1); 1241 c2=iter2->previous(iter2); 1242 if(c1!=c2) { 1243 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1244 return; 1245 } 1246 } 1247 1248 /* iterate forward from the beginning */ 1249 pos1=iter1->move(iter1, 0, UITER_START); 1250 if(pos1<0) { 1251 log_err("%s->move(start) failed\n", n1); 1252 return; 1253 } 1254 if(!iter1->hasNext(iter1)) { 1255 log_err("%s->hasNext() at the start returns FALSE\n", n1); 1256 return; 1257 } 1258 1259 pos2=iter2->move(iter2, 0, UITER_START); 1260 if(pos2<0) { 1261 log_err("%s->move(start) failed\n", n2); 1262 return; 1263 } 1264 if(!iter2->hasNext(iter2)) { 1265 log_err("%s->hasNext() at the start returns FALSE\n", n2); 1266 return; 1267 } 1268 1269 do { 1270 c1=iter1->next(iter1); 1271 c2=iter2->next(iter2); 1272 if(c1!=c2) { 1273 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1274 return; 1275 } 1276 } while(c1>=0); 1277 1278 if(iter1->hasNext(iter1)) { 1279 log_err("%s->hasNext() at the end returns TRUE\n", n1); 1280 return; 1281 } 1282 if(iter2->hasNext(iter2)) { 1283 log_err("%s->hasNext() at the end returns TRUE\n", n2); 1284 return; 1285 } 1286 1287 /* back to the middle */ 1288 pos1=iter1->move(iter1, middle, UITER_ZERO); 1289 if(pos1!=middle) { 1290 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1); 1291 return; 1292 } 1293 1294 pos2=iter2->move(iter2, middle, UITER_ZERO); 1295 if(pos2!=middle) { 1296 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2); 1297 return; 1298 } 1299 1300 /* move to index 1 */ 1301 pos1=iter1->move(iter1, 1, UITER_ZERO); 1302 if(pos1!=1) { 1303 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1); 1304 return; 1305 } 1306 1307 pos2=iter2->move(iter2, 1, UITER_ZERO); 1308 if(pos2!=1) { 1309 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2); 1310 return; 1311 } 1312 1313 /* iterate backward from the end */ 1314 pos1=iter1->move(iter1, 0, UITER_LIMIT); 1315 if(pos1<0) { 1316 log_err("%s->move(limit) failed\n", n1); 1317 return; 1318 } 1319 if(!iter1->hasPrevious(iter1)) { 1320 log_err("%s->hasPrevious() at the end returns FALSE\n", n1); 1321 return; 1322 } 1323 1324 pos2=iter2->move(iter2, 0, UITER_LIMIT); 1325 if(pos2<0) { 1326 log_err("%s->move(limit) failed\n", n2); 1327 return; 1328 } 1329 if(!iter2->hasPrevious(iter2)) { 1330 log_err("%s->hasPrevious() at the end returns FALSE\n", n2); 1331 return; 1332 } 1333 1334 do { 1335 c1=iter1->previous(iter1); 1336 c2=iter2->previous(iter2); 1337 if(c1!=c2) { 1338 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1339 return; 1340 } 1341 } while(c1>=0); 1342 1343 if(iter1->hasPrevious(iter1)) { 1344 log_err("%s->hasPrevious() at the start returns TRUE\n", n1); 1345 return; 1346 } 1347 if(iter2->hasPrevious(iter2)) { 1348 log_err("%s->hasPrevious() at the start returns TRUE\n", n2); 1349 return; 1350 } 1351 } 1352 1353 /* 1354 * Test the iterator's getState() and setState() functions. 1355 * iter1 and iter2 must be set up for the same iterator type and the same string 1356 * but may be physically different structs (different addresses). 1357 * 1358 * Assume that the text is not empty and that 1359 * iteration start==0 and iteration limit==length. 1360 * It must be 2<=middle<=length-2. 1361 */ 1362 static void 1363 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) { 1364 UChar32 u[4]; 1365 1366 UErrorCode errorCode; 1367 UChar32 c; 1368 uint32_t state; 1369 int32_t i, j; 1370 1371 /* get four UChars from the middle of the string */ 1372 iter1->move(iter1, middle-2, UITER_ZERO); 1373 for(i=0; i<4; ++i) { 1374 c=iter1->next(iter1); 1375 if(c<0) { 1376 /* the test violates the assumptions, see comment above */ 1377 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c); 1378 return; 1379 } 1380 u[i]=c; 1381 } 1382 1383 /* move to the middle and get the state */ 1384 iter1->move(iter1, -2, UITER_CURRENT); 1385 state=uiter_getState(iter1); 1386 1387 /* set the state into the second iterator and compare the results */ 1388 errorCode=U_ZERO_ERROR; 1389 uiter_setState(iter2, state, &errorCode); 1390 if(U_FAILURE(errorCode)) { 1391 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode)); 1392 return; 1393 } 1394 1395 c=iter2->current(iter2); 1396 if(c!=u[2]) { 1397 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]); 1398 } 1399 1400 c=iter2->previous(iter2); 1401 if(c!=u[1]) { 1402 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]); 1403 } 1404 1405 iter2->move(iter2, 2, UITER_CURRENT); 1406 c=iter2->next(iter2); 1407 if(c!=u[3]) { 1408 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]); 1409 } 1410 1411 iter2->move(iter2, -3, UITER_CURRENT); 1412 c=iter2->previous(iter2); 1413 if(c!=u[0]) { 1414 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]); 1415 } 1416 1417 /* move the second iterator back to the middle */ 1418 iter2->move(iter2, 1, UITER_CURRENT); 1419 iter2->next(iter2); 1420 1421 /* check that both are in the middle */ 1422 i=iter1->getIndex(iter1, UITER_CURRENT); 1423 j=iter2->getIndex(iter2, UITER_CURRENT); 1424 if(i!=middle) { 1425 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle); 1426 } 1427 if(i!=j) { 1428 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i); 1429 } 1430 1431 /* compare lengths */ 1432 i=iter1->getIndex(iter1, UITER_LENGTH); 1433 j=iter2->getIndex(iter2, UITER_LENGTH); 1434 if(i!=j) { 1435 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j); 1436 } 1437 } 1438 1439 static void 1440 TestUCharIterator() { 1441 static const UChar text[]={ 1442 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0 1443 }; 1444 char bytes[40]; 1445 1446 UCharIterator iter, iter1, iter2; 1447 UConverter *cnv; 1448 UErrorCode errorCode; 1449 int32_t length; 1450 1451 /* simple API/code coverage - test NOOP UCharIterator */ 1452 uiter_setString(&iter, NULL, 0); 1453 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 || 1454 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 || 1455 iter.hasNext(&iter) || iter.hasPrevious(&iter) 1456 ) { 1457 log_err("NOOP UCharIterator behaves unexpectedly\n"); 1458 } 1459 1460 /* test get/set state */ 1461 length=LENGTHOF(text)-1; 1462 uiter_setString(&iter1, text, -1); 1463 uiter_setString(&iter2, text, length); 1464 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2); 1465 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1); 1466 1467 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */ 1468 errorCode=U_ZERO_ERROR; 1469 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode); 1470 if(U_FAILURE(errorCode)) { 1471 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode)); 1472 return; 1473 } 1474 1475 uiter_setString(&iter1, text, -1); 1476 uiter_setUTF8(&iter2, bytes, length); 1477 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator"); 1478 1479 /* try again with length=-1 */ 1480 uiter_setUTF8(&iter2, bytes, -1); 1481 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1"); 1482 1483 /* test get/set state */ 1484 length=LENGTHOF(text)-1; 1485 uiter_setUTF8(&iter1, bytes, -1); 1486 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2); 1487 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1); 1488 1489 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */ 1490 errorCode=U_ZERO_ERROR; 1491 cnv=ucnv_open("UTF-16BE", &errorCode); 1492 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode); 1493 ucnv_close(cnv); 1494 if(U_FAILURE(errorCode)) { 1495 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode)); 1496 return; 1497 } 1498 1499 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */ 1500 bytes[length]=bytes[length+1]=0; 1501 1502 uiter_setString(&iter1, text, -1); 1503 uiter_setUTF16BE(&iter2, bytes, length); 1504 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator"); 1505 1506 /* try again with length=-1 */ 1507 uiter_setUTF16BE(&iter2, bytes, -1); 1508 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1"); 1509 1510 /* try again after moving the bytes up one, and with length=-1 */ 1511 memmove(bytes+1, bytes, length+2); 1512 uiter_setUTF16BE(&iter2, bytes+1, -1); 1513 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1"); 1514 1515 /* ### TODO test other iterators: CharacterIterator, Replaceable */ 1516 } 1517 1518 #if UCONFIG_NO_COLLATION 1519 1520 static void 1521 TestUNormIterator() { 1522 /* test nothing */ 1523 } 1524 1525 static void 1526 TestBadUNormIterator(void) { 1527 /* test nothing, as well */ 1528 } 1529 1530 #else 1531 1532 #include "unicode/unorm.h" 1533 #include "unorm_it.h" 1534 1535 /* 1536 * Compare results from two iterators, should be same. 1537 * Assume that the text is not empty and that 1538 * iteration start==0 and iteration limit==length. 1539 * 1540 * Modified version of compareIterators() but does not assume that indexes 1541 * are available. 1542 */ 1543 static void 1544 compareIterNoIndexes(UCharIterator *iter1, const char *n1, 1545 UCharIterator *iter2, const char *n2, 1546 int32_t middle) { 1547 uint32_t state; 1548 int32_t i; 1549 UChar32 c1, c2; 1550 UErrorCode errorCode; 1551 1552 /* code coverage for unorm_it.c/unormIteratorGetIndex() */ 1553 if( 1554 iter2->getIndex(iter2, UITER_START)!=0 || 1555 iter2->getIndex(iter2, UITER_LENGTH)!=UITER_UNKNOWN_INDEX 1556 ) { 1557 log_err("UNormIterator.getIndex() failed\n"); 1558 } 1559 1560 /* set into the middle */ 1561 iter1->move(iter1, middle, UITER_ZERO); 1562 iter2->move(iter2, middle, UITER_ZERO); 1563 1564 /* test current() */ 1565 c1=iter1->current(iter1); 1566 c2=iter2->current(iter2); 1567 if(c1!=c2) { 1568 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle); 1569 return; 1570 } 1571 1572 /* move forward 3 UChars */ 1573 for(i=0; i<3; ++i) { 1574 c1=iter1->next(iter1); 1575 c2=iter2->next(iter2); 1576 if(c1!=c2) { 1577 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1578 return; 1579 } 1580 } 1581 1582 /* move backward 5 UChars */ 1583 for(i=0; i<5; ++i) { 1584 c1=iter1->previous(iter1); 1585 c2=iter2->previous(iter2); 1586 if(c1!=c2) { 1587 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1588 return; 1589 } 1590 } 1591 1592 /* iterate forward from the beginning */ 1593 iter1->move(iter1, 0, UITER_START); 1594 if(!iter1->hasNext(iter1)) { 1595 log_err("%s->hasNext() at the start returns FALSE\n", n1); 1596 return; 1597 } 1598 1599 iter2->move(iter2, 0, UITER_START); 1600 if(!iter2->hasNext(iter2)) { 1601 log_err("%s->hasNext() at the start returns FALSE\n", n2); 1602 return; 1603 } 1604 1605 do { 1606 c1=iter1->next(iter1); 1607 c2=iter2->next(iter2); 1608 if(c1!=c2) { 1609 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1610 return; 1611 } 1612 } while(c1>=0); 1613 1614 if(iter1->hasNext(iter1)) { 1615 log_err("%s->hasNext() at the end returns TRUE\n", n1); 1616 return; 1617 } 1618 if(iter2->hasNext(iter2)) { 1619 log_err("%s->hasNext() at the end returns TRUE\n", n2); 1620 return; 1621 } 1622 1623 /* iterate backward */ 1624 do { 1625 c1=iter1->previous(iter1); 1626 c2=iter2->previous(iter2); 1627 if(c1!=c2) { 1628 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1629 return; 1630 } 1631 } while(c1>=0); 1632 1633 /* back to the middle */ 1634 iter1->move(iter1, middle, UITER_ZERO); 1635 iter2->move(iter2, middle, UITER_ZERO); 1636 1637 /* try get/set state */ 1638 while((state=uiter_getState(iter2))==UITER_NO_STATE) { 1639 if(!iter2->hasNext(iter2)) { 1640 log_err("%s has no known state from middle=%d to the end\n", n2, middle); 1641 return; 1642 } 1643 iter2->next(iter2); 1644 } 1645 1646 errorCode=U_ZERO_ERROR; 1647 1648 c2=iter2->current(iter2); 1649 iter2->move(iter2, 0, UITER_ZERO); 1650 uiter_setState(iter2, state, &errorCode); 1651 c1=iter2->current(iter2); 1652 if(U_FAILURE(errorCode) || c1!=c2) { 1653 log_err("%s->current() differs across get/set state, U+%04x vs. U+%04x\n", n2, c2, c1); 1654 return; 1655 } 1656 1657 c2=iter2->previous(iter2); 1658 iter2->move(iter2, 0, UITER_ZERO); 1659 uiter_setState(iter2, state, &errorCode); 1660 c1=iter2->previous(iter2); 1661 if(U_FAILURE(errorCode) || c1!=c2) { 1662 log_err("%s->previous() differs across get/set state, U+%04x vs. U+%04x\n", n2, c2, c1); 1663 return; 1664 } 1665 1666 /* iterate backward from the end */ 1667 iter1->move(iter1, 0, UITER_LIMIT); 1668 if(!iter1->hasPrevious(iter1)) { 1669 log_err("%s->hasPrevious() at the end returns FALSE\n", n1); 1670 return; 1671 } 1672 1673 iter2->move(iter2, 0, UITER_LIMIT); 1674 if(!iter2->hasPrevious(iter2)) { 1675 log_err("%s->hasPrevious() at the end returns FALSE\n", n2); 1676 return; 1677 } 1678 1679 do { 1680 c1=iter1->previous(iter1); 1681 c2=iter2->previous(iter2); 1682 if(c1!=c2) { 1683 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1684 return; 1685 } 1686 } while(c1>=0); 1687 1688 if(iter1->hasPrevious(iter1)) { 1689 log_err("%s->hasPrevious() at the start returns TRUE\n", n1); 1690 return; 1691 } 1692 if(iter2->hasPrevious(iter2)) { 1693 log_err("%s->hasPrevious() at the start returns TRUE\n", n2); 1694 return; 1695 } 1696 } 1697 1698 /* n2 must have a digit 1 at the end, will be incremented with the normalization mode */ 1699 static void 1700 testUNormIteratorWithText(const UChar *text, int32_t textLength, int32_t middle, 1701 const char *name1, const char *n2) { 1702 UChar buffer[600]; 1703 char name2[40]; 1704 1705 UCharIterator iter1, iter2, *iter; 1706 UNormIterator *uni; 1707 1708 UNormalizationMode mode; 1709 UErrorCode errorCode; 1710 int32_t length; 1711 1712 /* open a normalizing iterator */ 1713 errorCode=U_ZERO_ERROR; 1714 uni=unorm_openIter(NULL, 0, &errorCode); 1715 if(U_FAILURE(errorCode)) { 1716 log_err("unorm_openIter() fails: %s\n", u_errorName(errorCode)); 1717 return; 1718 } 1719 1720 /* set iterator 2 to the original text */ 1721 uiter_setString(&iter2, text, textLength); 1722 1723 strcpy(name2, n2); 1724 1725 /* test the normalizing iterator for each mode */ 1726 for(mode=UNORM_NONE; mode<UNORM_MODE_COUNT; ++mode) { 1727 length=unorm_normalize(text, textLength, mode, 0, buffer, LENGTHOF(buffer), &errorCode); 1728 if(U_FAILURE(errorCode)) { 1729 log_data_err("unorm_normalize(mode %d) failed: %s - (Are you missing data?)\n", mode, u_errorName(errorCode)); 1730 break; 1731 } 1732 1733 /* set iterator 1 to the normalized text */ 1734 uiter_setString(&iter1, buffer, length); 1735 1736 /* set the normalizing iterator to use iter2 */ 1737 iter=unorm_setIter(uni, &iter2, mode, &errorCode); 1738 if(U_FAILURE(errorCode)) { 1739 log_err("unorm_setIter(mode %d) failed: %s\n", mode, u_errorName(errorCode)); 1740 break; 1741 } 1742 1743 compareIterNoIndexes(&iter1, name1, iter, name2, middle); 1744 ++name2[strlen(name2)-1]; 1745 } 1746 1747 unorm_closeIter(uni); 1748 } 1749 1750 static void 1751 TestUNormIterator() { 1752 static const UChar text[]={ /* must contain <00C5 0327> see u_strchr() below */ 1753 0x61, /* 'a' */ 1754 0xe4, 0x61, 0x308, /* variations of 'a'+umlaut */ 1755 0xc5, 0x327, 0x41, 0x30a, 0x327, 0x41, 0x327, 0x30a, /* variations of 'A'+ring+cedilla */ 1756 0xfb03, 0xfb00, 0x69, 0x66, 0x66, 0x69, 0x66, 0xfb01 /* variations of 'ffi' */ 1757 }; 1758 static const UChar surrogateText[]={ 1759 0x6e, 0xd900, 0x6a, 0xdc00, 0xd900, 0xdc00, 0x61 1760 }; 1761 1762 UChar longText[600]; 1763 int32_t i, middle, length; 1764 1765 length=LENGTHOF(text); 1766 testUNormIteratorWithText(text, length, length/2, "UCharIter", "UNormIter1"); 1767 testUNormIteratorWithText(text, length, length, "UCharIterEnd", "UNormIterEnd1"); 1768 1769 /* test again, this time with an insane string to cause internal buffer overflows */ 1770 middle=(int32_t)(u_strchr(text, 0x327)-text); /* see comment at text[] */ 1771 memcpy(longText, text, middle*U_SIZEOF_UCHAR); 1772 for(i=0; i<150; ++i) { 1773 longText[middle+i]=0x30a; /* insert many rings between 'A-ring' and cedilla */ 1774 } 1775 memcpy(longText+middle+i, text+middle, (LENGTHOF(text)-middle)*U_SIZEOF_UCHAR); 1776 length=LENGTHOF(text)+i; 1777 1778 /* append another copy of this string for more overflows */ 1779 memcpy(longText+length, longText, length*U_SIZEOF_UCHAR); 1780 length*=2; 1781 1782 /* the first test of the following two starts at length/4, inside the sea of combining rings */ 1783 testUNormIteratorWithText(longText, length, length/4, "UCharIterLong", "UNormIterLong1"); 1784 testUNormIteratorWithText(longText, length, length, "UCharIterLongEnd", "UNormIterLongEnd1"); 1785 1786 length=LENGTHOF(surrogateText); 1787 testUNormIteratorWithText(surrogateText, length, length/4, "UCharIterSurr", "UNormIterSurr1"); 1788 testUNormIteratorWithText(surrogateText, length, length, "UCharIterSurrEnd", "UNormIterSurrEnd1"); 1789 } 1790 1791 static void 1792 TestBadUNormIterator(void) { 1793 #if !UCONFIG_NO_NORMALIZATION 1794 UErrorCode status = U_ILLEGAL_ESCAPE_SEQUENCE; 1795 UNormIterator *uni; 1796 1797 unorm_setIter(NULL, NULL, UNORM_NONE, &status); 1798 if (status != U_ILLEGAL_ESCAPE_SEQUENCE) { 1799 log_err("unorm_setIter changed the error code to: %s\n", u_errorName(status)); 1800 } 1801 status = U_ZERO_ERROR; 1802 unorm_setIter(NULL, NULL, UNORM_NONE, &status); 1803 if (status != U_ILLEGAL_ARGUMENT_ERROR) { 1804 log_err("unorm_setIter didn't react correctly to bad arguments: %s\n", u_errorName(status)); 1805 } 1806 status = U_ZERO_ERROR; 1807 uni=unorm_openIter(NULL, 0, &status); 1808 if(U_FAILURE(status)) { 1809 log_err("unorm_openIter() fails: %s\n", u_errorName(status)); 1810 return; 1811 } 1812 unorm_setIter(uni, NULL, UNORM_NONE, &status); 1813 unorm_closeIter(uni); 1814 #endif 1815 } 1816 1817 #endif 1818