1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2002-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: custrtst.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002oct09 14 * created by: Markus W. Scherer 15 * 16 * Tests of ustring.h Unicode string API functions. 17 */ 18 19 #include "unicode/ustring.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/uiter.h" 22 #include "cintltst.h" 23 #include <string.h> 24 25 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 26 27 /* get the sign of an integer */ 28 #define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)>>31)|1) 29 30 /* test setup --------------------------------------------------------------- */ 31 32 static void setUpDataTable(void); 33 static void TestStringCopy(void); 34 static void TestStringFunctions(void); 35 static void TestStringSearching(void); 36 static void TestSurrogateSearching(void); 37 static void TestUnescape(void); 38 static void TestCountChar32(void); 39 static void TestUCharIterator(void); 40 41 void addUStringTest(TestNode** root); 42 43 void addUStringTest(TestNode** root) 44 { 45 addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy"); 46 addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions"); 47 addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching"); 48 addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching"); 49 addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape"); 50 addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32"); 51 addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator"); 52 } 53 54 /* test data for TestStringFunctions ---------------------------------------- */ 55 56 UChar*** dataTable = NULL; 57 58 static const char* raw[3][4] = { 59 60 /* First String */ 61 { "English_", "French_", "Croatian_", "English_"}, 62 /* Second String */ 63 { "United States", "France", "Croatia", "Unites States"}, 64 65 /* Concatenated string */ 66 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"} 67 }; 68 69 static void setUpDataTable() 70 { 71 int32_t i,j; 72 if(dataTable == NULL) { 73 dataTable = (UChar***)calloc(sizeof(UChar**),3); 74 75 for (i = 0; i < 3; i++) { 76 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4); 77 for (j = 0; j < 4; j++){ 78 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1)); 79 u_uastrcpy(dataTable[i][j],raw[i][j]); 80 } 81 } 82 } 83 } 84 85 static void cleanUpDataTable() 86 { 87 int32_t i,j; 88 if(dataTable != NULL) { 89 for (i=0; i<3; i++) { 90 for(j = 0; j<4; j++) { 91 free(dataTable[i][j]); 92 } 93 free(dataTable[i]); 94 } 95 free(dataTable); 96 } 97 dataTable = NULL; 98 } 99 100 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */ 101 static void TestStringFunctions() 102 { 103 int32_t i,j,k; 104 UChar temp[512]; 105 UChar nullTemp[512]; 106 char test[512]; 107 char tempOut[512]; 108 109 setUpDataTable(); 110 111 log_verbose("Testing u_strlen()\n"); 112 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2])) 113 log_err("There is an error in u_strlen()"); 114 115 log_verbose("Testing u_memcpy() and u_memcmp()\n"); 116 117 for(i=0;i<3;++i) 118 { 119 for(j=0;j<4;++j) 120 { 121 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); 122 temp[0] = 0; 123 temp[7] = 0xA4; /* Mark the end */ 124 u_memcpy(temp,dataTable[i][j], 7); 125 126 if(temp[7] != 0xA4) 127 log_err("an error occured in u_memcpy()\n"); 128 if(u_memcmp(temp, dataTable[i][j], 7)!=0) 129 log_err("an error occured in u_memcpy() or u_memcmp()\n"); 130 } 131 } 132 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0) 133 log_err("an error occured in u_memcmp()\n"); 134 135 log_verbose("Testing u_memset()\n"); 136 nullTemp[0] = 0; 137 nullTemp[7] = 0; 138 u_memset(nullTemp, 0xa4, 7); 139 for (i = 0; i < 7; i++) { 140 if(nullTemp[i] != 0xa4) { 141 log_err("an error occured in u_memset()\n"); 142 } 143 } 144 if(nullTemp[7] != 0) { 145 log_err("u_memset() went too far\n"); 146 } 147 148 u_memset(nullTemp, 0, 7); 149 nullTemp[7] = 0xa4; 150 temp[7] = 0; 151 u_memcpy(temp,nullTemp, 7); 152 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0) 153 log_err("an error occured in u_memcpy() or u_memcmp()\n"); 154 155 156 log_verbose("Testing u_memmove()\n"); 157 for (i = 0; i < 7; i++) { 158 temp[i] = (UChar)i; 159 } 160 u_memmove(temp + 1, temp, 7); 161 if(temp[0] != 0) { 162 log_err("an error occured in u_memmove()\n"); 163 } 164 for (i = 1; i <= 7; i++) { 165 if(temp[i] != (i - 1)) { 166 log_err("an error occured in u_memmove()\n"); 167 } 168 } 169 170 log_verbose("Testing u_strcpy() and u_strcmp()\n"); 171 172 for(i=0;i<3;++i) 173 { 174 for(j=0;j<4;++j) 175 { 176 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); 177 temp[0] = 0; 178 u_strcpy(temp,dataTable[i][j]); 179 180 if(u_strcmp(temp,dataTable[i][j])!=0) 181 log_err("something threw an error in u_strcpy() or u_strcmp()\n"); 182 } 183 } 184 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0) 185 log_err("an error occured in u_memcmp()\n"); 186 187 log_verbose("testing u_strcat()\n"); 188 i=0; 189 for(j=0; j<2;++j) 190 { 191 u_uastrcpy(temp, ""); 192 u_strcpy(temp,dataTable[i][j]); 193 u_strcat(temp,dataTable[i+1][j]); 194 if(u_strcmp(temp,dataTable[i+2][j])!=0) 195 log_err("something threw an error in u_strcat()\n"); 196 197 } 198 log_verbose("Testing u_strncmp()\n"); 199 for(i=0,j=0;j<4; ++j) 200 { 201 k=u_strlen(dataTable[i][j]); 202 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0) 203 log_err("Something threw an error in u_strncmp\n"); 204 } 205 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0) 206 log_err("an error occured in u_memcmp()\n"); 207 208 209 log_verbose("Testing u_strncat\n"); 210 for(i=0,j=0;j<4; ++j) 211 { 212 k=u_strlen(dataTable[i][j]); 213 214 u_uastrcpy(temp,""); 215 216 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0) 217 log_err("something threw an error in u_strncat or u_uastrcpy()\n"); 218 219 } 220 221 log_verbose("Testing u_strncpy() and u_uastrcpy()\n"); 222 for(i=2,j=0;j<4; ++j) 223 { 224 k=u_strlen(dataTable[i][j]); 225 u_strncpy(temp, dataTable[i][j],k); 226 temp[k] = 0xa4; 227 228 if(u_strncmp(temp, dataTable[i][j],k)!=0) 229 log_err("something threw an error in u_strncpy()\n"); 230 231 if(temp[k] != 0xa4) 232 log_err("something threw an error in u_strncpy()\n"); 233 234 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 235 u_uastrncpy(temp, raw[i][j], k-1); 236 if(u_strncmp(temp, dataTable[i][j],k-1)!=0) 237 log_err("something threw an error in u_uastrncpy(k-1)\n"); 238 239 if(temp[k-1] != 0x3F) 240 log_err("something threw an error in u_uastrncpy(k-1)\n"); 241 242 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 243 u_uastrncpy(temp, raw[i][j], k+1); 244 if(u_strcmp(temp, dataTable[i][j])!=0) 245 log_err("something threw an error in u_uastrncpy(k+1)\n"); 246 247 if(temp[k] != 0) 248 log_err("something threw an error in u_uastrncpy(k+1)\n"); 249 250 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 251 u_uastrncpy(temp, raw[i][j], k); 252 if(u_strncmp(temp, dataTable[i][j], k)!=0) 253 log_err("something threw an error in u_uastrncpy(k)\n"); 254 255 if(temp[k] != 0x3F) 256 log_err("something threw an error in u_uastrncpy(k)\n"); 257 } 258 259 log_verbose("Testing u_strchr() and u_memchr()\n"); 260 261 for(i=2,j=0;j<4;j++) 262 { 263 UChar saveVal = dataTable[i][j][0]; 264 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F); 265 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1); 266 267 log_verbose("%s ", u_austrcpy(tempOut, findPtr)); 268 269 if (findPtr == NULL || *findPtr != 0x005F) { 270 log_err("u_strchr can't find '_' in the string\n"); 271 } 272 273 findPtr = u_strchr32(dataTable[i][j], 0x005F); 274 if (findPtr == NULL || *findPtr != 0x005F) { 275 log_err("u_strchr32 can't find '_' in the string\n"); 276 } 277 278 findPtr = u_strchr(dataTable[i][j], 0); 279 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 280 log_err("u_strchr can't find NULL in the string\n"); 281 } 282 283 findPtr = u_strchr32(dataTable[i][j], 0); 284 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 285 log_err("u_strchr32 can't find NULL in the string\n"); 286 } 287 288 findPtr = u_memchr(dataTable[i][j], 0, dataSize); 289 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 290 log_err("u_memchr can't find NULL in the string\n"); 291 } 292 293 findPtr = u_memchr32(dataTable[i][j], 0, dataSize); 294 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 295 log_err("u_memchr32 can't find NULL in the string\n"); 296 } 297 298 dataTable[i][j][0] = 0; 299 /* Make sure we skip over the NULL termination */ 300 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize); 301 if (findPtr == NULL || *findPtr != 0x005F) { 302 log_err("u_memchr can't find '_' in the string\n"); 303 } 304 305 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize); 306 if (findPtr == NULL || *findPtr != 0x005F) { 307 log_err("u_memchr32 can't find '_' in the string\n"); 308 } 309 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize); 310 if (findPtr != NULL) { 311 log_err("Should have found NULL when the character is not there.\n"); 312 } 313 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */ 314 } 315 316 /* 317 * test that u_strchr32() 318 * does not find surrogate code points when they are part of matched pairs 319 * (= part of supplementary code points) 320 * Jitterbug 1542 321 */ 322 { 323 static const UChar s[]={ 324 /* 0 1 2 3 4 5 6 7 8 9 */ 325 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0 326 }; 327 328 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) { 329 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n"); 330 } 331 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) { 332 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n"); 333 } 334 } 335 336 log_verbose("Testing u_austrcpy()"); 337 u_austrcpy(test,dataTable[0][0]); 338 if(strcmp(test,raw[0][0])!=0) 339 log_err("There is an error in u_austrcpy()"); 340 341 342 log_verbose("Testing u_strtok_r()"); 343 { 344 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n"; 345 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"}; 346 UChar delimBuf[sizeof(test)]; 347 UChar currTokenBuf[sizeof(tokString)]; 348 UChar *state; 349 uint32_t currToken = 0; 350 UChar *ptr; 351 352 u_uastrcpy(temp, tokString); 353 u_uastrcpy(delimBuf, " "); 354 355 ptr = u_strtok_r(temp, delimBuf, &state); 356 u_uastrcpy(delimBuf, " ,"); 357 while (ptr != NULL) { 358 u_uastrcpy(currTokenBuf, tokens[currToken]); 359 if (u_strcmp(ptr, currTokenBuf) != 0) { 360 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]); 361 } 362 ptr = u_strtok_r(NULL, delimBuf, &state); 363 currToken++; 364 } 365 366 if (currToken != sizeof(tokens)/sizeof(tokens[0])) { 367 log_err("Didn't get correct number of tokens\n"); 368 } 369 state = delimBuf; /* Give it an "invalid" saveState */ 370 u_uastrcpy(currTokenBuf, ""); 371 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { 372 log_err("Didn't get NULL for empty string\n"); 373 } 374 if (state != NULL) { 375 log_err("State should be NULL for empty string\n"); 376 } 377 state = delimBuf; /* Give it an "invalid" saveState */ 378 u_uastrcpy(currTokenBuf, ", ,"); 379 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { 380 log_err("Didn't get NULL for a string of delimiters\n"); 381 } 382 if (state != NULL) { 383 log_err("State should be NULL for a string of delimiters\n"); 384 } 385 386 state = delimBuf; /* Give it an "invalid" saveState */ 387 u_uastrcpy(currTokenBuf, "q, ,"); 388 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) { 389 log_err("Got NULL for a string that does not begin with delimiters\n"); 390 } 391 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { 392 log_err("Didn't get NULL for a string that ends in delimiters\n"); 393 } 394 if (state != NULL) { 395 log_err("State should be NULL for empty string\n"); 396 } 397 398 state = delimBuf; /* Give it an "invalid" saveState */ 399 u_uastrcpy(currTokenBuf, tokString); 400 u_uastrcpy(temp, tokString); 401 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */ 402 ptr = u_strtok_r(currTokenBuf, delimBuf, &state); 403 if (ptr == NULL || u_strcmp(ptr, temp) != 0) { 404 log_err("Should have recieved the same string when there are no delimiters\n"); 405 } 406 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { 407 log_err("Should not have found another token in a one token string\n"); 408 } 409 } 410 411 /* test u_strcmpCodePointOrder() */ 412 { 413 /* these strings are in ascending order */ 414 static const UChar strings[][4]={ 415 { 0x61, 0 }, /* U+0061 */ 416 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ 417 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ 418 { 0xd800, 0 }, /* U+d800 */ 419 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ 420 { 0xdfff, 0 }, /* U+dfff */ 421 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ 422 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ 423 { 0xd800, 0xdc02, 0 }, /* U+10002 */ 424 { 0xd84d, 0xdc56, 0 } /* U+23456 */ 425 }; 426 427 UCharIterator iter1, iter2; 428 int32_t len1, len2, r1, r2; 429 430 for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) { 431 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) { 432 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i); 433 } 434 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) { 435 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i); 436 } 437 438 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */ 439 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) { 440 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i); 441 } 442 443 /* test u_strCompare(TRUE) */ 444 len1=u_strlen(strings[i]); 445 len2=u_strlen(strings[i+1]); 446 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 || 447 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 || 448 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 || 449 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0 450 ) { 451 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i); 452 } 453 454 /* test u_strCompare(FALSE) */ 455 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE); 456 r2=u_strcmp(strings[i], strings[i+1]); 457 if(_SIGN(r1)!=_SIGN(r2)) { 458 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i); 459 } 460 461 /* test u_strCompareIter() */ 462 uiter_setString(&iter1, strings[i], len1); 463 uiter_setString(&iter2, strings[i+1], len2); 464 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) { 465 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i); 466 } 467 r1=u_strCompareIter(&iter1, &iter2, FALSE); 468 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) { 469 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i); 470 } 471 } 472 } 473 474 cleanUpDataTable(); 475 } 476 477 static void TestStringSearching() 478 { 479 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0}; 480 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0}; 481 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0}; 482 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0}; 483 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0}; 484 const UChar surrMatchSet4[] = {0x0000}; 485 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0}; 486 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0}; 487 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */ 488 const UChar 489 empty[] = { 0 }, 490 a[] = { 0x61, 0 }, 491 ab[] = { 0x61, 0x62, 0 }, 492 ba[] = { 0x62, 0x61, 0 }, 493 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 }, 494 cd[] = { 0x63, 0x64, 0 }, 495 dc[] = { 0x64, 0x63, 0 }, 496 cdh[] = { 0x63, 0x64, 0x68, 0 }, 497 f[] = { 0x66, 0 }, 498 fg[] = { 0x66, 0x67, 0 }, 499 gf[] = { 0x67, 0x66, 0 }; 500 501 log_verbose("Testing u_strpbrk()"); 502 503 if (u_strpbrk(testString, a) != &testString[0]) { 504 log_err("u_strpbrk couldn't find first letter a.\n"); 505 } 506 if (u_strpbrk(testString, dc) != &testString[2]) { 507 log_err("u_strpbrk couldn't find d or c.\n"); 508 } 509 if (u_strpbrk(testString, cd) != &testString[2]) { 510 log_err("u_strpbrk couldn't find c or d.\n"); 511 } 512 if (u_strpbrk(testString, cdh) != &testString[2]) { 513 log_err("u_strpbrk couldn't find c, d or h.\n"); 514 } 515 if (u_strpbrk(testString, f) != NULL) { 516 log_err("u_strpbrk didn't return NULL for \"f\".\n"); 517 } 518 if (u_strpbrk(testString, fg) != NULL) { 519 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); 520 } 521 if (u_strpbrk(testString, gf) != NULL) { 522 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); 523 } 524 if (u_strpbrk(testString, empty) != NULL) { 525 log_err("u_strpbrk didn't return NULL for \"\".\n"); 526 } 527 528 log_verbose("Testing u_strpbrk() with surrogates"); 529 530 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) { 531 log_err("u_strpbrk couldn't find first letter a.\n"); 532 } 533 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) { 534 log_err("u_strpbrk couldn't find d or c.\n"); 535 } 536 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) { 537 log_err("u_strpbrk couldn't find c or d.\n"); 538 } 539 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) { 540 log_err("u_strpbrk couldn't find c, d or h.\n"); 541 } 542 if (u_strpbrk(testSurrogateString, f) != NULL) { 543 log_err("u_strpbrk didn't return NULL for \"f\".\n"); 544 } 545 if (u_strpbrk(testSurrogateString, fg) != NULL) { 546 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); 547 } 548 if (u_strpbrk(testSurrogateString, gf) != NULL) { 549 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); 550 } 551 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) { 552 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n"); 553 } 554 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) { 555 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); 556 } 557 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) { 558 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n"); 559 } 560 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) { 561 log_err("u_strpbrk should have returned NULL for empty string.\n"); 562 } 563 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) { 564 log_err("u_strpbrk should have found bad surrogate.\n"); 565 } 566 567 log_verbose("Testing u_strcspn()"); 568 569 if (u_strcspn(testString, a) != 0) { 570 log_err("u_strcspn couldn't find first letter a.\n"); 571 } 572 if (u_strcspn(testString, dc) != 2) { 573 log_err("u_strcspn couldn't find d or c.\n"); 574 } 575 if (u_strcspn(testString, cd) != 2) { 576 log_err("u_strcspn couldn't find c or d.\n"); 577 } 578 if (u_strcspn(testString, cdh) != 2) { 579 log_err("u_strcspn couldn't find c, d or h.\n"); 580 } 581 if (u_strcspn(testString, f) != u_strlen(testString)) { 582 log_err("u_strcspn didn't return NULL for \"f\".\n"); 583 } 584 if (u_strcspn(testString, fg) != u_strlen(testString)) { 585 log_err("u_strcspn didn't return NULL for \"fg\".\n"); 586 } 587 if (u_strcspn(testString, gf) != u_strlen(testString)) { 588 log_err("u_strcspn didn't return NULL for \"gf\".\n"); 589 } 590 591 log_verbose("Testing u_strcspn() with surrogates"); 592 593 if (u_strcspn(testSurrogateString, a) != 1) { 594 log_err("u_strcspn couldn't find first letter a.\n"); 595 } 596 if (u_strcspn(testSurrogateString, dc) != 5) { 597 log_err("u_strcspn couldn't find d or c.\n"); 598 } 599 if (u_strcspn(testSurrogateString, cd) != 5) { 600 log_err("u_strcspn couldn't find c or d.\n"); 601 } 602 if (u_strcspn(testSurrogateString, cdh) != 5) { 603 log_err("u_strcspn couldn't find c, d or h.\n"); 604 } 605 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) { 606 log_err("u_strcspn didn't return NULL for \"f\".\n"); 607 } 608 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) { 609 log_err("u_strcspn didn't return NULL for \"fg\".\n"); 610 } 611 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) { 612 log_err("u_strcspn didn't return NULL for \"gf\".\n"); 613 } 614 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) { 615 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n"); 616 } 617 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) { 618 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n"); 619 } 620 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) { 621 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n"); 622 } 623 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) { 624 log_err("u_strcspn should have returned strlen for empty string.\n"); 625 } 626 627 628 log_verbose("Testing u_strspn()"); 629 630 if (u_strspn(testString, a) != 1) { 631 log_err("u_strspn couldn't skip first letter a.\n"); 632 } 633 if (u_strspn(testString, ab) != 2) { 634 log_err("u_strspn couldn't skip a or b.\n"); 635 } 636 if (u_strspn(testString, ba) != 2) { 637 log_err("u_strspn couldn't skip a or b.\n"); 638 } 639 if (u_strspn(testString, f) != 0) { 640 log_err("u_strspn didn't return 0 for \"f\".\n"); 641 } 642 if (u_strspn(testString, dc) != 0) { 643 log_err("u_strspn couldn't find first letter a (skip d or c).\n"); 644 } 645 if (u_strspn(testString, abcd) != u_strlen(testString)) { 646 log_err("u_strspn couldn't skip over the whole string.\n"); 647 } 648 if (u_strspn(testString, empty) != 0) { 649 log_err("u_strspn should have returned 0 for empty string.\n"); 650 } 651 652 log_verbose("Testing u_strspn() with surrogates"); 653 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) { 654 log_err("u_strspn couldn't skip 0xdbff or a.\n"); 655 } 656 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) { 657 log_err("u_strspn couldn't skip 0xdbff or a.\n"); 658 } 659 if (u_strspn(testSurrogateString, f) != 0) { 660 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 661 } 662 if (u_strspn(testSurrogateString, dc) != 0) { 663 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 664 } 665 if (u_strspn(testSurrogateString, cd) != 0) { 666 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 667 } 668 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) { 669 log_err("u_strspn couldn't skip whole string.\n"); 670 } 671 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) { 672 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n"); 673 } 674 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) { 675 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); 676 } 677 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) { 678 log_err("u_strspn should have returned 0 for empty string.\n"); 679 } 680 } 681 682 /* 683 * All binary Unicode string searches should behave the same for equivalent input. 684 * See Jitterbug 2145. 685 * There are some new functions, too - just test them all. 686 */ 687 static void 688 TestSurrogateSearching() { 689 static const UChar s[]={ 690 /* 0 1 2 3 4 5 6 7 8 9 10 11 */ 691 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0 692 }, sub_a[]={ 693 0x61, 0 694 }, sub_b[]={ 695 0x62, 0 696 }, sub_lead[]={ 697 0xd801, 0 698 }, sub_trail[]={ 699 0xdc02, 0 700 }, sub_supp[]={ 701 0xd801, 0xdc02, 0 702 }, sub_supp2[]={ 703 0xd801, 0xdc03, 0 704 }, sub_a_lead[]={ 705 0x61, 0xd801, 0 706 }, sub_trail_a[]={ 707 0xdc02, 0x61, 0 708 }, sub_aba[]={ 709 0x61, 0x62, 0x61, 0 710 }; 711 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0; 712 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456; 713 714 const UChar *first, *last; 715 716 /* search for NUL code point: find end of string */ 717 first=s+u_strlen(s); 718 719 if( 720 first!=u_strchr(s, nul) || 721 first!=u_strchr32(s, nul) || 722 first!=u_memchr(s, nul, LENGTHOF(s)) || 723 first!=u_memchr32(s, nul, LENGTHOF(s)) || 724 first!=u_strrchr(s, nul) || 725 first!=u_strrchr32(s, nul) || 726 first!=u_memrchr(s, nul, LENGTHOF(s)) || 727 first!=u_memrchr32(s, nul, LENGTHOF(s)) 728 ) { 729 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n"); 730 } 731 732 /* search for empty substring: find beginning of string */ 733 if( 734 s!=u_strstr(s, &nul) || 735 s!=u_strFindFirst(s, -1, &nul, -1) || 736 s!=u_strFindFirst(s, -1, &nul, 0) || 737 s!=u_strFindFirst(s, LENGTHOF(s), &nul, -1) || 738 s!=u_strFindFirst(s, LENGTHOF(s), &nul, 0) || 739 s!=u_strrstr(s, &nul) || 740 s!=u_strFindLast(s, -1, &nul, -1) || 741 s!=u_strFindLast(s, -1, &nul, 0) || 742 s!=u_strFindLast(s, LENGTHOF(s), &nul, -1) || 743 s!=u_strFindLast(s, LENGTHOF(s), &nul, 0) 744 ) { 745 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n"); 746 } 747 748 /* find 'a' in s[1..10[ */ 749 first=s+3; 750 last=s+7; 751 if( 752 first!=u_strchr(s+1, a) || 753 first!=u_strchr32(s+1, a) || 754 first!=u_memchr(s+1, a, 9) || 755 first!=u_memchr32(s+1, a, 9) || 756 first!=u_strstr(s+1, sub_a) || 757 first!=u_strFindFirst(s+1, -1, sub_a, -1) || 758 first!=u_strFindFirst(s+1, -1, &a, 1) || 759 first!=u_strFindFirst(s+1, 9, sub_a, -1) || 760 first!=u_strFindFirst(s+1, 9, &a, 1) || 761 (s+10)!=u_strrchr(s+1, a) || 762 (s+10)!=u_strrchr32(s+1, a) || 763 last!=u_memrchr(s+1, a, 9) || 764 last!=u_memrchr32(s+1, a, 9) || 765 (s+10)!=u_strrstr(s+1, sub_a) || 766 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) || 767 (s+10)!=u_strFindLast(s+1, -1, &a, 1) || 768 last!=u_strFindLast(s+1, 9, sub_a, -1) || 769 last!=u_strFindLast(s+1, 9, &a, 1) 770 ) { 771 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n"); 772 } 773 774 /* do not find 'b' in s[1..10[ */ 775 if( 776 NULL!=u_strchr(s+1, b) || 777 NULL!=u_strchr32(s+1, b) || 778 NULL!=u_memchr(s+1, b, 9) || 779 NULL!=u_memchr32(s+1, b, 9) || 780 NULL!=u_strstr(s+1, sub_b) || 781 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) || 782 NULL!=u_strFindFirst(s+1, -1, &b, 1) || 783 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) || 784 NULL!=u_strFindFirst(s+1, 9, &b, 1) || 785 NULL!=u_strrchr(s+1, b) || 786 NULL!=u_strrchr32(s+1, b) || 787 NULL!=u_memrchr(s+1, b, 9) || 788 NULL!=u_memrchr32(s+1, b, 9) || 789 NULL!=u_strrstr(s+1, sub_b) || 790 NULL!=u_strFindLast(s+1, -1, sub_b, -1) || 791 NULL!=u_strFindLast(s+1, -1, &b, 1) || 792 NULL!=u_strFindLast(s+1, 9, sub_b, -1) || 793 NULL!=u_strFindLast(s+1, 9, &b, 1) 794 ) { 795 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n"); 796 } 797 798 /* do not find a non-code point in s[1..10[ */ 799 if( 800 NULL!=u_strchr32(s+1, ill) || 801 NULL!=u_memchr32(s+1, ill, 9) || 802 NULL!=u_strrchr32(s+1, ill) || 803 NULL!=u_memrchr32(s+1, ill, 9) 804 ) { 805 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n"); 806 } 807 808 /* find U+d801 in s[1..10[ */ 809 first=s+6; 810 if( 811 first!=u_strchr(s+1, lead) || 812 first!=u_strchr32(s+1, lead) || 813 first!=u_memchr(s+1, lead, 9) || 814 first!=u_memchr32(s+1, lead, 9) || 815 first!=u_strstr(s+1, sub_lead) || 816 first!=u_strFindFirst(s+1, -1, sub_lead, -1) || 817 first!=u_strFindFirst(s+1, -1, &lead, 1) || 818 first!=u_strFindFirst(s+1, 9, sub_lead, -1) || 819 first!=u_strFindFirst(s+1, 9, &lead, 1) || 820 first!=u_strrchr(s+1, lead) || 821 first!=u_strrchr32(s+1, lead) || 822 first!=u_memrchr(s+1, lead, 9) || 823 first!=u_memrchr32(s+1, lead, 9) || 824 first!=u_strrstr(s+1, sub_lead) || 825 first!=u_strFindLast(s+1, -1, sub_lead, -1) || 826 first!=u_strFindLast(s+1, -1, &lead, 1) || 827 first!=u_strFindLast(s+1, 9, sub_lead, -1) || 828 first!=u_strFindLast(s+1, 9, &lead, 1) 829 ) { 830 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n"); 831 } 832 833 /* find U+dc02 in s[1..10[ */ 834 first=s+4; 835 if( 836 first!=u_strchr(s+1, trail) || 837 first!=u_strchr32(s+1, trail) || 838 first!=u_memchr(s+1, trail, 9) || 839 first!=u_memchr32(s+1, trail, 9) || 840 first!=u_strstr(s+1, sub_trail) || 841 first!=u_strFindFirst(s+1, -1, sub_trail, -1) || 842 first!=u_strFindFirst(s+1, -1, &trail, 1) || 843 first!=u_strFindFirst(s+1, 9, sub_trail, -1) || 844 first!=u_strFindFirst(s+1, 9, &trail, 1) || 845 first!=u_strrchr(s+1, trail) || 846 first!=u_strrchr32(s+1, trail) || 847 first!=u_memrchr(s+1, trail, 9) || 848 first!=u_memrchr32(s+1, trail, 9) || 849 first!=u_strrstr(s+1, sub_trail) || 850 first!=u_strFindLast(s+1, -1, sub_trail, -1) || 851 first!=u_strFindLast(s+1, -1, &trail, 1) || 852 first!=u_strFindLast(s+1, 9, sub_trail, -1) || 853 first!=u_strFindLast(s+1, 9, &trail, 1) 854 ) { 855 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n"); 856 } 857 858 /* find U+10402 in s[1..10[ */ 859 first=s+1; 860 last=s+8; 861 if( 862 first!=u_strchr32(s+1, supp) || 863 first!=u_memchr32(s+1, supp, 9) || 864 first!=u_strstr(s+1, sub_supp) || 865 first!=u_strFindFirst(s+1, -1, sub_supp, -1) || 866 first!=u_strFindFirst(s+1, -1, sub_supp, 2) || 867 first!=u_strFindFirst(s+1, 9, sub_supp, -1) || 868 first!=u_strFindFirst(s+1, 9, sub_supp, 2) || 869 last!=u_strrchr32(s+1, supp) || 870 last!=u_memrchr32(s+1, supp, 9) || 871 last!=u_strrstr(s+1, sub_supp) || 872 last!=u_strFindLast(s+1, -1, sub_supp, -1) || 873 last!=u_strFindLast(s+1, -1, sub_supp, 2) || 874 last!=u_strFindLast(s+1, 9, sub_supp, -1) || 875 last!=u_strFindLast(s+1, 9, sub_supp, 2) 876 ) { 877 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n"); 878 } 879 880 /* do not find U+10402 in a single UChar */ 881 if( 882 NULL!=u_memchr32(s+1, supp, 1) || 883 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) || 884 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) || 885 NULL!=u_memrchr32(s+1, supp, 1) || 886 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) || 887 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) || 888 NULL!=u_memrchr32(s+2, supp, 1) || 889 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) || 890 NULL!=u_strFindLast(s+2, 1, sub_supp, 2) 891 ) { 892 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n"); 893 } 894 895 /* do not find U+10403 in s[1..10[ */ 896 if( 897 NULL!=u_strchr32(s+1, supp2) || 898 NULL!=u_memchr32(s+1, supp2, 9) || 899 NULL!=u_strstr(s+1, sub_supp2) || 900 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) || 901 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) || 902 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) || 903 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) || 904 NULL!=u_strrchr32(s+1, supp2) || 905 NULL!=u_memrchr32(s+1, supp2, 9) || 906 NULL!=u_strrstr(s+1, sub_supp2) || 907 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) || 908 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) || 909 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) || 910 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2) 911 ) { 912 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n"); 913 } 914 915 /* find <0061 d801> in s[1..10[ */ 916 first=s+5; 917 if( 918 first!=u_strstr(s+1, sub_a_lead) || 919 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) || 920 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) || 921 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) || 922 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) || 923 first!=u_strrstr(s+1, sub_a_lead) || 924 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) || 925 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) || 926 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) || 927 first!=u_strFindLast(s+1, 9, sub_a_lead, 2) 928 ) { 929 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n"); 930 } 931 932 /* find <dc02 0061> in s[1..10[ */ 933 first=s+4; 934 if( 935 first!=u_strstr(s+1, sub_trail_a) || 936 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) || 937 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) || 938 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) || 939 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) || 940 first!=u_strrstr(s+1, sub_trail_a) || 941 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) || 942 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) || 943 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) || 944 first!=u_strFindLast(s+1, 9, sub_trail_a, 2) 945 ) { 946 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n"); 947 } 948 949 /* do not find "aba" in s[1..10[ */ 950 if( 951 NULL!=u_strstr(s+1, sub_aba) || 952 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) || 953 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) || 954 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) || 955 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) || 956 NULL!=u_strrstr(s+1, sub_aba) || 957 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) || 958 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) || 959 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) || 960 NULL!=u_strFindLast(s+1, 9, sub_aba, 3) 961 ) { 962 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n"); 963 } 964 } 965 966 static void TestStringCopy() 967 { 968 UChar temp[40]; 969 UChar *result=0; 970 UChar subString[5]; 971 UChar uchars[]={0x61, 0x62, 0x63, 0x00}; 972 char charOut[40]; 973 char chars[]="abc"; /* needs default codepage */ 974 975 log_verbose("Testing u_uastrncpy() and u_uastrcpy()"); 976 977 u_uastrcpy(temp, "abc"); 978 if(u_strcmp(temp, uchars) != 0) { 979 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 980 } 981 982 temp[0] = 0xFB; /* load garbage into it */ 983 temp[1] = 0xFB; 984 temp[2] = 0xFB; 985 temp[3] = 0xFB; 986 987 u_uastrncpy(temp, "abcabcabc", 3); 988 if(u_strncmp(uchars, temp, 3) != 0){ 989 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 990 } 991 if(temp[3] != 0xFB) { 992 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n"); 993 } 994 995 charOut[0] = (char)0x7B; /* load garbage into it */ 996 charOut[1] = (char)0x7B; 997 charOut[2] = (char)0x7B; 998 charOut[3] = (char)0x7B; 999 1000 temp[0] = 0x0061; 1001 temp[1] = 0x0062; 1002 temp[2] = 0x0063; 1003 temp[3] = 0x0061; 1004 temp[4] = 0x0062; 1005 temp[5] = 0x0063; 1006 temp[6] = 0x0000; 1007 1008 u_austrncpy(charOut, temp, 3); 1009 if(strncmp(chars, charOut, 3) != 0){ 1010 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 1011 } 1012 if(charOut[3] != (char)0x7B) { 1013 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n"); 1014 } 1015 1016 /*Testing u_strchr()*/ 1017 log_verbose("Testing u_strchr\n"); 1018 temp[0]=0x42; 1019 temp[1]=0x62; 1020 temp[2]=0x62; 1021 temp[3]=0x63; 1022 temp[4]=0xd841; 1023 temp[5]=0xd841; 1024 temp[6]=0xdc02; 1025 temp[7]=0; 1026 result=u_strchr(temp, (UChar)0x62); 1027 if(result != temp+1){ 1028 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result); 1029 } 1030 /*Testing u_strstr()*/ 1031 log_verbose("Testing u_strstr\n"); 1032 subString[0]=0x62; 1033 subString[1]=0x63; 1034 subString[2]=0; 1035 result=u_strstr(temp, subString); 1036 if(result != temp+2){ 1037 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result); 1038 } 1039 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */ 1040 if(result != temp){ 1041 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result); 1042 } 1043 result=u_strstr(subString, temp); 1044 if(result != NULL){ 1045 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n"); 1046 } 1047 1048 /*Testing u_strchr32*/ 1049 log_verbose("Testing u_strchr32\n"); 1050 result=u_strchr32(temp, (UChar32)0x62); 1051 if(result != temp+1){ 1052 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result); 1053 } 1054 result=u_strchr32(temp, (UChar32)0xfb); 1055 if(result != NULL){ 1056 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n"); 1057 } 1058 result=u_strchr32(temp, (UChar32)0x20402); 1059 if(result != temp+5){ 1060 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result); 1061 } 1062 1063 temp[7]=0xfc00; 1064 result=u_memchr32(temp, (UChar32)0x20402, 7); 1065 if(result != temp+5){ 1066 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result); 1067 } 1068 result=u_memchr32(temp, (UChar32)0x20402, 6); 1069 if(result != NULL){ 1070 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result); 1071 } 1072 result=u_memchr32(temp, (UChar32)0x20402, 1); 1073 if(result != NULL){ 1074 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result); 1075 } 1076 result=u_memchr32(temp, (UChar32)0xfc00, 8); 1077 if(result != temp+7){ 1078 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result); 1079 } 1080 } 1081 1082 /* test u_unescape() and u_unescapeAt() ------------------------------------- */ 1083 1084 static void 1085 TestUnescape() { 1086 static UChar buffer[200]; 1087 1088 static const char* input = 1089 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}"; 1090 1091 static const UChar expect[]={ 1092 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20, 1093 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c, 1094 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20, 1095 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0 1096 }; 1097 static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1; 1098 int32_t length; 1099 1100 /* test u_unescape() */ 1101 length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0])); 1102 if(length!=explength || u_strcmp(buffer, expect)!=0) { 1103 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length, 1104 explength); 1105 } 1106 1107 /* try preflighting */ 1108 length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0])); 1109 if(length!=explength || u_strcmp(buffer, expect)!=0) { 1110 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength); 1111 } 1112 1113 /* ### TODO: test u_unescapeAt() */ 1114 } 1115 1116 /* test code point counting functions --------------------------------------- */ 1117 1118 /* reference implementation of u_strHasMoreChar32Than() */ 1119 static int32_t 1120 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { 1121 int32_t count=u_countChar32(s, length); 1122 return count>number; 1123 } 1124 1125 /* compare the real function against the reference */ 1126 static void 1127 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) { 1128 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) { 1129 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n", 1130 i, length, number, u_strHasMoreChar32Than(s, length, number)); 1131 } 1132 } 1133 1134 static void 1135 TestCountChar32() { 1136 static const UChar string[]={ 1137 0x61, 0x62, 0xd800, 0xdc00, 1138 0xd801, 0xdc01, 0x63, 0xd802, 1139 0x64, 0xdc03, 0x65, 0x66, 1140 0xd804, 0xdc04, 0xd805, 0xdc05, 1141 0x67 1142 }; 1143 UChar buffer[100]; 1144 int32_t i, length, number; 1145 1146 /* test u_strHasMoreChar32Than() with length>=0 */ 1147 length=LENGTHOF(string); 1148 while(length>=0) { 1149 for(i=0; i<=length; ++i) { 1150 for(number=-1; number<=((length-i)+2); ++number) { 1151 _testStrHasMoreChar32Than(string+i, i, length-i, number); 1152 } 1153 } 1154 --length; 1155 } 1156 1157 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */ 1158 length=LENGTHOF(string); 1159 u_memcpy(buffer, string, length); 1160 while(length>=0) { 1161 buffer[length]=0; 1162 for(i=0; i<=length; ++i) { 1163 for(number=-1; number<=((length-i)+2); ++number) { 1164 _testStrHasMoreChar32Than(string+i, i, -1, number); 1165 } 1166 } 1167 --length; 1168 } 1169 1170 /* test u_strHasMoreChar32Than() with NULL string (bad input) */ 1171 for(length=-1; length<=1; ++length) { 1172 for(i=0; i<=length; ++i) { 1173 for(number=-2; number<=2; ++number) { 1174 _testStrHasMoreChar32Than(NULL, 0, length, number); 1175 } 1176 } 1177 } 1178 } 1179 1180 /* UCharIterator ------------------------------------------------------------ */ 1181 1182 /* 1183 * Compare results from two iterators, should be same. 1184 * Assume that the text is not empty and that 1185 * iteration start==0 and iteration limit==length. 1186 */ 1187 static void 1188 compareIterators(UCharIterator *iter1, const char *n1, 1189 UCharIterator *iter2, const char *n2) { 1190 int32_t i, pos1, pos2, middle, length; 1191 UChar32 c1, c2; 1192 1193 /* compare lengths */ 1194 length=iter1->getIndex(iter1, UITER_LENGTH); 1195 pos2=iter2->getIndex(iter2, UITER_LENGTH); 1196 if(length!=pos2) { 1197 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2); 1198 return; 1199 } 1200 1201 /* set into the middle */ 1202 middle=length/2; 1203 1204 pos1=iter1->move(iter1, middle, UITER_ZERO); 1205 if(pos1!=middle) { 1206 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1); 1207 return; 1208 } 1209 1210 pos2=iter2->move(iter2, middle, UITER_ZERO); 1211 if(pos2!=middle) { 1212 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2); 1213 return; 1214 } 1215 1216 /* test current() */ 1217 c1=iter1->current(iter1); 1218 c2=iter2->current(iter2); 1219 if(c1!=c2) { 1220 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle); 1221 return; 1222 } 1223 1224 /* move forward 3 UChars */ 1225 for(i=0; i<3; ++i) { 1226 c1=iter1->next(iter1); 1227 c2=iter2->next(iter2); 1228 if(c1!=c2) { 1229 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1230 return; 1231 } 1232 } 1233 1234 /* move backward 5 UChars */ 1235 for(i=0; i<5; ++i) { 1236 c1=iter1->previous(iter1); 1237 c2=iter2->previous(iter2); 1238 if(c1!=c2) { 1239 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1240 return; 1241 } 1242 } 1243 1244 /* iterate forward from the beginning */ 1245 pos1=iter1->move(iter1, 0, UITER_START); 1246 if(pos1<0) { 1247 log_err("%s->move(start) failed\n", n1); 1248 return; 1249 } 1250 if(!iter1->hasNext(iter1)) { 1251 log_err("%s->hasNext() at the start returns FALSE\n", n1); 1252 return; 1253 } 1254 1255 pos2=iter2->move(iter2, 0, UITER_START); 1256 if(pos2<0) { 1257 log_err("%s->move(start) failed\n", n2); 1258 return; 1259 } 1260 if(!iter2->hasNext(iter2)) { 1261 log_err("%s->hasNext() at the start returns FALSE\n", n2); 1262 return; 1263 } 1264 1265 do { 1266 c1=iter1->next(iter1); 1267 c2=iter2->next(iter2); 1268 if(c1!=c2) { 1269 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1270 return; 1271 } 1272 } while(c1>=0); 1273 1274 if(iter1->hasNext(iter1)) { 1275 log_err("%s->hasNext() at the end returns TRUE\n", n1); 1276 return; 1277 } 1278 if(iter2->hasNext(iter2)) { 1279 log_err("%s->hasNext() at the end returns TRUE\n", n2); 1280 return; 1281 } 1282 1283 /* back to the middle */ 1284 pos1=iter1->move(iter1, middle, UITER_ZERO); 1285 if(pos1!=middle) { 1286 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1); 1287 return; 1288 } 1289 1290 pos2=iter2->move(iter2, middle, UITER_ZERO); 1291 if(pos2!=middle) { 1292 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2); 1293 return; 1294 } 1295 1296 /* move to index 1 */ 1297 pos1=iter1->move(iter1, 1, UITER_ZERO); 1298 if(pos1!=1) { 1299 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1); 1300 return; 1301 } 1302 1303 pos2=iter2->move(iter2, 1, UITER_ZERO); 1304 if(pos2!=1) { 1305 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2); 1306 return; 1307 } 1308 1309 /* iterate backward from the end */ 1310 pos1=iter1->move(iter1, 0, UITER_LIMIT); 1311 if(pos1<0) { 1312 log_err("%s->move(limit) failed\n", n1); 1313 return; 1314 } 1315 if(!iter1->hasPrevious(iter1)) { 1316 log_err("%s->hasPrevious() at the end returns FALSE\n", n1); 1317 return; 1318 } 1319 1320 pos2=iter2->move(iter2, 0, UITER_LIMIT); 1321 if(pos2<0) { 1322 log_err("%s->move(limit) failed\n", n2); 1323 return; 1324 } 1325 if(!iter2->hasPrevious(iter2)) { 1326 log_err("%s->hasPrevious() at the end returns FALSE\n", n2); 1327 return; 1328 } 1329 1330 do { 1331 c1=iter1->previous(iter1); 1332 c2=iter2->previous(iter2); 1333 if(c1!=c2) { 1334 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1335 return; 1336 } 1337 } while(c1>=0); 1338 1339 if(iter1->hasPrevious(iter1)) { 1340 log_err("%s->hasPrevious() at the start returns TRUE\n", n1); 1341 return; 1342 } 1343 if(iter2->hasPrevious(iter2)) { 1344 log_err("%s->hasPrevious() at the start returns TRUE\n", n2); 1345 return; 1346 } 1347 } 1348 1349 /* 1350 * Test the iterator's getState() and setState() functions. 1351 * iter1 and iter2 must be set up for the same iterator type and the same string 1352 * but may be physically different structs (different addresses). 1353 * 1354 * Assume that the text is not empty and that 1355 * iteration start==0 and iteration limit==length. 1356 * It must be 2<=middle<=length-2. 1357 */ 1358 static void 1359 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) { 1360 UChar32 u[4]; 1361 1362 UErrorCode errorCode; 1363 UChar32 c; 1364 uint32_t state; 1365 int32_t i, j; 1366 1367 /* get four UChars from the middle of the string */ 1368 iter1->move(iter1, middle-2, UITER_ZERO); 1369 for(i=0; i<4; ++i) { 1370 c=iter1->next(iter1); 1371 if(c<0) { 1372 /* the test violates the assumptions, see comment above */ 1373 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c); 1374 return; 1375 } 1376 u[i]=c; 1377 } 1378 1379 /* move to the middle and get the state */ 1380 iter1->move(iter1, -2, UITER_CURRENT); 1381 state=uiter_getState(iter1); 1382 1383 /* set the state into the second iterator and compare the results */ 1384 errorCode=U_ZERO_ERROR; 1385 uiter_setState(iter2, state, &errorCode); 1386 if(U_FAILURE(errorCode)) { 1387 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode)); 1388 return; 1389 } 1390 1391 c=iter2->current(iter2); 1392 if(c!=u[2]) { 1393 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]); 1394 } 1395 1396 c=iter2->previous(iter2); 1397 if(c!=u[1]) { 1398 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]); 1399 } 1400 1401 iter2->move(iter2, 2, UITER_CURRENT); 1402 c=iter2->next(iter2); 1403 if(c!=u[3]) { 1404 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]); 1405 } 1406 1407 iter2->move(iter2, -3, UITER_CURRENT); 1408 c=iter2->previous(iter2); 1409 if(c!=u[0]) { 1410 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]); 1411 } 1412 1413 /* move the second iterator back to the middle */ 1414 iter2->move(iter2, 1, UITER_CURRENT); 1415 iter2->next(iter2); 1416 1417 /* check that both are in the middle */ 1418 i=iter1->getIndex(iter1, UITER_CURRENT); 1419 j=iter2->getIndex(iter2, UITER_CURRENT); 1420 if(i!=middle) { 1421 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle); 1422 } 1423 if(i!=j) { 1424 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i); 1425 } 1426 1427 /* compare lengths */ 1428 i=iter1->getIndex(iter1, UITER_LENGTH); 1429 j=iter2->getIndex(iter2, UITER_LENGTH); 1430 if(i!=j) { 1431 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j); 1432 } 1433 } 1434 1435 static void 1436 TestUCharIterator() { 1437 static const UChar text[]={ 1438 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0 1439 }; 1440 char bytes[40]; 1441 1442 UCharIterator iter, iter1, iter2; 1443 UConverter *cnv; 1444 UErrorCode errorCode; 1445 int32_t length; 1446 1447 /* simple API/code coverage - test NOOP UCharIterator */ 1448 uiter_setString(&iter, NULL, 0); 1449 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 || 1450 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 || 1451 iter.hasNext(&iter) || iter.hasPrevious(&iter) 1452 ) { 1453 log_err("NOOP UCharIterator behaves unexpectedly\n"); 1454 } 1455 1456 /* test get/set state */ 1457 length=LENGTHOF(text)-1; 1458 uiter_setString(&iter1, text, -1); 1459 uiter_setString(&iter2, text, length); 1460 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2); 1461 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1); 1462 1463 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */ 1464 errorCode=U_ZERO_ERROR; 1465 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode); 1466 if(U_FAILURE(errorCode)) { 1467 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode)); 1468 return; 1469 } 1470 1471 uiter_setString(&iter1, text, -1); 1472 uiter_setUTF8(&iter2, bytes, length); 1473 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator"); 1474 1475 /* try again with length=-1 */ 1476 uiter_setUTF8(&iter2, bytes, -1); 1477 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1"); 1478 1479 /* test get/set state */ 1480 length=LENGTHOF(text)-1; 1481 uiter_setUTF8(&iter1, bytes, -1); 1482 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2); 1483 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1); 1484 1485 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */ 1486 errorCode=U_ZERO_ERROR; 1487 cnv=ucnv_open("UTF-16BE", &errorCode); 1488 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode); 1489 ucnv_close(cnv); 1490 if(U_FAILURE(errorCode)) { 1491 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode)); 1492 return; 1493 } 1494 1495 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */ 1496 bytes[length]=bytes[length+1]=0; 1497 1498 uiter_setString(&iter1, text, -1); 1499 uiter_setUTF16BE(&iter2, bytes, length); 1500 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator"); 1501 1502 /* try again with length=-1 */ 1503 uiter_setUTF16BE(&iter2, bytes, -1); 1504 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1"); 1505 1506 /* try again after moving the bytes up one, and with length=-1 */ 1507 memmove(bytes+1, bytes, length+2); 1508 uiter_setUTF16BE(&iter2, bytes+1, -1); 1509 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1"); 1510 1511 /* ### TODO test other iterators: CharacterIterator, Replaceable */ 1512 } 1513