1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2002-2015, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: custrtst.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002oct09 14 * created by: Markus W. Scherer 15 * 16 * Tests of ustring.h Unicode string API functions. 17 */ 18 19 #include "unicode/ustring.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/uiter.h" 22 #include "cintltst.h" 23 #include "cmemory.h" 24 #include <string.h> 25 26 /* get the sign of an integer */ 27 #define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)>>31)|1) 28 29 /* test setup --------------------------------------------------------------- */ 30 31 static void setUpDataTable(void); 32 static void TestStringCopy(void); 33 static void TestStringFunctions(void); 34 static void TestStringSearching(void); 35 static void TestSurrogateSearching(void); 36 static void TestUnescape(void); 37 static void TestCountChar32(void); 38 static void TestUCharIterator(void); 39 40 void addUStringTest(TestNode** root); 41 42 void addUStringTest(TestNode** root) 43 { 44 addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy"); 45 addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions"); 46 addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching"); 47 addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching"); 48 addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape"); 49 addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32"); 50 addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator"); 51 } 52 53 /* test data for TestStringFunctions ---------------------------------------- */ 54 55 UChar*** dataTable = NULL; 56 57 static const char* raw[3][4] = { 58 59 /* First String */ 60 { "English_", "French_", "Croatian_", "English_"}, 61 /* Second String */ 62 { "United States", "France", "Croatia", "Unites States"}, 63 64 /* Concatenated string */ 65 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"} 66 }; 67 68 static void setUpDataTable() 69 { 70 int32_t i,j; 71 if(dataTable == NULL) { 72 dataTable = (UChar***)calloc(sizeof(UChar**),3); 73 74 for (i = 0; i < 3; i++) { 75 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4); 76 for (j = 0; j < 4; j++){ 77 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1)); 78 u_uastrcpy(dataTable[i][j],raw[i][j]); 79 } 80 } 81 } 82 } 83 84 static void cleanUpDataTable() 85 { 86 int32_t i,j; 87 if(dataTable != NULL) { 88 for (i=0; i<3; i++) { 89 for(j = 0; j<4; j++) { 90 free(dataTable[i][j]); 91 } 92 free(dataTable[i]); 93 } 94 free(dataTable); 95 } 96 dataTable = NULL; 97 } 98 99 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */ 100 static void TestStringFunctions() 101 { 102 int32_t i,j,k; 103 UChar temp[512]; 104 UChar nullTemp[512]; 105 char test[512]; 106 char tempOut[512]; 107 108 setUpDataTable(); 109 110 log_verbose("Testing u_strlen()\n"); 111 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2])) 112 log_err("There is an error in u_strlen()"); 113 114 log_verbose("Testing u_memcpy() and u_memcmp()\n"); 115 116 for(i=0;i<3;++i) 117 { 118 for(j=0;j<4;++j) 119 { 120 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); 121 temp[0] = 0; 122 temp[7] = 0xA4; /* Mark the end */ 123 u_memcpy(temp,dataTable[i][j], 7); 124 125 if(temp[7] != 0xA4) 126 log_err("an error occured in u_memcpy()\n"); 127 if(u_memcmp(temp, dataTable[i][j], 7)!=0) 128 log_err("an error occured in u_memcpy() or u_memcmp()\n"); 129 } 130 } 131 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0) 132 log_err("an error occured in u_memcmp()\n"); 133 134 log_verbose("Testing u_memset()\n"); 135 nullTemp[0] = 0; 136 nullTemp[7] = 0; 137 u_memset(nullTemp, 0xa4, 7); 138 for (i = 0; i < 7; i++) { 139 if(nullTemp[i] != 0xa4) { 140 log_err("an error occured in u_memset()\n"); 141 } 142 } 143 if(nullTemp[7] != 0) { 144 log_err("u_memset() went too far\n"); 145 } 146 147 u_memset(nullTemp, 0, 7); 148 nullTemp[7] = 0xa4; 149 temp[7] = 0; 150 u_memcpy(temp,nullTemp, 7); 151 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0) 152 log_err("an error occured in u_memcpy() or u_memcmp()\n"); 153 154 155 log_verbose("Testing u_memmove()\n"); 156 for (i = 0; i < 7; i++) { 157 temp[i] = (UChar)i; 158 } 159 u_memmove(temp + 1, temp, 7); 160 if(temp[0] != 0) { 161 log_err("an error occured in u_memmove()\n"); 162 } 163 for (i = 1; i <= 7; i++) { 164 if(temp[i] != (i - 1)) { 165 log_err("an error occured in u_memmove()\n"); 166 } 167 } 168 169 log_verbose("Testing u_strcpy() and u_strcmp()\n"); 170 171 for(i=0;i<3;++i) 172 { 173 for(j=0;j<4;++j) 174 { 175 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); 176 temp[0] = 0; 177 u_strcpy(temp,dataTable[i][j]); 178 179 if(u_strcmp(temp,dataTable[i][j])!=0) 180 log_err("something threw an error in u_strcpy() or u_strcmp()\n"); 181 } 182 } 183 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0) 184 log_err("an error occured in u_memcmp()\n"); 185 186 log_verbose("testing u_strcat()\n"); 187 i=0; 188 for(j=0; j<2;++j) 189 { 190 u_uastrcpy(temp, ""); 191 u_strcpy(temp,dataTable[i][j]); 192 u_strcat(temp,dataTable[i+1][j]); 193 if(u_strcmp(temp,dataTable[i+2][j])!=0) 194 log_err("something threw an error in u_strcat()\n"); 195 196 } 197 log_verbose("Testing u_strncmp()\n"); 198 for(i=0,j=0;j<4; ++j) 199 { 200 k=u_strlen(dataTable[i][j]); 201 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0) 202 log_err("Something threw an error in u_strncmp\n"); 203 } 204 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0) 205 log_err("an error occured in u_memcmp()\n"); 206 207 208 log_verbose("Testing u_strncat\n"); 209 for(i=0,j=0;j<4; ++j) 210 { 211 k=u_strlen(dataTable[i][j]); 212 213 u_uastrcpy(temp,""); 214 215 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0) 216 log_err("something threw an error in u_strncat or u_uastrcpy()\n"); 217 218 } 219 220 log_verbose("Testing u_strncpy() and u_uastrcpy()\n"); 221 for(i=2,j=0;j<4; ++j) 222 { 223 k=u_strlen(dataTable[i][j]); 224 u_strncpy(temp, dataTable[i][j],k); 225 temp[k] = 0xa4; 226 227 if(u_strncmp(temp, dataTable[i][j],k)!=0) 228 log_err("something threw an error in u_strncpy()\n"); 229 230 if(temp[k] != 0xa4) 231 log_err("something threw an error in u_strncpy()\n"); 232 233 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 234 u_uastrncpy(temp, raw[i][j], k-1); 235 if(u_strncmp(temp, dataTable[i][j],k-1)!=0) 236 log_err("something threw an error in u_uastrncpy(k-1)\n"); 237 238 if(temp[k-1] != 0x3F) 239 log_err("something threw an error in u_uastrncpy(k-1)\n"); 240 241 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 242 u_uastrncpy(temp, raw[i][j], k+1); 243 if(u_strcmp(temp, dataTable[i][j])!=0) 244 log_err("something threw an error in u_uastrncpy(k+1)\n"); 245 246 if(temp[k] != 0) 247 log_err("something threw an error in u_uastrncpy(k+1)\n"); 248 249 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); 250 u_uastrncpy(temp, raw[i][j], k); 251 if(u_strncmp(temp, dataTable[i][j], k)!=0) 252 log_err("something threw an error in u_uastrncpy(k)\n"); 253 254 if(temp[k] != 0x3F) 255 log_err("something threw an error in u_uastrncpy(k)\n"); 256 } 257 258 log_verbose("Testing u_strchr() and u_memchr()\n"); 259 260 for(i=2,j=0;j<4;j++) 261 { 262 UChar saveVal = dataTable[i][j][0]; 263 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F); 264 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1); 265 266 log_verbose("%s ", u_austrcpy(tempOut, findPtr)); 267 268 if (findPtr == NULL || *findPtr != 0x005F) { 269 log_err("u_strchr can't find '_' in the string\n"); 270 } 271 272 findPtr = u_strchr32(dataTable[i][j], 0x005F); 273 if (findPtr == NULL || *findPtr != 0x005F) { 274 log_err("u_strchr32 can't find '_' in the string\n"); 275 } 276 277 findPtr = u_strchr(dataTable[i][j], 0); 278 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 279 log_err("u_strchr can't find NULL in the string\n"); 280 } 281 282 findPtr = u_strchr32(dataTable[i][j], 0); 283 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 284 log_err("u_strchr32 can't find NULL in the string\n"); 285 } 286 287 findPtr = u_memchr(dataTable[i][j], 0, dataSize); 288 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 289 log_err("u_memchr can't find NULL in the string\n"); 290 } 291 292 findPtr = u_memchr32(dataTable[i][j], 0, dataSize); 293 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { 294 log_err("u_memchr32 can't find NULL in the string\n"); 295 } 296 297 dataTable[i][j][0] = 0; 298 /* Make sure we skip over the NULL termination */ 299 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize); 300 if (findPtr == NULL || *findPtr != 0x005F) { 301 log_err("u_memchr can't find '_' in the string\n"); 302 } 303 304 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize); 305 if (findPtr == NULL || *findPtr != 0x005F) { 306 log_err("u_memchr32 can't find '_' in the string\n"); 307 } 308 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize); 309 if (findPtr != NULL) { 310 log_err("Should have found NULL when the character is not there.\n"); 311 } 312 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */ 313 } 314 315 /* 316 * test that u_strchr32() 317 * does not find surrogate code points when they are part of matched pairs 318 * (= part of supplementary code points) 319 * Jitterbug 1542 320 */ 321 { 322 static const UChar s[]={ 323 /* 0 1 2 3 4 5 6 7 8 9 */ 324 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0 325 }; 326 327 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) { 328 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n"); 329 } 330 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) { 331 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n"); 332 } 333 } 334 335 log_verbose("Testing u_austrcpy()"); 336 u_austrcpy(test,dataTable[0][0]); 337 if(strcmp(test,raw[0][0])!=0) 338 log_err("There is an error in u_austrcpy()"); 339 340 341 log_verbose("Testing u_strtok_r()"); 342 { 343 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n"; 344 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"}; 345 UChar delimBuf[sizeof(test)]; 346 UChar currTokenBuf[sizeof(tokString)]; 347 UChar *state; 348 uint32_t currToken = 0; 349 UChar *ptr; 350 351 u_uastrcpy(temp, tokString); 352 u_uastrcpy(delimBuf, " "); 353 354 ptr = u_strtok_r(temp, delimBuf, &state); 355 u_uastrcpy(delimBuf, " ,"); 356 while (ptr != NULL) { 357 u_uastrcpy(currTokenBuf, tokens[currToken]); 358 if (u_strcmp(ptr, currTokenBuf) != 0) { 359 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]); 360 } 361 ptr = u_strtok_r(NULL, delimBuf, &state); 362 currToken++; 363 } 364 365 if (currToken != sizeof(tokens)/sizeof(tokens[0])) { 366 log_err("Didn't get correct number of tokens\n"); 367 } 368 state = delimBuf; /* Give it an "invalid" saveState */ 369 u_uastrcpy(currTokenBuf, ""); 370 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { 371 log_err("Didn't get NULL for empty string\n"); 372 } 373 if (state != NULL) { 374 log_err("State should be NULL for empty string\n"); 375 } 376 state = delimBuf; /* Give it an "invalid" saveState */ 377 u_uastrcpy(currTokenBuf, ", ,"); 378 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { 379 log_err("Didn't get NULL for a string of delimiters\n"); 380 } 381 if (state != NULL) { 382 log_err("State should be NULL for a string of delimiters\n"); 383 } 384 385 state = delimBuf; /* Give it an "invalid" saveState */ 386 u_uastrcpy(currTokenBuf, "q, ,"); 387 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) { 388 log_err("Got NULL for a string that does not begin with delimiters\n"); 389 } 390 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { 391 log_err("Didn't get NULL for a string that ends in delimiters\n"); 392 } 393 if (state != NULL) { 394 log_err("State should be NULL for empty string\n"); 395 } 396 397 state = delimBuf; /* Give it an "invalid" saveState */ 398 u_uastrcpy(currTokenBuf, tokString); 399 u_uastrcpy(temp, tokString); 400 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */ 401 ptr = u_strtok_r(currTokenBuf, delimBuf, &state); 402 if (ptr == NULL || u_strcmp(ptr, temp) != 0) { 403 log_err("Should have recieved the same string when there are no delimiters\n"); 404 } 405 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { 406 log_err("Should not have found another token in a one token string\n"); 407 } 408 } 409 410 /* test u_strcmpCodePointOrder() */ 411 { 412 /* these strings are in ascending order */ 413 static const UChar strings[][4]={ 414 { 0x61, 0 }, /* U+0061 */ 415 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ 416 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ 417 { 0xd800, 0 }, /* U+d800 */ 418 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ 419 { 0xdfff, 0 }, /* U+dfff */ 420 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ 421 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ 422 { 0xd800, 0xdc02, 0 }, /* U+10002 */ 423 { 0xd84d, 0xdc56, 0 } /* U+23456 */ 424 }; 425 426 UCharIterator iter1, iter2; 427 int32_t len1, len2, r1, r2; 428 429 for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) { 430 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) { 431 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i); 432 } 433 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) { 434 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i); 435 } 436 437 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */ 438 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) { 439 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i); 440 } 441 442 /* test u_strCompare(TRUE) */ 443 len1=u_strlen(strings[i]); 444 len2=u_strlen(strings[i+1]); 445 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 || 446 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 || 447 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 || 448 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0 449 ) { 450 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i); 451 } 452 453 /* test u_strCompare(FALSE) */ 454 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE); 455 r2=u_strcmp(strings[i], strings[i+1]); 456 if(_SIGN(r1)!=_SIGN(r2)) { 457 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i); 458 } 459 460 /* test u_strCompareIter() */ 461 uiter_setString(&iter1, strings[i], len1); 462 uiter_setString(&iter2, strings[i+1], len2); 463 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) { 464 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i); 465 } 466 r1=u_strCompareIter(&iter1, &iter2, FALSE); 467 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) { 468 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i); 469 } 470 } 471 } 472 473 cleanUpDataTable(); 474 } 475 476 static void TestStringSearching() 477 { 478 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0}; 479 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0}; 480 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0}; 481 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0}; 482 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0}; 483 const UChar surrMatchSet4[] = {0x0000}; 484 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0}; 485 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0}; 486 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */ 487 const UChar 488 empty[] = { 0 }, 489 a[] = { 0x61, 0 }, 490 ab[] = { 0x61, 0x62, 0 }, 491 ba[] = { 0x62, 0x61, 0 }, 492 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 }, 493 cd[] = { 0x63, 0x64, 0 }, 494 dc[] = { 0x64, 0x63, 0 }, 495 cdh[] = { 0x63, 0x64, 0x68, 0 }, 496 f[] = { 0x66, 0 }, 497 fg[] = { 0x66, 0x67, 0 }, 498 gf[] = { 0x67, 0x66, 0 }; 499 500 log_verbose("Testing u_strpbrk()"); 501 502 if (u_strpbrk(testString, a) != &testString[0]) { 503 log_err("u_strpbrk couldn't find first letter a.\n"); 504 } 505 if (u_strpbrk(testString, dc) != &testString[2]) { 506 log_err("u_strpbrk couldn't find d or c.\n"); 507 } 508 if (u_strpbrk(testString, cd) != &testString[2]) { 509 log_err("u_strpbrk couldn't find c or d.\n"); 510 } 511 if (u_strpbrk(testString, cdh) != &testString[2]) { 512 log_err("u_strpbrk couldn't find c, d or h.\n"); 513 } 514 if (u_strpbrk(testString, f) != NULL) { 515 log_err("u_strpbrk didn't return NULL for \"f\".\n"); 516 } 517 if (u_strpbrk(testString, fg) != NULL) { 518 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); 519 } 520 if (u_strpbrk(testString, gf) != NULL) { 521 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); 522 } 523 if (u_strpbrk(testString, empty) != NULL) { 524 log_err("u_strpbrk didn't return NULL for \"\".\n"); 525 } 526 527 log_verbose("Testing u_strpbrk() with surrogates"); 528 529 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) { 530 log_err("u_strpbrk couldn't find first letter a.\n"); 531 } 532 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) { 533 log_err("u_strpbrk couldn't find d or c.\n"); 534 } 535 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) { 536 log_err("u_strpbrk couldn't find c or d.\n"); 537 } 538 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) { 539 log_err("u_strpbrk couldn't find c, d or h.\n"); 540 } 541 if (u_strpbrk(testSurrogateString, f) != NULL) { 542 log_err("u_strpbrk didn't return NULL for \"f\".\n"); 543 } 544 if (u_strpbrk(testSurrogateString, fg) != NULL) { 545 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); 546 } 547 if (u_strpbrk(testSurrogateString, gf) != NULL) { 548 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); 549 } 550 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) { 551 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n"); 552 } 553 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) { 554 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); 555 } 556 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) { 557 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n"); 558 } 559 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) { 560 log_err("u_strpbrk should have returned NULL for empty string.\n"); 561 } 562 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) { 563 log_err("u_strpbrk should have found bad surrogate.\n"); 564 } 565 566 log_verbose("Testing u_strcspn()"); 567 568 if (u_strcspn(testString, a) != 0) { 569 log_err("u_strcspn couldn't find first letter a.\n"); 570 } 571 if (u_strcspn(testString, dc) != 2) { 572 log_err("u_strcspn couldn't find d or c.\n"); 573 } 574 if (u_strcspn(testString, cd) != 2) { 575 log_err("u_strcspn couldn't find c or d.\n"); 576 } 577 if (u_strcspn(testString, cdh) != 2) { 578 log_err("u_strcspn couldn't find c, d or h.\n"); 579 } 580 if (u_strcspn(testString, f) != u_strlen(testString)) { 581 log_err("u_strcspn didn't return NULL for \"f\".\n"); 582 } 583 if (u_strcspn(testString, fg) != u_strlen(testString)) { 584 log_err("u_strcspn didn't return NULL for \"fg\".\n"); 585 } 586 if (u_strcspn(testString, gf) != u_strlen(testString)) { 587 log_err("u_strcspn didn't return NULL for \"gf\".\n"); 588 } 589 590 log_verbose("Testing u_strcspn() with surrogates"); 591 592 if (u_strcspn(testSurrogateString, a) != 1) { 593 log_err("u_strcspn couldn't find first letter a.\n"); 594 } 595 if (u_strcspn(testSurrogateString, dc) != 5) { 596 log_err("u_strcspn couldn't find d or c.\n"); 597 } 598 if (u_strcspn(testSurrogateString, cd) != 5) { 599 log_err("u_strcspn couldn't find c or d.\n"); 600 } 601 if (u_strcspn(testSurrogateString, cdh) != 5) { 602 log_err("u_strcspn couldn't find c, d or h.\n"); 603 } 604 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) { 605 log_err("u_strcspn didn't return NULL for \"f\".\n"); 606 } 607 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) { 608 log_err("u_strcspn didn't return NULL for \"fg\".\n"); 609 } 610 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) { 611 log_err("u_strcspn didn't return NULL for \"gf\".\n"); 612 } 613 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) { 614 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n"); 615 } 616 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) { 617 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n"); 618 } 619 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) { 620 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n"); 621 } 622 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) { 623 log_err("u_strcspn should have returned strlen for empty string.\n"); 624 } 625 626 627 log_verbose("Testing u_strspn()"); 628 629 if (u_strspn(testString, a) != 1) { 630 log_err("u_strspn couldn't skip first letter a.\n"); 631 } 632 if (u_strspn(testString, ab) != 2) { 633 log_err("u_strspn couldn't skip a or b.\n"); 634 } 635 if (u_strspn(testString, ba) != 2) { 636 log_err("u_strspn couldn't skip a or b.\n"); 637 } 638 if (u_strspn(testString, f) != 0) { 639 log_err("u_strspn didn't return 0 for \"f\".\n"); 640 } 641 if (u_strspn(testString, dc) != 0) { 642 log_err("u_strspn couldn't find first letter a (skip d or c).\n"); 643 } 644 if (u_strspn(testString, abcd) != u_strlen(testString)) { 645 log_err("u_strspn couldn't skip over the whole string.\n"); 646 } 647 if (u_strspn(testString, empty) != 0) { 648 log_err("u_strspn should have returned 0 for empty string.\n"); 649 } 650 651 log_verbose("Testing u_strspn() with surrogates"); 652 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) { 653 log_err("u_strspn couldn't skip 0xdbff or a.\n"); 654 } 655 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) { 656 log_err("u_strspn couldn't skip 0xdbff or a.\n"); 657 } 658 if (u_strspn(testSurrogateString, f) != 0) { 659 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 660 } 661 if (u_strspn(testSurrogateString, dc) != 0) { 662 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 663 } 664 if (u_strspn(testSurrogateString, cd) != 0) { 665 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); 666 } 667 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) { 668 log_err("u_strspn couldn't skip whole string.\n"); 669 } 670 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) { 671 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n"); 672 } 673 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) { 674 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); 675 } 676 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) { 677 log_err("u_strspn should have returned 0 for empty string.\n"); 678 } 679 } 680 681 /* 682 * All binary Unicode string searches should behave the same for equivalent input. 683 * See Jitterbug 2145. 684 * There are some new functions, too - just test them all. 685 */ 686 static void 687 TestSurrogateSearching() { 688 static const UChar s[]={ 689 /* 0 1 2 3 4 5 6 7 8 9 10 11 */ 690 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0 691 }, sub_a[]={ 692 0x61, 0 693 }, sub_b[]={ 694 0x62, 0 695 }, sub_lead[]={ 696 0xd801, 0 697 }, sub_trail[]={ 698 0xdc02, 0 699 }, sub_supp[]={ 700 0xd801, 0xdc02, 0 701 }, sub_supp2[]={ 702 0xd801, 0xdc03, 0 703 }, sub_a_lead[]={ 704 0x61, 0xd801, 0 705 }, sub_trail_a[]={ 706 0xdc02, 0x61, 0 707 }, sub_aba[]={ 708 0x61, 0x62, 0x61, 0 709 }; 710 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0; 711 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456; 712 713 const UChar *first, *last; 714 715 /* search for NUL code point: find end of string */ 716 first=s+u_strlen(s); 717 718 if( 719 first!=u_strchr(s, nul) || 720 first!=u_strchr32(s, nul) || 721 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) || 722 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) || 723 first!=u_strrchr(s, nul) || 724 first!=u_strrchr32(s, nul) || 725 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) || 726 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s)) 727 ) { 728 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n"); 729 } 730 731 /* search for empty substring: find beginning of string */ 732 if( 733 s!=u_strstr(s, &nul) || 734 s!=u_strFindFirst(s, -1, &nul, -1) || 735 s!=u_strFindFirst(s, -1, &nul, 0) || 736 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) || 737 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) || 738 s!=u_strrstr(s, &nul) || 739 s!=u_strFindLast(s, -1, &nul, -1) || 740 s!=u_strFindLast(s, -1, &nul, 0) || 741 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) || 742 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0) 743 ) { 744 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n"); 745 } 746 747 /* find 'a' in s[1..10[ */ 748 first=s+3; 749 last=s+7; 750 if( 751 first!=u_strchr(s+1, a) || 752 first!=u_strchr32(s+1, a) || 753 first!=u_memchr(s+1, a, 9) || 754 first!=u_memchr32(s+1, a, 9) || 755 first!=u_strstr(s+1, sub_a) || 756 first!=u_strFindFirst(s+1, -1, sub_a, -1) || 757 first!=u_strFindFirst(s+1, -1, &a, 1) || 758 first!=u_strFindFirst(s+1, 9, sub_a, -1) || 759 first!=u_strFindFirst(s+1, 9, &a, 1) || 760 (s+10)!=u_strrchr(s+1, a) || 761 (s+10)!=u_strrchr32(s+1, a) || 762 last!=u_memrchr(s+1, a, 9) || 763 last!=u_memrchr32(s+1, a, 9) || 764 (s+10)!=u_strrstr(s+1, sub_a) || 765 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) || 766 (s+10)!=u_strFindLast(s+1, -1, &a, 1) || 767 last!=u_strFindLast(s+1, 9, sub_a, -1) || 768 last!=u_strFindLast(s+1, 9, &a, 1) 769 ) { 770 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n"); 771 } 772 773 /* do not find 'b' in s[1..10[ */ 774 if( 775 NULL!=u_strchr(s+1, b) || 776 NULL!=u_strchr32(s+1, b) || 777 NULL!=u_memchr(s+1, b, 9) || 778 NULL!=u_memchr32(s+1, b, 9) || 779 NULL!=u_strstr(s+1, sub_b) || 780 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) || 781 NULL!=u_strFindFirst(s+1, -1, &b, 1) || 782 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) || 783 NULL!=u_strFindFirst(s+1, 9, &b, 1) || 784 NULL!=u_strrchr(s+1, b) || 785 NULL!=u_strrchr32(s+1, b) || 786 NULL!=u_memrchr(s+1, b, 9) || 787 NULL!=u_memrchr32(s+1, b, 9) || 788 NULL!=u_strrstr(s+1, sub_b) || 789 NULL!=u_strFindLast(s+1, -1, sub_b, -1) || 790 NULL!=u_strFindLast(s+1, -1, &b, 1) || 791 NULL!=u_strFindLast(s+1, 9, sub_b, -1) || 792 NULL!=u_strFindLast(s+1, 9, &b, 1) 793 ) { 794 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n"); 795 } 796 797 /* do not find a non-code point in s[1..10[ */ 798 if( 799 NULL!=u_strchr32(s+1, ill) || 800 NULL!=u_memchr32(s+1, ill, 9) || 801 NULL!=u_strrchr32(s+1, ill) || 802 NULL!=u_memrchr32(s+1, ill, 9) 803 ) { 804 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n"); 805 } 806 807 /* find U+d801 in s[1..10[ */ 808 first=s+6; 809 if( 810 first!=u_strchr(s+1, lead) || 811 first!=u_strchr32(s+1, lead) || 812 first!=u_memchr(s+1, lead, 9) || 813 first!=u_memchr32(s+1, lead, 9) || 814 first!=u_strstr(s+1, sub_lead) || 815 first!=u_strFindFirst(s+1, -1, sub_lead, -1) || 816 first!=u_strFindFirst(s+1, -1, &lead, 1) || 817 first!=u_strFindFirst(s+1, 9, sub_lead, -1) || 818 first!=u_strFindFirst(s+1, 9, &lead, 1) || 819 first!=u_strrchr(s+1, lead) || 820 first!=u_strrchr32(s+1, lead) || 821 first!=u_memrchr(s+1, lead, 9) || 822 first!=u_memrchr32(s+1, lead, 9) || 823 first!=u_strrstr(s+1, sub_lead) || 824 first!=u_strFindLast(s+1, -1, sub_lead, -1) || 825 first!=u_strFindLast(s+1, -1, &lead, 1) || 826 first!=u_strFindLast(s+1, 9, sub_lead, -1) || 827 first!=u_strFindLast(s+1, 9, &lead, 1) 828 ) { 829 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n"); 830 } 831 832 /* find U+dc02 in s[1..10[ */ 833 first=s+4; 834 if( 835 first!=u_strchr(s+1, trail) || 836 first!=u_strchr32(s+1, trail) || 837 first!=u_memchr(s+1, trail, 9) || 838 first!=u_memchr32(s+1, trail, 9) || 839 first!=u_strstr(s+1, sub_trail) || 840 first!=u_strFindFirst(s+1, -1, sub_trail, -1) || 841 first!=u_strFindFirst(s+1, -1, &trail, 1) || 842 first!=u_strFindFirst(s+1, 9, sub_trail, -1) || 843 first!=u_strFindFirst(s+1, 9, &trail, 1) || 844 first!=u_strrchr(s+1, trail) || 845 first!=u_strrchr32(s+1, trail) || 846 first!=u_memrchr(s+1, trail, 9) || 847 first!=u_memrchr32(s+1, trail, 9) || 848 first!=u_strrstr(s+1, sub_trail) || 849 first!=u_strFindLast(s+1, -1, sub_trail, -1) || 850 first!=u_strFindLast(s+1, -1, &trail, 1) || 851 first!=u_strFindLast(s+1, 9, sub_trail, -1) || 852 first!=u_strFindLast(s+1, 9, &trail, 1) 853 ) { 854 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n"); 855 } 856 857 /* find U+10402 in s[1..10[ */ 858 first=s+1; 859 last=s+8; 860 if( 861 first!=u_strchr32(s+1, supp) || 862 first!=u_memchr32(s+1, supp, 9) || 863 first!=u_strstr(s+1, sub_supp) || 864 first!=u_strFindFirst(s+1, -1, sub_supp, -1) || 865 first!=u_strFindFirst(s+1, -1, sub_supp, 2) || 866 first!=u_strFindFirst(s+1, 9, sub_supp, -1) || 867 first!=u_strFindFirst(s+1, 9, sub_supp, 2) || 868 last!=u_strrchr32(s+1, supp) || 869 last!=u_memrchr32(s+1, supp, 9) || 870 last!=u_strrstr(s+1, sub_supp) || 871 last!=u_strFindLast(s+1, -1, sub_supp, -1) || 872 last!=u_strFindLast(s+1, -1, sub_supp, 2) || 873 last!=u_strFindLast(s+1, 9, sub_supp, -1) || 874 last!=u_strFindLast(s+1, 9, sub_supp, 2) 875 ) { 876 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n"); 877 } 878 879 /* do not find U+10402 in a single UChar */ 880 if( 881 NULL!=u_memchr32(s+1, supp, 1) || 882 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) || 883 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) || 884 NULL!=u_memrchr32(s+1, supp, 1) || 885 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) || 886 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) || 887 NULL!=u_memrchr32(s+2, supp, 1) || 888 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) || 889 NULL!=u_strFindLast(s+2, 1, sub_supp, 2) 890 ) { 891 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n"); 892 } 893 894 /* do not find U+10403 in s[1..10[ */ 895 if( 896 NULL!=u_strchr32(s+1, supp2) || 897 NULL!=u_memchr32(s+1, supp2, 9) || 898 NULL!=u_strstr(s+1, sub_supp2) || 899 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) || 900 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) || 901 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) || 902 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) || 903 NULL!=u_strrchr32(s+1, supp2) || 904 NULL!=u_memrchr32(s+1, supp2, 9) || 905 NULL!=u_strrstr(s+1, sub_supp2) || 906 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) || 907 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) || 908 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) || 909 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2) 910 ) { 911 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n"); 912 } 913 914 /* find <0061 d801> in s[1..10[ */ 915 first=s+5; 916 if( 917 first!=u_strstr(s+1, sub_a_lead) || 918 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) || 919 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) || 920 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) || 921 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) || 922 first!=u_strrstr(s+1, sub_a_lead) || 923 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) || 924 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) || 925 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) || 926 first!=u_strFindLast(s+1, 9, sub_a_lead, 2) 927 ) { 928 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n"); 929 } 930 931 /* find <dc02 0061> in s[1..10[ */ 932 first=s+4; 933 if( 934 first!=u_strstr(s+1, sub_trail_a) || 935 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) || 936 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) || 937 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) || 938 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) || 939 first!=u_strrstr(s+1, sub_trail_a) || 940 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) || 941 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) || 942 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) || 943 first!=u_strFindLast(s+1, 9, sub_trail_a, 2) 944 ) { 945 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n"); 946 } 947 948 /* do not find "aba" in s[1..10[ */ 949 if( 950 NULL!=u_strstr(s+1, sub_aba) || 951 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) || 952 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) || 953 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) || 954 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) || 955 NULL!=u_strrstr(s+1, sub_aba) || 956 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) || 957 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) || 958 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) || 959 NULL!=u_strFindLast(s+1, 9, sub_aba, 3) 960 ) { 961 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n"); 962 } 963 } 964 965 static void TestStringCopy() 966 { 967 UChar temp[40]; 968 UChar *result=0; 969 UChar subString[5]; 970 UChar uchars[]={0x61, 0x62, 0x63, 0x00}; 971 char charOut[40]; 972 char chars[]="abc"; /* needs default codepage */ 973 974 log_verbose("Testing u_uastrncpy() and u_uastrcpy()"); 975 976 u_uastrcpy(temp, "abc"); 977 if(u_strcmp(temp, uchars) != 0) { 978 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 979 } 980 981 temp[0] = 0xFB; /* load garbage into it */ 982 temp[1] = 0xFB; 983 temp[2] = 0xFB; 984 temp[3] = 0xFB; 985 986 u_uastrncpy(temp, "abcabcabc", 3); 987 if(u_strncmp(uchars, temp, 3) != 0){ 988 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 989 } 990 if(temp[3] != 0xFB) { 991 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n"); 992 } 993 994 charOut[0] = (char)0x7B; /* load garbage into it */ 995 charOut[1] = (char)0x7B; 996 charOut[2] = (char)0x7B; 997 charOut[3] = (char)0x7B; 998 999 temp[0] = 0x0061; 1000 temp[1] = 0x0062; 1001 temp[2] = 0x0063; 1002 temp[3] = 0x0061; 1003 temp[4] = 0x0062; 1004 temp[5] = 0x0063; 1005 temp[6] = 0x0000; 1006 1007 u_austrncpy(charOut, temp, 3); 1008 if(strncmp(chars, charOut, 3) != 0){ 1009 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp)); 1010 } 1011 if(charOut[3] != (char)0x7B) { 1012 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n"); 1013 } 1014 1015 /*Testing u_strchr()*/ 1016 log_verbose("Testing u_strchr\n"); 1017 temp[0]=0x42; 1018 temp[1]=0x62; 1019 temp[2]=0x62; 1020 temp[3]=0x63; 1021 temp[4]=0xd841; 1022 temp[5]=0xd841; 1023 temp[6]=0xdc02; 1024 temp[7]=0; 1025 result=u_strchr(temp, (UChar)0x62); 1026 if(result != temp+1){ 1027 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result); 1028 } 1029 /*Testing u_strstr()*/ 1030 log_verbose("Testing u_strstr\n"); 1031 subString[0]=0x62; 1032 subString[1]=0x63; 1033 subString[2]=0; 1034 result=u_strstr(temp, subString); 1035 if(result != temp+2){ 1036 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result); 1037 } 1038 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */ 1039 if(result != temp){ 1040 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result); 1041 } 1042 result=u_strstr(subString, temp); 1043 if(result != NULL){ 1044 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n"); 1045 } 1046 1047 /*Testing u_strchr32*/ 1048 log_verbose("Testing u_strchr32\n"); 1049 result=u_strchr32(temp, (UChar32)0x62); 1050 if(result != temp+1){ 1051 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result); 1052 } 1053 result=u_strchr32(temp, (UChar32)0xfb); 1054 if(result != NULL){ 1055 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n"); 1056 } 1057 result=u_strchr32(temp, (UChar32)0x20402); 1058 if(result != temp+5){ 1059 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result); 1060 } 1061 1062 temp[7]=0xfc00; 1063 result=u_memchr32(temp, (UChar32)0x20402, 7); 1064 if(result != temp+5){ 1065 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result); 1066 } 1067 result=u_memchr32(temp, (UChar32)0x20402, 6); 1068 if(result != NULL){ 1069 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result); 1070 } 1071 result=u_memchr32(temp, (UChar32)0x20402, 1); 1072 if(result != NULL){ 1073 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result); 1074 } 1075 result=u_memchr32(temp, (UChar32)0xfc00, 8); 1076 if(result != temp+7){ 1077 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result); 1078 } 1079 } 1080 1081 /* test u_unescape() and u_unescapeAt() ------------------------------------- */ 1082 1083 static void 1084 TestUnescape() { 1085 static UChar buffer[200]; 1086 1087 static const char* input = 1088 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}"; 1089 1090 static const UChar expect[]={ 1091 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20, 1092 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c, 1093 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20, 1094 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0 1095 }; 1096 static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1; 1097 int32_t length; 1098 1099 /* test u_unescape() */ 1100 length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0])); 1101 if(length!=explength || u_strcmp(buffer, expect)!=0) { 1102 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length, 1103 explength); 1104 } 1105 1106 /* try preflighting */ 1107 length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0])); 1108 if(length!=explength || u_strcmp(buffer, expect)!=0) { 1109 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength); 1110 } 1111 1112 /* ### TODO: test u_unescapeAt() */ 1113 } 1114 1115 /* test code point counting functions --------------------------------------- */ 1116 1117 /* reference implementation of u_strHasMoreChar32Than() */ 1118 static int32_t 1119 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { 1120 int32_t count=u_countChar32(s, length); 1121 return count>number; 1122 } 1123 1124 /* compare the real function against the reference */ 1125 static void 1126 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) { 1127 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) { 1128 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n", 1129 i, length, number, u_strHasMoreChar32Than(s, length, number)); 1130 } 1131 } 1132 1133 static void 1134 TestCountChar32() { 1135 static const UChar string[]={ 1136 0x61, 0x62, 0xd800, 0xdc00, 1137 0xd801, 0xdc01, 0x63, 0xd802, 1138 0x64, 0xdc03, 0x65, 0x66, 1139 0xd804, 0xdc04, 0xd805, 0xdc05, 1140 0x67 1141 }; 1142 UChar buffer[100]; 1143 int32_t i, length, number; 1144 1145 /* test u_strHasMoreChar32Than() with length>=0 */ 1146 length=UPRV_LENGTHOF(string); 1147 while(length>=0) { 1148 for(i=0; i<=length; ++i) { 1149 for(number=-1; number<=((length-i)+2); ++number) { 1150 _testStrHasMoreChar32Than(string+i, i, length-i, number); 1151 } 1152 } 1153 --length; 1154 } 1155 1156 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */ 1157 length=UPRV_LENGTHOF(string); 1158 u_memcpy(buffer, string, length); 1159 while(length>=0) { 1160 buffer[length]=0; 1161 for(i=0; i<=length; ++i) { 1162 for(number=-1; number<=((length-i)+2); ++number) { 1163 _testStrHasMoreChar32Than(buffer+i, i, -1, number); 1164 } 1165 } 1166 --length; 1167 } 1168 1169 /* test u_strHasMoreChar32Than() with NULL string (bad input) */ 1170 for(length=-1; length<=1; ++length) { 1171 for(i=0; i<=length; ++i) { 1172 for(number=-2; number<=2; ++number) { 1173 _testStrHasMoreChar32Than(NULL, 0, length, number); 1174 } 1175 } 1176 } 1177 } 1178 1179 /* UCharIterator ------------------------------------------------------------ */ 1180 1181 /* 1182 * Compare results from two iterators, should be same. 1183 * Assume that the text is not empty and that 1184 * iteration start==0 and iteration limit==length. 1185 */ 1186 static void 1187 compareIterators(UCharIterator *iter1, const char *n1, 1188 UCharIterator *iter2, const char *n2) { 1189 int32_t i, pos1, pos2, middle, length; 1190 UChar32 c1, c2; 1191 1192 /* compare lengths */ 1193 length=iter1->getIndex(iter1, UITER_LENGTH); 1194 pos2=iter2->getIndex(iter2, UITER_LENGTH); 1195 if(length!=pos2) { 1196 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2); 1197 return; 1198 } 1199 1200 /* set into the middle */ 1201 middle=length/2; 1202 1203 pos1=iter1->move(iter1, middle, UITER_ZERO); 1204 if(pos1!=middle) { 1205 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1); 1206 return; 1207 } 1208 1209 pos2=iter2->move(iter2, middle, UITER_ZERO); 1210 if(pos2!=middle) { 1211 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2); 1212 return; 1213 } 1214 1215 /* test current() */ 1216 c1=iter1->current(iter1); 1217 c2=iter2->current(iter2); 1218 if(c1!=c2) { 1219 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle); 1220 return; 1221 } 1222 1223 /* move forward 3 UChars */ 1224 for(i=0; i<3; ++i) { 1225 c1=iter1->next(iter1); 1226 c2=iter2->next(iter2); 1227 if(c1!=c2) { 1228 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1229 return; 1230 } 1231 } 1232 1233 /* move backward 5 UChars */ 1234 for(i=0; i<5; ++i) { 1235 c1=iter1->previous(iter1); 1236 c2=iter2->previous(iter2); 1237 if(c1!=c2) { 1238 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1239 return; 1240 } 1241 } 1242 1243 /* iterate forward from the beginning */ 1244 pos1=iter1->move(iter1, 0, UITER_START); 1245 if(pos1<0) { 1246 log_err("%s->move(start) failed\n", n1); 1247 return; 1248 } 1249 if(!iter1->hasNext(iter1)) { 1250 log_err("%s->hasNext() at the start returns FALSE\n", n1); 1251 return; 1252 } 1253 1254 pos2=iter2->move(iter2, 0, UITER_START); 1255 if(pos2<0) { 1256 log_err("%s->move(start) failed\n", n2); 1257 return; 1258 } 1259 if(!iter2->hasNext(iter2)) { 1260 log_err("%s->hasNext() at the start returns FALSE\n", n2); 1261 return; 1262 } 1263 1264 do { 1265 c1=iter1->next(iter1); 1266 c2=iter2->next(iter2); 1267 if(c1!=c2) { 1268 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1269 return; 1270 } 1271 } while(c1>=0); 1272 1273 if(iter1->hasNext(iter1)) { 1274 log_err("%s->hasNext() at the end returns TRUE\n", n1); 1275 return; 1276 } 1277 if(iter2->hasNext(iter2)) { 1278 log_err("%s->hasNext() at the end returns TRUE\n", n2); 1279 return; 1280 } 1281 1282 /* back to the middle */ 1283 pos1=iter1->move(iter1, middle, UITER_ZERO); 1284 if(pos1!=middle) { 1285 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1); 1286 return; 1287 } 1288 1289 pos2=iter2->move(iter2, middle, UITER_ZERO); 1290 if(pos2!=middle) { 1291 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2); 1292 return; 1293 } 1294 1295 /* move to index 1 */ 1296 pos1=iter1->move(iter1, 1, UITER_ZERO); 1297 if(pos1!=1) { 1298 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1); 1299 return; 1300 } 1301 1302 pos2=iter2->move(iter2, 1, UITER_ZERO); 1303 if(pos2!=1) { 1304 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2); 1305 return; 1306 } 1307 1308 /* iterate backward from the end */ 1309 pos1=iter1->move(iter1, 0, UITER_LIMIT); 1310 if(pos1<0) { 1311 log_err("%s->move(limit) failed\n", n1); 1312 return; 1313 } 1314 if(!iter1->hasPrevious(iter1)) { 1315 log_err("%s->hasPrevious() at the end returns FALSE\n", n1); 1316 return; 1317 } 1318 1319 pos2=iter2->move(iter2, 0, UITER_LIMIT); 1320 if(pos2<0) { 1321 log_err("%s->move(limit) failed\n", n2); 1322 return; 1323 } 1324 if(!iter2->hasPrevious(iter2)) { 1325 log_err("%s->hasPrevious() at the end returns FALSE\n", n2); 1326 return; 1327 } 1328 1329 do { 1330 c1=iter1->previous(iter1); 1331 c2=iter2->previous(iter2); 1332 if(c1!=c2) { 1333 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); 1334 return; 1335 } 1336 } while(c1>=0); 1337 1338 if(iter1->hasPrevious(iter1)) { 1339 log_err("%s->hasPrevious() at the start returns TRUE\n", n1); 1340 return; 1341 } 1342 if(iter2->hasPrevious(iter2)) { 1343 log_err("%s->hasPrevious() at the start returns TRUE\n", n2); 1344 return; 1345 } 1346 } 1347 1348 /* 1349 * Test the iterator's getState() and setState() functions. 1350 * iter1 and iter2 must be set up for the same iterator type and the same string 1351 * but may be physically different structs (different addresses). 1352 * 1353 * Assume that the text is not empty and that 1354 * iteration start==0 and iteration limit==length. 1355 * It must be 2<=middle<=length-2. 1356 */ 1357 static void 1358 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) { 1359 UChar32 u[4]; 1360 1361 UErrorCode errorCode; 1362 UChar32 c; 1363 uint32_t state; 1364 int32_t i, j; 1365 1366 /* get four UChars from the middle of the string */ 1367 iter1->move(iter1, middle-2, UITER_ZERO); 1368 for(i=0; i<4; ++i) { 1369 c=iter1->next(iter1); 1370 if(c<0) { 1371 /* the test violates the assumptions, see comment above */ 1372 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c); 1373 return; 1374 } 1375 u[i]=c; 1376 } 1377 1378 /* move to the middle and get the state */ 1379 iter1->move(iter1, -2, UITER_CURRENT); 1380 state=uiter_getState(iter1); 1381 1382 /* set the state into the second iterator and compare the results */ 1383 errorCode=U_ZERO_ERROR; 1384 uiter_setState(iter2, state, &errorCode); 1385 if(U_FAILURE(errorCode)) { 1386 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode)); 1387 return; 1388 } 1389 1390 c=iter2->current(iter2); 1391 if(c!=u[2]) { 1392 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]); 1393 } 1394 1395 c=iter2->previous(iter2); 1396 if(c!=u[1]) { 1397 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]); 1398 } 1399 1400 iter2->move(iter2, 2, UITER_CURRENT); 1401 c=iter2->next(iter2); 1402 if(c!=u[3]) { 1403 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]); 1404 } 1405 1406 iter2->move(iter2, -3, UITER_CURRENT); 1407 c=iter2->previous(iter2); 1408 if(c!=u[0]) { 1409 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]); 1410 } 1411 1412 /* move the second iterator back to the middle */ 1413 iter2->move(iter2, 1, UITER_CURRENT); 1414 iter2->next(iter2); 1415 1416 /* check that both are in the middle */ 1417 i=iter1->getIndex(iter1, UITER_CURRENT); 1418 j=iter2->getIndex(iter2, UITER_CURRENT); 1419 if(i!=middle) { 1420 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle); 1421 } 1422 if(i!=j) { 1423 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i); 1424 } 1425 1426 /* compare lengths */ 1427 i=iter1->getIndex(iter1, UITER_LENGTH); 1428 j=iter2->getIndex(iter2, UITER_LENGTH); 1429 if(i!=j) { 1430 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j); 1431 } 1432 } 1433 1434 static void 1435 TestUCharIterator() { 1436 static const UChar text[]={ 1437 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0 1438 }; 1439 char bytes[40]; 1440 1441 UCharIterator iter, iter1, iter2; 1442 UConverter *cnv; 1443 UErrorCode errorCode; 1444 int32_t length; 1445 1446 /* simple API/code coverage - test NOOP UCharIterator */ 1447 uiter_setString(&iter, NULL, 0); 1448 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 || 1449 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 || 1450 iter.hasNext(&iter) || iter.hasPrevious(&iter) 1451 ) { 1452 log_err("NOOP UCharIterator behaves unexpectedly\n"); 1453 } 1454 1455 /* test get/set state */ 1456 length=UPRV_LENGTHOF(text)-1; 1457 uiter_setString(&iter1, text, -1); 1458 uiter_setString(&iter2, text, length); 1459 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2); 1460 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1); 1461 1462 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */ 1463 errorCode=U_ZERO_ERROR; 1464 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode); 1465 if(U_FAILURE(errorCode)) { 1466 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode)); 1467 return; 1468 } 1469 1470 uiter_setString(&iter1, text, -1); 1471 uiter_setUTF8(&iter2, bytes, length); 1472 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator"); 1473 1474 /* try again with length=-1 */ 1475 uiter_setUTF8(&iter2, bytes, -1); 1476 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1"); 1477 1478 /* test get/set state */ 1479 length=UPRV_LENGTHOF(text)-1; 1480 uiter_setUTF8(&iter1, bytes, -1); 1481 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2); 1482 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1); 1483 1484 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */ 1485 errorCode=U_ZERO_ERROR; 1486 cnv=ucnv_open("UTF-16BE", &errorCode); 1487 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode); 1488 ucnv_close(cnv); 1489 if(U_FAILURE(errorCode)) { 1490 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode)); 1491 return; 1492 } 1493 1494 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */ 1495 bytes[length]=bytes[length+1]=0; 1496 1497 uiter_setString(&iter1, text, -1); 1498 uiter_setUTF16BE(&iter2, bytes, length); 1499 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator"); 1500 1501 /* try again with length=-1 */ 1502 uiter_setUTF16BE(&iter2, bytes, -1); 1503 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1"); 1504 1505 /* try again after moving the bytes up one, and with length=-1 */ 1506 memmove(bytes+1, bytes, length+2); 1507 uiter_setUTF16BE(&iter2, bytes+1, -1); 1508 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1"); 1509 1510 /* ### TODO test other iterators: CharacterIterator, Replaceable */ 1511 } 1512