1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File CNORMTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API 13 * synwee added test for quick check 14 * synwee added test for checkFCD 15 *********************************************************************************/ 16 /*tests for u_normalization*/ 17 #include "unicode/utypes.h" 18 #include "unicode/unorm.h" 19 #include "unicode/utf16.h" 20 #include "cintltst.h" 21 #include "cmemory.h" 22 23 #if !UCONFIG_NO_NORMALIZATION 24 25 #include <stdlib.h> 26 #include <time.h> 27 #include "unicode/uchar.h" 28 #include "unicode/ustring.h" 29 #include "unicode/unorm.h" 30 #include "cnormtst.h" 31 32 static void 33 TestAPI(void); 34 35 static void 36 TestNormCoverage(void); 37 38 static void 39 TestConcatenate(void); 40 41 static void 42 TestNextPrevious(void); 43 44 static void TestIsNormalized(void); 45 46 static void 47 TestFCNFKCClosure(void); 48 49 static void 50 TestQuickCheckPerCP(void); 51 52 static void 53 TestComposition(void); 54 55 static void 56 TestFCD(void); 57 58 static void 59 TestGetDecomposition(void); 60 61 static void 62 TestGetRawDecomposition(void); 63 64 static void TestAppendRestoreMiddle(void); 65 static void TestGetEasyToUseInstance(void); 66 67 static const char* const canonTests[][3] = { 68 /* Input*/ /*Decomposed*/ /*Composed*/ 69 { "cat", "cat", "cat" }, 70 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", }, 71 72 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/ 73 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/ 74 75 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/ 76 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */ 77 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */ 78 79 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/ 80 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/ 81 82 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/ 83 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/ 84 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/ 85 86 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/ 87 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/ 88 89 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" }, 90 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" }, 91 92 { "Henry IV", "Henry IV", "Henry IV" }, 93 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" }, 94 95 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/ 96 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/ 97 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/ 98 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/ 99 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/ 100 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* hw_ka + ten*/ 101 { "", "", "" } 102 }; 103 104 static const char* const compatTests[][3] = { 105 /* Input*/ /*Decomposed */ /*Composed*/ 106 { "cat", "cat", "cat" }, 107 108 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/ 109 110 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" }, 111 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/ 112 113 { "Henry IV", "Henry IV", "Henry IV" }, 114 { "Henry \\u2163", "Henry IV", "Henry IV" }, 115 116 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/ 117 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/ 118 119 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/ 120 121 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/ 122 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/ 123 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/ 124 { "", "", "" } 125 }; 126 127 static const char* const fcdTests[][3] = { 128 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */ 129 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL }, /* D-caron + cedilla */ 130 { "\\u010e", "\\u010e", NULL } /* D-caron */ 131 }; 132 133 void addNormTest(TestNode** root); 134 135 void addNormTest(TestNode** root) 136 { 137 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI"); 138 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp"); 139 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp"); 140 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose"); 141 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose"); 142 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD"); 143 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull"); 144 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck"); 145 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP"); 146 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized"); 147 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD"); 148 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage"); 149 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate"); 150 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious"); 151 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure"); 152 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition"); 153 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition"); 154 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition"); 155 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle"); 156 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance"); 157 } 158 159 static const char* const modeStrings[]={ 160 "UNORM_NONE", 161 "UNORM_NFD", 162 "UNORM_NFKD", 163 "UNORM_NFC", 164 "UNORM_NFKC", 165 "UNORM_FCD", 166 "UNORM_MODE_COUNT" 167 }; 168 169 static void TestNormCases(UNormalizationMode mode, 170 const char* const cases[][3], int32_t lengthOfCases) { 171 int32_t x, neededLen, length2; 172 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1; 173 UChar *source=NULL; 174 UChar result[16]; 175 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]); 176 for(x=0; x < lengthOfCases; x++) 177 { 178 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; 179 source=CharsToUChars(cases[x][0]); 180 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status); 181 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2); 182 if(neededLen!=length2) { 183 log_err("ERROR in unorm_normalize(%s)[%d]: " 184 "preflight length/NUL %d!=%d preflight length/srcLength\n", 185 modeStrings[mode], (int)x, (int)neededLen, (int)length2); 186 } 187 if(status==U_BUFFER_OVERFLOW_ERROR) 188 { 189 status=U_ZERO_ERROR; 190 } 191 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status); 192 if(U_FAILURE(status) || neededLen!=length2) { 193 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n", 194 modeStrings[mode], austrdup(source), myErrorName(status)); 195 } else { 196 assertEqual(result, cases[x][expIndex], x); 197 } 198 length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status); 199 if(U_FAILURE(status) || neededLen!=length2) { 200 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n", 201 modeStrings[mode], austrdup(source), myErrorName(status)); 202 } else { 203 assertEqual(result, cases[x][expIndex], x); 204 } 205 free(source); 206 } 207 } 208 209 void TestDecomp() { 210 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests)); 211 } 212 213 void TestCompatDecomp() { 214 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests)); 215 } 216 217 void TestCanonDecompCompose() { 218 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests)); 219 } 220 221 void TestCompatDecompCompose() { 222 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests)); 223 } 224 225 void TestFCD() { 226 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests)); 227 } 228 229 static void assertEqual(const UChar* result, const char* expected, int32_t index) 230 { 231 UChar *expectedUni = CharsToUChars(expected); 232 if(u_strcmp(result, expectedUni)!=0){ 233 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected, 234 austrdup(result) ); 235 } 236 free(expectedUni); 237 } 238 239 static void TestNull_check(UChar *src, int32_t srcLen, 240 UChar *exp, int32_t expLen, 241 UNormalizationMode mode, 242 const char *name) 243 { 244 UErrorCode status = U_ZERO_ERROR; 245 int32_t len, i; 246 247 UChar result[50]; 248 249 250 status = U_ZERO_ERROR; 251 252 for(i=0;i<50;i++) 253 { 254 result[i] = 0xFFFD; 255 } 256 257 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); 258 259 if(U_FAILURE(status)) { 260 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status)); 261 } else if (len != expLen) { 262 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len); 263 } 264 265 { 266 for(i=0;i<len;i++){ 267 if(exp[i] != result[i]) { 268 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n", 269 name, 270 i, 271 exp[i], 272 result[i]); 273 return; 274 } 275 log_verbose(" %d: \\u%04X\n", i, result[i]); 276 } 277 } 278 279 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name); 280 } 281 282 void TestNull() 283 { 284 285 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 }; 286 int32_t source_comp_len = 4; 287 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a }; 288 int32_t expect_comp_len = 3; 289 290 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 }; 291 int32_t source_dcmp_len = 3; 292 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C }; 293 int32_t expect_dcmp_len = 5; 294 295 TestNull_check(source_comp, 296 source_comp_len, 297 expect_comp, 298 expect_comp_len, 299 UNORM_NFC, 300 "UNORM_NFC"); 301 302 TestNull_check(source_dcmp, 303 source_dcmp_len, 304 expect_dcmp, 305 expect_dcmp_len, 306 UNORM_NFD, 307 "UNORM_NFD"); 308 309 TestNull_check(source_comp, 310 source_comp_len, 311 expect_comp, 312 expect_comp_len, 313 UNORM_NFKC, 314 "UNORM_NFKC"); 315 316 317 } 318 319 static void TestQuickCheckResultNO() 320 { 321 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, 322 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E}; 323 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, 324 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E}; 325 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, 326 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; 327 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, 328 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; 329 330 331 const int SIZE = 10; 332 333 int count = 0; 334 UErrorCode error = U_ZERO_ERROR; 335 336 for (; count < SIZE; count ++) 337 { 338 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 339 UNORM_NO) 340 { 341 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); 342 return; 343 } 344 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 345 UNORM_NO) 346 { 347 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); 348 return; 349 } 350 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 351 UNORM_NO) 352 { 353 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); 354 return; 355 } 356 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 357 UNORM_NO) 358 { 359 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 360 return; 361 } 362 } 363 } 364 365 366 static void TestQuickCheckResultYES() 367 { 368 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, 369 0x2261, 0x3075, 0x4000, 0x5000, 0xF000}; 370 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, 371 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000}; 372 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, 373 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27}; 374 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, 375 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E}; 376 377 const int SIZE = 10; 378 int count = 0; 379 UErrorCode error = U_ZERO_ERROR; 380 381 UChar cp = 0; 382 while (cp < 0xA0) 383 { 384 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) 385 { 386 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp); 387 return; 388 } 389 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != 390 UNORM_YES) 391 { 392 log_err("ERROR in NFC quick check at U+%04x\n", cp); 393 return; 394 } 395 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) 396 { 397 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp); 398 return; 399 } 400 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != 401 UNORM_YES) 402 { 403 log_err("ERROR in NFKC quick check at U+%04x\n", cp); 404 return; 405 } 406 cp ++; 407 } 408 409 for (; count < SIZE; count ++) 410 { 411 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 412 UNORM_YES) 413 { 414 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); 415 return; 416 } 417 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) 418 != UNORM_YES) 419 { 420 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); 421 return; 422 } 423 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 424 UNORM_YES) 425 { 426 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); 427 return; 428 } 429 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 430 UNORM_YES) 431 { 432 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 433 return; 434 } 435 } 436 } 437 438 static void TestQuickCheckResultMAYBE() 439 { 440 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, 441 0x116A, 0x1173, 0x1175, 0x3099, 0x309A}; 442 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, 443 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099}; 444 445 446 const int SIZE = 10; 447 448 int count = 0; 449 UErrorCode error = U_ZERO_ERROR; 450 451 /* NFD and NFKD does not have any MAYBE codepoints */ 452 for (; count < SIZE; count ++) 453 { 454 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 455 UNORM_MAYBE) 456 { 457 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]); 458 return; 459 } 460 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 461 UNORM_MAYBE) 462 { 463 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 464 return; 465 } 466 } 467 } 468 469 static void TestQuickCheckStringResult() 470 { 471 int count; 472 UChar *d = NULL; 473 UChar *c = NULL; 474 UErrorCode error = U_ZERO_ERROR; 475 476 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++) 477 { 478 d = CharsToUChars(canonTests[count][1]); 479 c = CharsToUChars(canonTests[count][2]); 480 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != 481 UNORM_YES) 482 { 483 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count); 484 return; 485 } 486 487 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == 488 UNORM_NO) 489 { 490 log_err("ERROR in NFC quick check for string at count %d\n", count); 491 return; 492 } 493 494 free(d); 495 free(c); 496 } 497 498 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++) 499 { 500 d = CharsToUChars(compatTests[count][1]); 501 c = CharsToUChars(compatTests[count][2]); 502 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != 503 UNORM_YES) 504 { 505 log_data_err("ERROR in NFKD quick check for string at count %d\n", count); 506 return; 507 } 508 509 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != 510 UNORM_YES) 511 { 512 log_err("ERROR in NFKC quick check for string at count %d\n", count); 513 return; 514 } 515 516 free(d); 517 free(c); 518 } 519 } 520 521 void TestQuickCheck() 522 { 523 TestQuickCheckResultNO(); 524 TestQuickCheckResultYES(); 525 TestQuickCheckResultMAYBE(); 526 TestQuickCheckStringResult(); 527 } 528 529 /* 530 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_ 531 * normalized, and some that are not. 532 * Here we pick some specific cases and test the C API. 533 */ 534 static void TestIsNormalized(void) { 535 static const UChar notNFC[][8]={ /* strings that are not in NFC */ 536 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */ 537 { 0xfb1d, 0 }, /* excluded from composition */ 538 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */ 539 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */ 540 }; 541 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */ 542 { 0x1100, 0x1161, 0 }, /* Jamo compose */ 543 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */ 544 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */ 545 }; 546 547 int32_t i; 548 UErrorCode errorCode; 549 550 /* API test */ 551 552 /* normal case with length>=0 (length -1 used for special cases below) */ 553 errorCode=U_ZERO_ERROR; 554 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 555 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 556 } 557 558 /* incoming U_FAILURE */ 559 errorCode=U_TRUNCATED_CHAR_FOUND; 560 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode); 561 if(errorCode!=U_TRUNCATED_CHAR_FOUND) { 562 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode)); 563 } 564 565 /* NULL source */ 566 errorCode=U_ZERO_ERROR; 567 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode); 568 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 569 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); 570 } 571 572 /* bad length */ 573 errorCode=U_ZERO_ERROR; 574 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode); 575 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 576 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); 577 } 578 579 /* specific cases */ 580 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) { 581 errorCode=U_ZERO_ERROR; 582 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 583 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 584 } 585 errorCode=U_ZERO_ERROR; 586 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 587 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 588 } 589 } 590 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) { 591 errorCode=U_ZERO_ERROR; 592 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 593 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 594 } 595 } 596 } 597 598 void TestCheckFCD() 599 { 600 UErrorCode status = U_ZERO_ERROR; 601 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 602 0x0A}; 603 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301, 604 0x02B9, 0x0314, 0x0315, 0x0316}; 605 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, 606 0x0050, 0x0730, 0x09EE, 0x1E10}; 607 608 static const UChar datastr[][5] = 609 { {0x0061, 0x030A, 0x1E05, 0x0302, 0}, 610 {0x0061, 0x030A, 0x00E2, 0x0323, 0}, 611 {0x0061, 0x0323, 0x00E2, 0x0323, 0}, 612 {0x0061, 0x0323, 0x1E05, 0x0302, 0} }; 613 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES}; 614 615 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 616 0x6a, 617 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 618 0xea, 619 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 620 0x0307, 0x0308, 0x0309, 0x030a, 621 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 622 0x0327, 0x0328, 0x0329, 0x032a, 623 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 624 0x1e07, 0x1e08, 0x1e09, 0x1e0a}; 625 626 int count = 0; 627 628 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES) 629 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n"); 630 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO) 631 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n"); 632 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES) 633 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n"); 634 635 if (U_FAILURE(status)) 636 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status)); 637 638 while (count < 4) 639 { 640 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status); 641 if (U_FAILURE(status)) { 642 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count); 643 break; 644 } 645 else { 646 if (result[count] != fcdresult) { 647 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count, 648 result[count]); 649 } 650 } 651 count ++; 652 } 653 654 /* random checks of long strings */ 655 status = U_ZERO_ERROR; 656 srand((unsigned)time( NULL )); 657 658 for (count = 0; count < 50; count ++) 659 { 660 int size = 0; 661 UBool testresult = UNORM_YES; 662 UChar data[20]; 663 UChar norm[100]; 664 UChar nfd[100]; 665 int normsize = 0; 666 int nfdsize = 0; 667 668 while (size != 19) { 669 data[size] = datachar[(rand() * 50) / RAND_MAX]; 670 log_verbose("0x%x", data[size]); 671 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, 672 norm + normsize, 100 - normsize, &status); 673 if (U_FAILURE(status)) { 674 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n"); 675 break; 676 } 677 size ++; 678 } 679 log_verbose("\n"); 680 681 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, 682 nfd, 100, &status); 683 if (U_FAILURE(status)) { 684 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n"); 685 } 686 687 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) { 688 testresult = UNORM_NO; 689 } 690 if (testresult == UNORM_YES) { 691 log_verbose("result UNORM_YES\n"); 692 } 693 else { 694 log_verbose("result UNORM_NO\n"); 695 } 696 697 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) { 698 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult); 699 } 700 } 701 } 702 703 static void 704 TestAPI() { 705 static const UChar in[]={ 0x68, 0xe4 }; 706 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff }; 707 UErrorCode errorCode; 708 int32_t length; 709 710 /* try preflighting */ 711 errorCode=U_ZERO_ERROR; 712 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode); 713 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { 714 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 715 return; 716 } 717 718 errorCode=U_ZERO_ERROR; 719 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode); 720 if(U_FAILURE(errorCode)) { 721 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode)); 722 return; 723 } 724 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) { 725 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]); 726 return; 727 } 728 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode); 729 if(U_FAILURE(errorCode)) { 730 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode)); 731 return; 732 } 733 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode); 734 if(U_FAILURE(errorCode)) { 735 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode)); 736 return; 737 } 738 } 739 740 /* test cases to improve test code coverage */ 741 enum { 742 HANGUL_K_KIYEOK=0x3131, /* NFKD->Jamo L U+1100 */ 743 HANGUL_K_WEO=0x315d, /* NFKD->Jamo V U+116f */ 744 HANGUL_K_KIYEOK_SIOS=0x3133, /* NFKD->Jamo T U+11aa */ 745 746 HANGUL_KIYEOK=0x1100, /* Jamo L U+1100 */ 747 HANGUL_WEO=0x116f, /* Jamo V U+116f */ 748 HANGUL_KIYEOK_SIOS=0x11aa, /* Jamo T U+11aa */ 749 750 HANGUL_AC00=0xac00, /* Hangul syllable = Jamo LV U+ac00 */ 751 HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */ 752 753 MUSICAL_VOID_NOTEHEAD=0x1d157, 754 MUSICAL_HALF_NOTE=0x1d15e, /* NFC/NFD->Notehead+Stem */ 755 MUSICAL_STEM=0x1d165, /* cc=216 */ 756 MUSICAL_STACCATO=0x1d17c /* cc=220 */ 757 }; 758 759 static void 760 TestNormCoverage() { 761 UChar input[1000], expect[1000], output[1000]; 762 UErrorCode errorCode; 763 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength; 764 765 /* create a long and nasty string with NFKC-unsafe characters */ 766 inLength=0; 767 768 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */ 769 input[inLength++]=HANGUL_KIYEOK; 770 input[inLength++]=HANGUL_WEO; 771 input[inLength++]=HANGUL_KIYEOK_SIOS; 772 773 input[inLength++]=HANGUL_KIYEOK; 774 input[inLength++]=HANGUL_WEO; 775 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 776 777 input[inLength++]=HANGUL_KIYEOK; 778 input[inLength++]=HANGUL_K_WEO; 779 input[inLength++]=HANGUL_KIYEOK_SIOS; 780 781 input[inLength++]=HANGUL_KIYEOK; 782 input[inLength++]=HANGUL_K_WEO; 783 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 784 785 input[inLength++]=HANGUL_K_KIYEOK; 786 input[inLength++]=HANGUL_WEO; 787 input[inLength++]=HANGUL_KIYEOK_SIOS; 788 789 input[inLength++]=HANGUL_K_KIYEOK; 790 input[inLength++]=HANGUL_WEO; 791 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 792 793 input[inLength++]=HANGUL_K_KIYEOK; 794 input[inLength++]=HANGUL_K_WEO; 795 input[inLength++]=HANGUL_KIYEOK_SIOS; 796 797 input[inLength++]=HANGUL_K_KIYEOK; 798 input[inLength++]=HANGUL_K_WEO; 799 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 800 801 /* Hangul LV with normal/compatibility Jamo T */ 802 input[inLength++]=HANGUL_AC00; 803 input[inLength++]=HANGUL_KIYEOK_SIOS; 804 805 input[inLength++]=HANGUL_AC00; 806 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 807 808 /* compatibility Jamo L, V */ 809 input[inLength++]=HANGUL_K_KIYEOK; 810 input[inLength++]=HANGUL_K_WEO; 811 812 hangulPrefixLength=inLength; 813 814 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE); 815 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE); 816 for(i=0; i<200; ++i) { 817 input[inLength++]=U16_LEAD(MUSICAL_STACCATO); 818 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO); 819 input[inLength++]=U16_LEAD(MUSICAL_STEM); 820 input[inLength++]=U16_TRAIL(MUSICAL_STEM); 821 } 822 823 /* (compatibility) Jamo L, T do not compose */ 824 input[inLength++]=HANGUL_K_KIYEOK; 825 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 826 827 /* quick checks */ 828 errorCode=U_ZERO_ERROR; 829 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) { 830 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 831 } 832 errorCode=U_ZERO_ERROR; 833 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) { 834 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 835 } 836 errorCode=U_ZERO_ERROR; 837 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 838 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 839 } 840 errorCode=U_ZERO_ERROR; 841 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 842 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 843 } 844 errorCode=U_ZERO_ERROR; 845 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) { 846 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 847 } 848 849 /* NFKC */ 850 expectLength=0; 851 expect[expectLength++]=HANGUL_SYLLABLE; 852 853 expect[expectLength++]=HANGUL_SYLLABLE; 854 855 expect[expectLength++]=HANGUL_SYLLABLE; 856 857 expect[expectLength++]=HANGUL_SYLLABLE; 858 859 expect[expectLength++]=HANGUL_SYLLABLE; 860 861 expect[expectLength++]=HANGUL_SYLLABLE; 862 863 expect[expectLength++]=HANGUL_SYLLABLE; 864 865 expect[expectLength++]=HANGUL_SYLLABLE; 866 867 expect[expectLength++]=HANGUL_AC00+3; 868 869 expect[expectLength++]=HANGUL_AC00+3; 870 871 expect[expectLength++]=HANGUL_AC00+14*28; 872 873 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); 874 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); 875 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 876 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 877 for(i=0; i<200; ++i) { 878 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 879 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 880 } 881 for(i=0; i<200; ++i) { 882 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); 883 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); 884 } 885 886 expect[expectLength++]=HANGUL_KIYEOK; 887 expect[expectLength++]=HANGUL_KIYEOK_SIOS; 888 889 /* try destination overflow first */ 890 errorCode=U_ZERO_ERROR; 891 preflightLength=unorm_normalize(input, inLength, 892 UNORM_NFKC, 0, 893 output, 100, /* too short */ 894 &errorCode); 895 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { 896 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode)); 897 } 898 899 /* real NFKC */ 900 errorCode=U_ZERO_ERROR; 901 length=unorm_normalize(input, inLength, 902 UNORM_NFKC, 0, 903 output, sizeof(output)/U_SIZEOF_UCHAR, 904 &errorCode); 905 if(U_FAILURE(errorCode)) { 906 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode)); 907 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { 908 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n"); 909 for(i=0; i<length; ++i) { 910 if(output[i]!=expect[i]) { 911 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]); 912 break; 913 } 914 } 915 } 916 if(length!=preflightLength) { 917 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength); 918 } 919 920 /* FCD */ 921 u_memcpy(expect, input, hangulPrefixLength); 922 expectLength=hangulPrefixLength; 923 924 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); 925 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); 926 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 927 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 928 for(i=0; i<200; ++i) { 929 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 930 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 931 } 932 for(i=0; i<200; ++i) { 933 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); 934 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); 935 } 936 937 expect[expectLength++]=HANGUL_K_KIYEOK; 938 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS; 939 940 errorCode=U_ZERO_ERROR; 941 length=unorm_normalize(input, inLength, 942 UNORM_FCD, 0, 943 output, sizeof(output)/U_SIZEOF_UCHAR, 944 &errorCode); 945 if(U_FAILURE(errorCode)) { 946 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode)); 947 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { 948 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n"); 949 for(i=0; i<length; ++i) { 950 if(output[i]!=expect[i]) { 951 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]); 952 break; 953 } 954 } 955 } 956 } 957 958 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */ 959 static void 960 TestConcatenate(void) { 961 /* "re + 'sume'" */ 962 static const UChar 963 left[]={ 964 0x72, 0x65, 0 965 }, 966 right[]={ 967 0x301, 0x73, 0x75, 0x6d, 0xe9, 0 968 }, 969 expect[]={ 970 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0 971 }; 972 973 UChar buffer[100]; 974 UErrorCode errorCode; 975 int32_t length; 976 977 /* left with length, right NUL-terminated */ 978 errorCode=U_ZERO_ERROR; 979 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 980 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) { 981 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 982 } 983 984 /* preflighting */ 985 errorCode=U_ZERO_ERROR; 986 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode); 987 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) { 988 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 989 } 990 991 buffer[2]=0x5555; 992 errorCode=U_ZERO_ERROR; 993 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode); 994 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) { 995 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 996 } 997 998 /* enter with U_FAILURE */ 999 buffer[2]=0xaaaa; 1000 errorCode=U_UNEXPECTED_TOKEN; 1001 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 1002 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) { 1003 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode)); 1004 } 1005 1006 /* illegal arguments */ 1007 buffer[2]=0xaaaa; 1008 errorCode=U_ZERO_ERROR; 1009 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 1010 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) { 1011 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 1012 } 1013 1014 errorCode=U_ZERO_ERROR; 1015 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode); 1016 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1017 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 1018 } 1019 } 1020 1021 enum { 1022 _PLUS=0x2b 1023 }; 1024 1025 static const char *const _modeString[UNORM_MODE_COUNT]={ 1026 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD" 1027 }; 1028 1029 static void 1030 _testIter(const UChar *src, int32_t srcLength, 1031 UCharIterator *iter, UNormalizationMode mode, UBool forward, 1032 const UChar *out, int32_t outLength, 1033 const int32_t *srcIndexes, int32_t srcIndexesLength) { 1034 UChar buffer[4]; 1035 const UChar *expect, *outLimit, *in; 1036 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength; 1037 UErrorCode errorCode; 1038 UBool neededToNormalize, expectNeeded; 1039 1040 errorCode=U_ZERO_ERROR; 1041 outLimit=out+outLength; 1042 if(forward) { 1043 expect=out; 1044 i=index=0; 1045 } else { 1046 expect=outLimit; 1047 i=srcIndexesLength-2; 1048 index=srcLength; 1049 } 1050 1051 for(;;) { 1052 prevIndex=index; 1053 if(forward) { 1054 if(!iter->hasNext(iter)) { 1055 return; 1056 } 1057 length=unorm_next(iter, 1058 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1059 mode, 0, 1060 (UBool)(out!=NULL), &neededToNormalize, 1061 &errorCode); 1062 expectIndex=srcIndexes[i+1]; 1063 in=src+prevIndex; 1064 inLength=expectIndex-prevIndex; 1065 1066 if(out!=NULL) { 1067 /* get output piece from between plus signs */ 1068 expectLength=0; 1069 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) { 1070 ++expectLength; 1071 } 1072 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); 1073 } else { 1074 expect=in; 1075 expectLength=inLength; 1076 expectNeeded=FALSE; 1077 } 1078 } else { 1079 if(!iter->hasPrevious(iter)) { 1080 return; 1081 } 1082 length=unorm_previous(iter, 1083 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1084 mode, 0, 1085 (UBool)(out!=NULL), &neededToNormalize, 1086 &errorCode); 1087 expectIndex=srcIndexes[i]; 1088 in=src+expectIndex; 1089 inLength=prevIndex-expectIndex; 1090 1091 if(out!=NULL) { 1092 /* get output piece from between plus signs */ 1093 expectLength=0; 1094 while(expect!=out && expect[-1]!=_PLUS) { 1095 ++expectLength; 1096 --expect; 1097 } 1098 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); 1099 } else { 1100 expect=in; 1101 expectLength=inLength; 1102 expectNeeded=FALSE; 1103 } 1104 } 1105 index=iter->getIndex(iter, UITER_CURRENT); 1106 1107 if(U_FAILURE(errorCode)) { 1108 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n", 1109 forward, _modeString[mode], i, u_errorName(errorCode)); 1110 return; 1111 } 1112 if(expectIndex!=index) { 1113 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n", 1114 forward, _modeString[mode], i, index, expectIndex); 1115 return; 1116 } 1117 if(expectLength!=length) { 1118 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n", 1119 forward, _modeString[mode], i, length, expectLength); 1120 return; 1121 } 1122 if(0!=u_memcmp(expect, buffer, length)) { 1123 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n", 1124 forward, _modeString[mode], i); 1125 return; 1126 } 1127 if(neededToNormalize!=expectNeeded) { 1128 } 1129 1130 if(forward) { 1131 expect+=expectLength+1; /* go after the + */ 1132 ++i; 1133 } else { 1134 --expect; /* go before the + */ 1135 --i; 1136 } 1137 } 1138 } 1139 1140 static void 1141 TestNextPrevious() { 1142 static const UChar 1143 src[]={ /* input string */ 1144 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133 1145 }, 1146 nfd[]={ /* + separates expected output pieces */ 1147 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133 1148 }, 1149 nfkd[]={ 1150 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa 1151 }, 1152 nfc[]={ 1153 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 1154 }, 1155 nfkc[]={ 1156 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03 1157 }, 1158 fcd[]={ 1159 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 1160 }; 1161 1162 /* expected iterator indexes in the source string for each iteration piece */ 1163 static const int32_t 1164 nfdIndexes[]={ 1165 0, 1, 2, 5, 6, 7 1166 }, 1167 nfkdIndexes[]={ 1168 0, 1, 2, 5, 6, 7 1169 }, 1170 nfcIndexes[]={ 1171 0, 1, 2, 5, 6, 7 1172 }, 1173 nfkcIndexes[]={ 1174 0, 1, 2, 5, 7 1175 }, 1176 fcdIndexes[]={ 1177 0, 1, 2, 5, 6, 7 1178 }; 1179 1180 UCharIterator iter; 1181 1182 UChar buffer[4]; 1183 int32_t length; 1184 1185 UBool neededToNormalize; 1186 UErrorCode errorCode; 1187 1188 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR); 1189 1190 /* test iteration with doNormalize */ 1191 iter.index=0; 1192 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); 1193 iter.index=0; 1194 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); 1195 iter.index=0; 1196 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); 1197 iter.index=0; 1198 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); 1199 iter.index=0; 1200 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); 1201 1202 iter.index=iter.length; 1203 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); 1204 iter.index=iter.length; 1205 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); 1206 iter.index=iter.length; 1207 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); 1208 iter.index=iter.length; 1209 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); 1210 iter.index=iter.length; 1211 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); 1212 1213 /* test iteration without doNormalize */ 1214 iter.index=0; 1215 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4); 1216 iter.index=0; 1217 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4); 1218 iter.index=0; 1219 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4); 1220 iter.index=0; 1221 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4); 1222 iter.index=0; 1223 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4); 1224 1225 iter.index=iter.length; 1226 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4); 1227 iter.index=iter.length; 1228 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4); 1229 iter.index=iter.length; 1230 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4); 1231 iter.index=iter.length; 1232 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4); 1233 iter.index=iter.length; 1234 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4); 1235 1236 /* try without neededToNormalize */ 1237 errorCode=U_ZERO_ERROR; 1238 buffer[0]=5; 1239 iter.index=1; 1240 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1241 UNORM_NFD, 0, TRUE, NULL, 1242 &errorCode); 1243 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) { 1244 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode)); 1245 return; 1246 } 1247 1248 /* preflight */ 1249 neededToNormalize=9; 1250 iter.index=1; 1251 length=unorm_next(&iter, NULL, 0, 1252 UNORM_NFD, 0, TRUE, &neededToNormalize, 1253 &errorCode); 1254 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) { 1255 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode)); 1256 return; 1257 } 1258 1259 errorCode=U_ZERO_ERROR; 1260 buffer[0]=buffer[1]=5; 1261 neededToNormalize=9; 1262 iter.index=1; 1263 length=unorm_next(&iter, buffer, 1, 1264 UNORM_NFD, 0, TRUE, &neededToNormalize, 1265 &errorCode); 1266 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) { 1267 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode)); 1268 return; 1269 } 1270 1271 /* no iterator */ 1272 errorCode=U_ZERO_ERROR; 1273 buffer[0]=buffer[1]=5; 1274 neededToNormalize=9; 1275 iter.index=1; 1276 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1277 UNORM_NFD, 0, TRUE, &neededToNormalize, 1278 &errorCode); 1279 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1280 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode)); 1281 return; 1282 } 1283 1284 /* illegal mode */ 1285 buffer[0]=buffer[1]=5; 1286 neededToNormalize=9; 1287 iter.index=1; 1288 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1289 (UNormalizationMode)0, 0, TRUE, &neededToNormalize, 1290 &errorCode); 1291 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1292 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode)); 1293 return; 1294 } 1295 1296 /* error coming in */ 1297 errorCode=U_MISPLACED_QUANTIFIER; 1298 buffer[0]=5; 1299 iter.index=1; 1300 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1301 UNORM_NFD, 0, TRUE, NULL, 1302 &errorCode); 1303 if(errorCode!=U_MISPLACED_QUANTIFIER) { 1304 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode)); 1305 return; 1306 } 1307 } 1308 1309 static void 1310 TestFCNFKCClosure(void) { 1311 static const struct { 1312 UChar32 c; 1313 const UChar s[6]; 1314 } tests[]={ 1315 { 0x00C4, { 0 } }, 1316 { 0x00E4, { 0 } }, 1317 { 0x037A, { 0x0020, 0x03B9, 0 } }, 1318 { 0x03D2, { 0x03C5, 0 } }, 1319 { 0x20A8, { 0x0072, 0x0073, 0 } }, 1320 { 0x210B, { 0x0068, 0 } }, 1321 { 0x210C, { 0x0068, 0 } }, 1322 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } }, 1323 { 0x2122, { 0x0074, 0x006D, 0 } }, 1324 { 0x2128, { 0x007A, 0 } }, 1325 { 0x1D5DB, { 0x0068, 0 } }, 1326 { 0x1D5ED, { 0x007A, 0 } }, 1327 { 0x0061, { 0 } } 1328 }; 1329 1330 UChar buffer[8]; 1331 UErrorCode errorCode; 1332 int32_t i, length; 1333 1334 for(i=0; i<UPRV_LENGTHOF(tests); ++i) { 1335 errorCode=U_ZERO_ERROR; 1336 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode); 1337 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) { 1338 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode)); 1339 } 1340 } 1341 1342 /* error handling */ 1343 errorCode=U_ZERO_ERROR; 1344 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode); 1345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1346 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode)); 1347 } 1348 1349 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode); 1350 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1351 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode)); 1352 } 1353 } 1354 1355 static void 1356 TestQuickCheckPerCP() { 1357 UErrorCode errorCode; 1358 UChar32 c, lead, trail; 1359 UChar s[U16_MAX_LENGTH], nfd[16]; 1360 int32_t length, lccc1, lccc2, tccc1, tccc2; 1361 int32_t qc1, qc2; 1362 1363 if( 1364 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES || 1365 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES || 1366 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || 1367 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || 1368 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) || 1369 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) 1370 ) { 1371 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n"); 1372 } 1373 1374 /* 1375 * compare the quick check property values for some code points 1376 * to the quick check results for checking same-code point strings 1377 */ 1378 errorCode=U_ZERO_ERROR; 1379 c=0; 1380 while(c<0x110000) { 1381 length=0; 1382 U16_APPEND_UNSAFE(s, length, c); 1383 1384 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK); 1385 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode); 1386 if(qc1!=qc2) { 1387 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1388 } 1389 1390 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK); 1391 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode); 1392 if(qc1!=qc2) { 1393 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1394 } 1395 1396 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK); 1397 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode); 1398 if(qc1!=qc2) { 1399 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1400 } 1401 1402 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK); 1403 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode); 1404 if(qc1!=qc2) { 1405 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1406 } 1407 1408 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode); 1409 /* length-length == 0 is used to get around a compiler warning. */ 1410 U16_GET(nfd, 0, length-length, length, lead); 1411 U16_GET(nfd, 0, length-1, length, trail); 1412 1413 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS); 1414 lccc2=u_getCombiningClass(lead); 1415 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS); 1416 tccc2=u_getCombiningClass(trail); 1417 1418 if(lccc1!=lccc2) { 1419 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n", 1420 lccc1, lccc2, c); 1421 } 1422 if(tccc1!=tccc2) { 1423 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n", 1424 tccc1, tccc2, c); 1425 } 1426 1427 /* skip some code points */ 1428 c=(20*c)/19+1; 1429 } 1430 } 1431 1432 static void 1433 TestComposition(void) { 1434 static const struct { 1435 UNormalizationMode mode; 1436 uint32_t options; 1437 UChar input[12]; 1438 UChar expect[12]; 1439 } cases[]={ 1440 /* 1441 * special cases for UAX #15 bug 1442 * see Unicode Corrigendum #5: Normalization Idempotency 1443 * at http://unicode.org/versions/corrigendum5.html 1444 * (was Public Review Issue #29) 1445 */ 1446 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } }, 1447 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } }, 1448 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } }, 1449 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } }, 1450 1451 /* TODO: add test cases for UNORM_FCC here (j2151) */ 1452 }; 1453 1454 UChar output[16]; 1455 UErrorCode errorCode; 1456 int32_t i, length; 1457 1458 for(i=0; i<UPRV_LENGTHOF(cases); ++i) { 1459 errorCode=U_ZERO_ERROR; 1460 length=unorm_normalize( 1461 cases[i].input, -1, 1462 cases[i].mode, cases[i].options, 1463 output, UPRV_LENGTHOF(output), 1464 &errorCode); 1465 if( U_FAILURE(errorCode) || 1466 length!=u_strlen(cases[i].expect) || 1467 0!=u_memcmp(output, cases[i].expect, length) 1468 ) { 1469 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i); 1470 } 1471 } 1472 } 1473 1474 static void 1475 TestGetDecomposition() { 1476 UChar decomp[32]; 1477 int32_t length; 1478 1479 UErrorCode errorCode=U_ZERO_ERROR; 1480 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode); 1481 if(U_FAILURE(errorCode)) { 1482 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode)); 1483 return; 1484 } 1485 1486 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1487 if(U_FAILURE(errorCode) || length>=0) { 1488 log_err("unorm2_getDecomposition(fcc, space) failed\n"); 1489 } 1490 errorCode=U_ZERO_ERROR; 1491 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1492 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) { 1493 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n"); 1494 } 1495 errorCode=U_ZERO_ERROR; 1496 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1497 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) { 1498 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n"); 1499 } 1500 errorCode=U_ZERO_ERROR; 1501 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode); 1502 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { 1503 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n"); 1504 } 1505 errorCode=U_ZERO_ERROR; 1506 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode); 1507 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1508 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n"); 1509 } 1510 errorCode=U_ZERO_ERROR; 1511 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode); 1512 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1513 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n"); 1514 } 1515 } 1516 1517 static void 1518 TestGetRawDecomposition() { 1519 UChar decomp[32]; 1520 int32_t length; 1521 1522 UErrorCode errorCode=U_ZERO_ERROR; 1523 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode); 1524 if(U_FAILURE(errorCode)) { 1525 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); 1526 return; 1527 } 1528 /* 1529 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values, 1530 * without recursive decomposition. 1531 */ 1532 1533 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1534 if(U_FAILURE(errorCode) || length>=0) { 1535 log_err("unorm2_getDecomposition(nfkc, space) failed\n"); 1536 } 1537 errorCode=U_ZERO_ERROR; 1538 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1539 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) { 1540 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n"); 1541 } 1542 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */ 1543 errorCode=U_ZERO_ERROR; 1544 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1545 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) { 1546 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n"); 1547 } 1548 /* U+212B ANGSTROM SIGN */ 1549 errorCode=U_ZERO_ERROR; 1550 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1551 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) { 1552 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n"); 1553 } 1554 errorCode=U_ZERO_ERROR; 1555 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1556 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) { 1557 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n"); 1558 } 1559 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */ 1560 errorCode=U_ZERO_ERROR; 1561 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1562 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) { 1563 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n"); 1564 } 1565 errorCode=U_ZERO_ERROR; 1566 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode); 1567 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) { 1568 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n"); 1569 } 1570 errorCode=U_ZERO_ERROR; 1571 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode); 1572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1573 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n"); 1574 } 1575 errorCode=U_ZERO_ERROR; 1576 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode); 1577 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1578 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n"); 1579 } 1580 } 1581 1582 static void 1583 TestAppendRestoreMiddle() { 1584 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */ 1585 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */ 1586 /* NFC: C5 is 'A with ring above' */ 1587 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 }; 1588 int32_t length; 1589 UErrorCode errorCode=U_ZERO_ERROR; 1590 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); 1591 if(U_FAILURE(errorCode)) { 1592 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); 1593 return; 1594 } 1595 /* 1596 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity. 1597 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A> 1598 * still fits into a[] but the full result still overflows this capacity. 1599 * (Let it modify the destination buffer before reallocating internally.) 1600 */ 1601 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode); 1602 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) { 1603 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length); 1604 return; 1605 } 1606 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */ 1607 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) { 1608 log_err("unorm2_append(overflow) modified the first string\n"); 1609 return; 1610 } 1611 errorCode=U_ZERO_ERROR; 1612 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode); 1613 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) { 1614 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length); 1615 return; 1616 } 1617 } 1618 1619 static void 1620 TestGetEasyToUseInstance() { 1621 static const UChar in[]={ 1622 0xA0, /* -> <noBreak> 0020 */ 1623 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */ 1624 }; 1625 UChar out[32]; 1626 int32_t length; 1627 1628 UErrorCode errorCode=U_ZERO_ERROR; 1629 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); 1630 if(U_FAILURE(errorCode)) { 1631 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); 1632 return; 1633 } 1634 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1635 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) { 1636 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n", 1637 (int)length, u_errorName(errorCode)); 1638 } 1639 1640 errorCode=U_ZERO_ERROR; 1641 n2=unorm2_getNFDInstance(&errorCode); 1642 if(U_FAILURE(errorCode)) { 1643 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode)); 1644 return; 1645 } 1646 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1647 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { 1648 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n", 1649 (int)length, u_errorName(errorCode)); 1650 } 1651 1652 errorCode=U_ZERO_ERROR; 1653 n2=unorm2_getNFKCInstance(&errorCode); 1654 if(U_FAILURE(errorCode)) { 1655 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); 1656 return; 1657 } 1658 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1659 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) { 1660 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n", 1661 (int)length, u_errorName(errorCode)); 1662 } 1663 1664 errorCode=U_ZERO_ERROR; 1665 n2=unorm2_getNFKDInstance(&errorCode); 1666 if(U_FAILURE(errorCode)) { 1667 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode)); 1668 return; 1669 } 1670 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1671 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { 1672 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n", 1673 (int)length, u_errorName(errorCode)); 1674 } 1675 1676 errorCode=U_ZERO_ERROR; 1677 n2=unorm2_getNFKCCasefoldInstance(&errorCode); 1678 if(U_FAILURE(errorCode)) { 1679 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode)); 1680 return; 1681 } 1682 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1683 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) { 1684 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n", 1685 (int)length, u_errorName(errorCode)); 1686 } 1687 } 1688 1689 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 1690