1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File CNORMTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API 13 * synwee added test for quick check 14 * synwee added test for checkFCD 15 *********************************************************************************/ 16 /*tests for u_normalization*/ 17 #include "unicode/utypes.h" 18 #include "unicode/unorm.h" 19 #include "unicode/utf16.h" 20 #include "cintltst.h" 21 22 #if !UCONFIG_NO_NORMALIZATION 23 24 #include <stdlib.h> 25 #include <time.h> 26 #include "unicode/uchar.h" 27 #include "unicode/ustring.h" 28 #include "unicode/unorm.h" 29 #include "cnormtst.h" 30 31 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof ((array)[0])) 32 33 static void 34 TestAPI(void); 35 36 static void 37 TestNormCoverage(void); 38 39 static void 40 TestConcatenate(void); 41 42 static void 43 TestNextPrevious(void); 44 45 static void TestIsNormalized(void); 46 47 static void 48 TestFCNFKCClosure(void); 49 50 static void 51 TestQuickCheckPerCP(void); 52 53 static void 54 TestComposition(void); 55 56 static void 57 TestFCD(void); 58 59 static void 60 TestGetDecomposition(void); 61 62 static void 63 TestGetRawDecomposition(void); 64 65 static void TestAppendRestoreMiddle(void); 66 static void TestGetEasyToUseInstance(void); 67 68 static const char* const canonTests[][3] = { 69 /* Input*/ /*Decomposed*/ /*Composed*/ 70 { "cat", "cat", "cat" }, 71 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", }, 72 73 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/ 74 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/ 75 76 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/ 77 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */ 78 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */ 79 80 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/ 81 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/ 82 83 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/ 84 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/ 85 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/ 86 87 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/ 88 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/ 89 90 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" }, 91 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" }, 92 93 { "Henry IV", "Henry IV", "Henry IV" }, 94 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" }, 95 96 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/ 97 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/ 98 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/ 99 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/ 100 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/ 101 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* hw_ka + ten*/ 102 { "", "", "" } 103 }; 104 105 static const char* const compatTests[][3] = { 106 /* Input*/ /*Decomposed */ /*Composed*/ 107 { "cat", "cat", "cat" }, 108 109 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/ 110 111 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" }, 112 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/ 113 114 { "Henry IV", "Henry IV", "Henry IV" }, 115 { "Henry \\u2163", "Henry IV", "Henry IV" }, 116 117 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/ 118 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/ 119 120 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/ 121 122 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/ 123 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/ 124 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/ 125 { "", "", "" } 126 }; 127 128 static const char* const fcdTests[][3] = { 129 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */ 130 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL }, /* D-caron + cedilla */ 131 { "\\u010e", "\\u010e", NULL } /* D-caron */ 132 }; 133 134 void addNormTest(TestNode** root); 135 136 void addNormTest(TestNode** root) 137 { 138 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI"); 139 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp"); 140 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp"); 141 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose"); 142 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose"); 143 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD"); 144 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull"); 145 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck"); 146 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP"); 147 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized"); 148 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD"); 149 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage"); 150 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate"); 151 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious"); 152 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure"); 153 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition"); 154 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition"); 155 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition"); 156 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle"); 157 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance"); 158 } 159 160 static const char* const modeStrings[]={ 161 "UNORM_NONE", 162 "UNORM_NFD", 163 "UNORM_NFKD", 164 "UNORM_NFC", 165 "UNORM_NFKC", 166 "UNORM_FCD", 167 "UNORM_MODE_COUNT" 168 }; 169 170 static void TestNormCases(UNormalizationMode mode, 171 const char* const cases[][3], int32_t lengthOfCases) { 172 int32_t x, neededLen, length2; 173 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1; 174 UChar *source=NULL; 175 UChar result[16]; 176 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]); 177 for(x=0; x < lengthOfCases; x++) 178 { 179 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; 180 source=CharsToUChars(cases[x][0]); 181 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status); 182 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2); 183 if(neededLen!=length2) { 184 log_err("ERROR in unorm_normalize(%s)[%d]: " 185 "preflight length/NUL %d!=%d preflight length/srcLength\n", 186 modeStrings[mode], (int)x, (int)neededLen, (int)length2); 187 } 188 if(status==U_BUFFER_OVERFLOW_ERROR) 189 { 190 status=U_ZERO_ERROR; 191 } 192 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGTHOF(result), &status); 193 if(U_FAILURE(status) || neededLen!=length2) { 194 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n", 195 modeStrings[mode], austrdup(source), myErrorName(status)); 196 } else { 197 assertEqual(result, cases[x][expIndex], x); 198 } 199 length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &status); 200 if(U_FAILURE(status) || neededLen!=length2) { 201 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n", 202 modeStrings[mode], austrdup(source), myErrorName(status)); 203 } else { 204 assertEqual(result, cases[x][expIndex], x); 205 } 206 free(source); 207 } 208 } 209 210 void TestDecomp() { 211 TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests)); 212 } 213 214 void TestCompatDecomp() { 215 TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests)); 216 } 217 218 void TestCanonDecompCompose() { 219 TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests)); 220 } 221 222 void TestCompatDecompCompose() { 223 TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests)); 224 } 225 226 void TestFCD() { 227 TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests)); 228 } 229 230 static void assertEqual(const UChar* result, const char* expected, int32_t index) 231 { 232 UChar *expectedUni = CharsToUChars(expected); 233 if(u_strcmp(result, expectedUni)!=0){ 234 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected, 235 austrdup(result) ); 236 } 237 free(expectedUni); 238 } 239 240 static void TestNull_check(UChar *src, int32_t srcLen, 241 UChar *exp, int32_t expLen, 242 UNormalizationMode mode, 243 const char *name) 244 { 245 UErrorCode status = U_ZERO_ERROR; 246 int32_t len, i; 247 248 UChar result[50]; 249 250 251 status = U_ZERO_ERROR; 252 253 for(i=0;i<50;i++) 254 { 255 result[i] = 0xFFFD; 256 } 257 258 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); 259 260 if(U_FAILURE(status)) { 261 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status)); 262 } else if (len != expLen) { 263 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len); 264 } 265 266 { 267 for(i=0;i<len;i++){ 268 if(exp[i] != result[i]) { 269 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n", 270 name, 271 i, 272 exp[i], 273 result[i]); 274 return; 275 } 276 log_verbose(" %d: \\u%04X\n", i, result[i]); 277 } 278 } 279 280 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name); 281 } 282 283 void TestNull() 284 { 285 286 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 }; 287 int32_t source_comp_len = 4; 288 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a }; 289 int32_t expect_comp_len = 3; 290 291 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 }; 292 int32_t source_dcmp_len = 3; 293 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C }; 294 int32_t expect_dcmp_len = 5; 295 296 TestNull_check(source_comp, 297 source_comp_len, 298 expect_comp, 299 expect_comp_len, 300 UNORM_NFC, 301 "UNORM_NFC"); 302 303 TestNull_check(source_dcmp, 304 source_dcmp_len, 305 expect_dcmp, 306 expect_dcmp_len, 307 UNORM_NFD, 308 "UNORM_NFD"); 309 310 TestNull_check(source_comp, 311 source_comp_len, 312 expect_comp, 313 expect_comp_len, 314 UNORM_NFKC, 315 "UNORM_NFKC"); 316 317 318 } 319 320 static void TestQuickCheckResultNO() 321 { 322 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, 323 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E}; 324 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, 325 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E}; 326 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, 327 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; 328 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, 329 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; 330 331 332 const int SIZE = 10; 333 334 int count = 0; 335 UErrorCode error = U_ZERO_ERROR; 336 337 for (; count < SIZE; count ++) 338 { 339 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 340 UNORM_NO) 341 { 342 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); 343 return; 344 } 345 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 346 UNORM_NO) 347 { 348 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); 349 return; 350 } 351 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 352 UNORM_NO) 353 { 354 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); 355 return; 356 } 357 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 358 UNORM_NO) 359 { 360 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 361 return; 362 } 363 } 364 } 365 366 367 static void TestQuickCheckResultYES() 368 { 369 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, 370 0x2261, 0x3075, 0x4000, 0x5000, 0xF000}; 371 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, 372 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000}; 373 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, 374 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27}; 375 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, 376 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E}; 377 378 const int SIZE = 10; 379 int count = 0; 380 UErrorCode error = U_ZERO_ERROR; 381 382 UChar cp = 0; 383 while (cp < 0xA0) 384 { 385 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) 386 { 387 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp); 388 return; 389 } 390 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != 391 UNORM_YES) 392 { 393 log_err("ERROR in NFC quick check at U+%04x\n", cp); 394 return; 395 } 396 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) 397 { 398 log_err("ERROR in NFKD quick check at U+%04x\n", cp); 399 return; 400 } 401 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != 402 UNORM_YES) 403 { 404 log_err("ERROR in NFKC quick check at U+%04x\n", cp); 405 return; 406 } 407 cp ++; 408 } 409 410 for (; count < SIZE; count ++) 411 { 412 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 413 UNORM_YES) 414 { 415 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); 416 return; 417 } 418 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) 419 != UNORM_YES) 420 { 421 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); 422 return; 423 } 424 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 425 UNORM_YES) 426 { 427 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); 428 return; 429 } 430 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 431 UNORM_YES) 432 { 433 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 434 return; 435 } 436 } 437 } 438 439 static void TestQuickCheckResultMAYBE() 440 { 441 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, 442 0x116A, 0x1173, 0x1175, 0x3099, 0x309A}; 443 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, 444 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099}; 445 446 447 const int SIZE = 10; 448 449 int count = 0; 450 UErrorCode error = U_ZERO_ERROR; 451 452 /* NFD and NFKD does not have any MAYBE codepoints */ 453 for (; count < SIZE; count ++) 454 { 455 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 456 UNORM_MAYBE) 457 { 458 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]); 459 return; 460 } 461 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 462 UNORM_MAYBE) 463 { 464 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 465 return; 466 } 467 } 468 } 469 470 static void TestQuickCheckStringResult() 471 { 472 int count; 473 UChar *d = NULL; 474 UChar *c = NULL; 475 UErrorCode error = U_ZERO_ERROR; 476 477 for (count = 0; count < LENGTHOF(canonTests); count ++) 478 { 479 d = CharsToUChars(canonTests[count][1]); 480 c = CharsToUChars(canonTests[count][2]); 481 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != 482 UNORM_YES) 483 { 484 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count); 485 return; 486 } 487 488 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == 489 UNORM_NO) 490 { 491 log_err("ERROR in NFC quick check for string at count %d\n", count); 492 return; 493 } 494 495 free(d); 496 free(c); 497 } 498 499 for (count = 0; count < LENGTHOF(compatTests); count ++) 500 { 501 d = CharsToUChars(compatTests[count][1]); 502 c = CharsToUChars(compatTests[count][2]); 503 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != 504 UNORM_YES) 505 { 506 log_err("ERROR in NFKD quick check for string at count %d\n", count); 507 return; 508 } 509 510 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != 511 UNORM_YES) 512 { 513 log_err("ERROR in NFKC quick check for string at count %d\n", count); 514 return; 515 } 516 517 free(d); 518 free(c); 519 } 520 } 521 522 void TestQuickCheck() 523 { 524 TestQuickCheckResultNO(); 525 TestQuickCheckResultYES(); 526 TestQuickCheckResultMAYBE(); 527 TestQuickCheckStringResult(); 528 } 529 530 /* 531 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_ 532 * normalized, and some that are not. 533 * Here we pick some specific cases and test the C API. 534 */ 535 static void TestIsNormalized(void) { 536 static const UChar notNFC[][8]={ /* strings that are not in NFC */ 537 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */ 538 { 0xfb1d, 0 }, /* excluded from composition */ 539 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */ 540 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */ 541 }; 542 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */ 543 { 0x1100, 0x1161, 0 }, /* Jamo compose */ 544 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */ 545 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */ 546 }; 547 548 int32_t i; 549 UErrorCode errorCode; 550 551 /* API test */ 552 553 /* normal case with length>=0 (length -1 used for special cases below) */ 554 errorCode=U_ZERO_ERROR; 555 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 556 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 557 } 558 559 /* incoming U_FAILURE */ 560 errorCode=U_TRUNCATED_CHAR_FOUND; 561 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode); 562 if(errorCode!=U_TRUNCATED_CHAR_FOUND) { 563 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode)); 564 } 565 566 /* NULL source */ 567 errorCode=U_ZERO_ERROR; 568 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode); 569 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 570 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); 571 } 572 573 /* bad length */ 574 errorCode=U_ZERO_ERROR; 575 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode); 576 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 577 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); 578 } 579 580 /* specific cases */ 581 for(i=0; i<LENGTHOF(notNFC); ++i) { 582 errorCode=U_ZERO_ERROR; 583 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 584 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 585 } 586 errorCode=U_ZERO_ERROR; 587 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 588 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 589 } 590 } 591 for(i=0; i<LENGTHOF(notNFKC); ++i) { 592 errorCode=U_ZERO_ERROR; 593 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 594 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 595 } 596 } 597 } 598 599 void TestCheckFCD() 600 { 601 UErrorCode status = U_ZERO_ERROR; 602 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 603 0x0A}; 604 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301, 605 0x02B9, 0x0314, 0x0315, 0x0316}; 606 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, 607 0x0050, 0x0730, 0x09EE, 0x1E10}; 608 609 static const UChar datastr[][5] = 610 { {0x0061, 0x030A, 0x1E05, 0x0302, 0}, 611 {0x0061, 0x030A, 0x00E2, 0x0323, 0}, 612 {0x0061, 0x0323, 0x00E2, 0x0323, 0}, 613 {0x0061, 0x0323, 0x1E05, 0x0302, 0} }; 614 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES}; 615 616 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 617 0x6a, 618 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 619 0xea, 620 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 621 0x0307, 0x0308, 0x0309, 0x030a, 622 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 623 0x0327, 0x0328, 0x0329, 0x032a, 624 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 625 0x1e07, 0x1e08, 0x1e09, 0x1e0a}; 626 627 int count = 0; 628 629 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES) 630 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n"); 631 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO) 632 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n"); 633 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES) 634 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n"); 635 636 if (U_FAILURE(status)) 637 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status)); 638 639 while (count < 4) 640 { 641 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status); 642 if (U_FAILURE(status)) { 643 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count); 644 break; 645 } 646 else { 647 if (result[count] != fcdresult) { 648 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count, 649 result[count]); 650 } 651 } 652 count ++; 653 } 654 655 /* random checks of long strings */ 656 status = U_ZERO_ERROR; 657 srand((unsigned)time( NULL )); 658 659 for (count = 0; count < 50; count ++) 660 { 661 int size = 0; 662 UBool testresult = UNORM_YES; 663 UChar data[20]; 664 UChar norm[100]; 665 UChar nfd[100]; 666 int normsize = 0; 667 int nfdsize = 0; 668 669 while (size != 19) { 670 data[size] = datachar[(rand() * 50) / RAND_MAX]; 671 log_verbose("0x%x", data[size]); 672 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, 673 norm + normsize, 100 - normsize, &status); 674 if (U_FAILURE(status)) { 675 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n"); 676 break; 677 } 678 size ++; 679 } 680 log_verbose("\n"); 681 682 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, 683 nfd, 100, &status); 684 if (U_FAILURE(status)) { 685 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n"); 686 } 687 688 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) { 689 testresult = UNORM_NO; 690 } 691 if (testresult == UNORM_YES) { 692 log_verbose("result UNORM_YES\n"); 693 } 694 else { 695 log_verbose("result UNORM_NO\n"); 696 } 697 698 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) { 699 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult); 700 } 701 } 702 } 703 704 static void 705 TestAPI() { 706 static const UChar in[]={ 0x68, 0xe4 }; 707 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff }; 708 UErrorCode errorCode; 709 int32_t length; 710 711 /* try preflighting */ 712 errorCode=U_ZERO_ERROR; 713 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode); 714 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { 715 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 716 return; 717 } 718 719 errorCode=U_ZERO_ERROR; 720 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode); 721 if(U_FAILURE(errorCode)) { 722 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode)); 723 return; 724 } 725 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) { 726 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]); 727 return; 728 } 729 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode); 730 if(U_FAILURE(errorCode)) { 731 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode)); 732 return; 733 } 734 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode); 735 if(U_FAILURE(errorCode)) { 736 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode)); 737 return; 738 } 739 } 740 741 /* test cases to improve test code coverage */ 742 enum { 743 HANGUL_K_KIYEOK=0x3131, /* NFKD->Jamo L U+1100 */ 744 HANGUL_K_WEO=0x315d, /* NFKD->Jamo V U+116f */ 745 HANGUL_K_KIYEOK_SIOS=0x3133, /* NFKD->Jamo T U+11aa */ 746 747 HANGUL_KIYEOK=0x1100, /* Jamo L U+1100 */ 748 HANGUL_WEO=0x116f, /* Jamo V U+116f */ 749 HANGUL_KIYEOK_SIOS=0x11aa, /* Jamo T U+11aa */ 750 751 HANGUL_AC00=0xac00, /* Hangul syllable = Jamo LV U+ac00 */ 752 HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */ 753 754 MUSICAL_VOID_NOTEHEAD=0x1d157, 755 MUSICAL_HALF_NOTE=0x1d15e, /* NFC/NFD->Notehead+Stem */ 756 MUSICAL_STEM=0x1d165, /* cc=216 */ 757 MUSICAL_STACCATO=0x1d17c /* cc=220 */ 758 }; 759 760 static void 761 TestNormCoverage() { 762 UChar input[1000], expect[1000], output[1000]; 763 UErrorCode errorCode; 764 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength; 765 766 /* create a long and nasty string with NFKC-unsafe characters */ 767 inLength=0; 768 769 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */ 770 input[inLength++]=HANGUL_KIYEOK; 771 input[inLength++]=HANGUL_WEO; 772 input[inLength++]=HANGUL_KIYEOK_SIOS; 773 774 input[inLength++]=HANGUL_KIYEOK; 775 input[inLength++]=HANGUL_WEO; 776 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 777 778 input[inLength++]=HANGUL_KIYEOK; 779 input[inLength++]=HANGUL_K_WEO; 780 input[inLength++]=HANGUL_KIYEOK_SIOS; 781 782 input[inLength++]=HANGUL_KIYEOK; 783 input[inLength++]=HANGUL_K_WEO; 784 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 785 786 input[inLength++]=HANGUL_K_KIYEOK; 787 input[inLength++]=HANGUL_WEO; 788 input[inLength++]=HANGUL_KIYEOK_SIOS; 789 790 input[inLength++]=HANGUL_K_KIYEOK; 791 input[inLength++]=HANGUL_WEO; 792 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 793 794 input[inLength++]=HANGUL_K_KIYEOK; 795 input[inLength++]=HANGUL_K_WEO; 796 input[inLength++]=HANGUL_KIYEOK_SIOS; 797 798 input[inLength++]=HANGUL_K_KIYEOK; 799 input[inLength++]=HANGUL_K_WEO; 800 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 801 802 /* Hangul LV with normal/compatibility Jamo T */ 803 input[inLength++]=HANGUL_AC00; 804 input[inLength++]=HANGUL_KIYEOK_SIOS; 805 806 input[inLength++]=HANGUL_AC00; 807 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 808 809 /* compatibility Jamo L, V */ 810 input[inLength++]=HANGUL_K_KIYEOK; 811 input[inLength++]=HANGUL_K_WEO; 812 813 hangulPrefixLength=inLength; 814 815 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE); 816 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE); 817 for(i=0; i<200; ++i) { 818 input[inLength++]=U16_LEAD(MUSICAL_STACCATO); 819 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO); 820 input[inLength++]=U16_LEAD(MUSICAL_STEM); 821 input[inLength++]=U16_TRAIL(MUSICAL_STEM); 822 } 823 824 /* (compatibility) Jamo L, T do not compose */ 825 input[inLength++]=HANGUL_K_KIYEOK; 826 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 827 828 /* quick checks */ 829 errorCode=U_ZERO_ERROR; 830 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) { 831 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 832 } 833 errorCode=U_ZERO_ERROR; 834 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) { 835 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 836 } 837 errorCode=U_ZERO_ERROR; 838 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 839 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 840 } 841 errorCode=U_ZERO_ERROR; 842 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 843 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 844 } 845 errorCode=U_ZERO_ERROR; 846 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) { 847 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 848 } 849 850 /* NFKC */ 851 expectLength=0; 852 expect[expectLength++]=HANGUL_SYLLABLE; 853 854 expect[expectLength++]=HANGUL_SYLLABLE; 855 856 expect[expectLength++]=HANGUL_SYLLABLE; 857 858 expect[expectLength++]=HANGUL_SYLLABLE; 859 860 expect[expectLength++]=HANGUL_SYLLABLE; 861 862 expect[expectLength++]=HANGUL_SYLLABLE; 863 864 expect[expectLength++]=HANGUL_SYLLABLE; 865 866 expect[expectLength++]=HANGUL_SYLLABLE; 867 868 expect[expectLength++]=HANGUL_AC00+3; 869 870 expect[expectLength++]=HANGUL_AC00+3; 871 872 expect[expectLength++]=HANGUL_AC00+14*28; 873 874 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); 875 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); 876 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 877 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 878 for(i=0; i<200; ++i) { 879 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 880 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 881 } 882 for(i=0; i<200; ++i) { 883 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); 884 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); 885 } 886 887 expect[expectLength++]=HANGUL_KIYEOK; 888 expect[expectLength++]=HANGUL_KIYEOK_SIOS; 889 890 /* try destination overflow first */ 891 errorCode=U_ZERO_ERROR; 892 preflightLength=unorm_normalize(input, inLength, 893 UNORM_NFKC, 0, 894 output, 100, /* too short */ 895 &errorCode); 896 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { 897 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode)); 898 } 899 900 /* real NFKC */ 901 errorCode=U_ZERO_ERROR; 902 length=unorm_normalize(input, inLength, 903 UNORM_NFKC, 0, 904 output, sizeof(output)/U_SIZEOF_UCHAR, 905 &errorCode); 906 if(U_FAILURE(errorCode)) { 907 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode)); 908 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { 909 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n"); 910 for(i=0; i<length; ++i) { 911 if(output[i]!=expect[i]) { 912 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]); 913 break; 914 } 915 } 916 } 917 if(length!=preflightLength) { 918 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength); 919 } 920 921 /* FCD */ 922 u_memcpy(expect, input, hangulPrefixLength); 923 expectLength=hangulPrefixLength; 924 925 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); 926 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); 927 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 928 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 929 for(i=0; i<200; ++i) { 930 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 931 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 932 } 933 for(i=0; i<200; ++i) { 934 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); 935 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); 936 } 937 938 expect[expectLength++]=HANGUL_K_KIYEOK; 939 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS; 940 941 errorCode=U_ZERO_ERROR; 942 length=unorm_normalize(input, inLength, 943 UNORM_FCD, 0, 944 output, sizeof(output)/U_SIZEOF_UCHAR, 945 &errorCode); 946 if(U_FAILURE(errorCode)) { 947 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode)); 948 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { 949 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n"); 950 for(i=0; i<length; ++i) { 951 if(output[i]!=expect[i]) { 952 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]); 953 break; 954 } 955 } 956 } 957 } 958 959 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */ 960 static void 961 TestConcatenate(void) { 962 /* "re + 'sume'" */ 963 static const UChar 964 left[]={ 965 0x72, 0x65, 0 966 }, 967 right[]={ 968 0x301, 0x73, 0x75, 0x6d, 0xe9, 0 969 }, 970 expect[]={ 971 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0 972 }; 973 974 UChar buffer[100]; 975 UErrorCode errorCode; 976 int32_t length; 977 978 /* left with length, right NUL-terminated */ 979 errorCode=U_ZERO_ERROR; 980 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 981 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) { 982 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 983 } 984 985 /* preflighting */ 986 errorCode=U_ZERO_ERROR; 987 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode); 988 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) { 989 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 990 } 991 992 buffer[2]=0x5555; 993 errorCode=U_ZERO_ERROR; 994 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode); 995 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) { 996 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 997 } 998 999 /* enter with U_FAILURE */ 1000 buffer[2]=0xaaaa; 1001 errorCode=U_UNEXPECTED_TOKEN; 1002 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 1003 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) { 1004 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode)); 1005 } 1006 1007 /* illegal arguments */ 1008 buffer[2]=0xaaaa; 1009 errorCode=U_ZERO_ERROR; 1010 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 1011 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) { 1012 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 1013 } 1014 1015 errorCode=U_ZERO_ERROR; 1016 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode); 1017 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1018 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 1019 } 1020 } 1021 1022 enum { 1023 _PLUS=0x2b 1024 }; 1025 1026 static const char *const _modeString[UNORM_MODE_COUNT]={ 1027 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD" 1028 }; 1029 1030 static void 1031 _testIter(const UChar *src, int32_t srcLength, 1032 UCharIterator *iter, UNormalizationMode mode, UBool forward, 1033 const UChar *out, int32_t outLength, 1034 const int32_t *srcIndexes, int32_t srcIndexesLength) { 1035 UChar buffer[4]; 1036 const UChar *expect, *outLimit, *in; 1037 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength; 1038 UErrorCode errorCode; 1039 UBool neededToNormalize, expectNeeded; 1040 1041 errorCode=U_ZERO_ERROR; 1042 outLimit=out+outLength; 1043 if(forward) { 1044 expect=out; 1045 i=index=0; 1046 } else { 1047 expect=outLimit; 1048 i=srcIndexesLength-2; 1049 index=srcLength; 1050 } 1051 1052 for(;;) { 1053 prevIndex=index; 1054 if(forward) { 1055 if(!iter->hasNext(iter)) { 1056 return; 1057 } 1058 length=unorm_next(iter, 1059 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1060 mode, 0, 1061 (UBool)(out!=NULL), &neededToNormalize, 1062 &errorCode); 1063 expectIndex=srcIndexes[i+1]; 1064 in=src+prevIndex; 1065 inLength=expectIndex-prevIndex; 1066 1067 if(out!=NULL) { 1068 /* get output piece from between plus signs */ 1069 expectLength=0; 1070 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) { 1071 ++expectLength; 1072 } 1073 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); 1074 } else { 1075 expect=in; 1076 expectLength=inLength; 1077 expectNeeded=FALSE; 1078 } 1079 } else { 1080 if(!iter->hasPrevious(iter)) { 1081 return; 1082 } 1083 length=unorm_previous(iter, 1084 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1085 mode, 0, 1086 (UBool)(out!=NULL), &neededToNormalize, 1087 &errorCode); 1088 expectIndex=srcIndexes[i]; 1089 in=src+expectIndex; 1090 inLength=prevIndex-expectIndex; 1091 1092 if(out!=NULL) { 1093 /* get output piece from between plus signs */ 1094 expectLength=0; 1095 while(expect!=out && expect[-1]!=_PLUS) { 1096 ++expectLength; 1097 --expect; 1098 } 1099 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); 1100 } else { 1101 expect=in; 1102 expectLength=inLength; 1103 expectNeeded=FALSE; 1104 } 1105 } 1106 index=iter->getIndex(iter, UITER_CURRENT); 1107 1108 if(U_FAILURE(errorCode)) { 1109 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n", 1110 forward, _modeString[mode], i, u_errorName(errorCode)); 1111 return; 1112 } 1113 if(expectIndex!=index) { 1114 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n", 1115 forward, _modeString[mode], i, index, expectIndex); 1116 return; 1117 } 1118 if(expectLength!=length) { 1119 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n", 1120 forward, _modeString[mode], i, length, expectLength); 1121 return; 1122 } 1123 if(0!=u_memcmp(expect, buffer, length)) { 1124 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n", 1125 forward, _modeString[mode], i); 1126 return; 1127 } 1128 if(neededToNormalize!=expectNeeded) { 1129 } 1130 1131 if(forward) { 1132 expect+=expectLength+1; /* go after the + */ 1133 ++i; 1134 } else { 1135 --expect; /* go before the + */ 1136 --i; 1137 } 1138 } 1139 } 1140 1141 static void 1142 TestNextPrevious() { 1143 static const UChar 1144 src[]={ /* input string */ 1145 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133 1146 }, 1147 nfd[]={ /* + separates expected output pieces */ 1148 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133 1149 }, 1150 nfkd[]={ 1151 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa 1152 }, 1153 nfc[]={ 1154 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 1155 }, 1156 nfkc[]={ 1157 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03 1158 }, 1159 fcd[]={ 1160 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 1161 }; 1162 1163 /* expected iterator indexes in the source string for each iteration piece */ 1164 static const int32_t 1165 nfdIndexes[]={ 1166 0, 1, 2, 5, 6, 7 1167 }, 1168 nfkdIndexes[]={ 1169 0, 1, 2, 5, 6, 7 1170 }, 1171 nfcIndexes[]={ 1172 0, 1, 2, 5, 6, 7 1173 }, 1174 nfkcIndexes[]={ 1175 0, 1, 2, 5, 7 1176 }, 1177 fcdIndexes[]={ 1178 0, 1, 2, 5, 6, 7 1179 }; 1180 1181 UCharIterator iter; 1182 1183 UChar buffer[4]; 1184 int32_t length; 1185 1186 UBool neededToNormalize; 1187 UErrorCode errorCode; 1188 1189 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR); 1190 1191 /* test iteration with doNormalize */ 1192 iter.index=0; 1193 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); 1194 iter.index=0; 1195 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); 1196 iter.index=0; 1197 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); 1198 iter.index=0; 1199 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); 1200 iter.index=0; 1201 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); 1202 1203 iter.index=iter.length; 1204 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); 1205 iter.index=iter.length; 1206 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); 1207 iter.index=iter.length; 1208 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); 1209 iter.index=iter.length; 1210 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); 1211 iter.index=iter.length; 1212 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); 1213 1214 /* test iteration without doNormalize */ 1215 iter.index=0; 1216 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4); 1217 iter.index=0; 1218 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4); 1219 iter.index=0; 1220 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4); 1221 iter.index=0; 1222 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4); 1223 iter.index=0; 1224 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4); 1225 1226 iter.index=iter.length; 1227 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4); 1228 iter.index=iter.length; 1229 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4); 1230 iter.index=iter.length; 1231 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4); 1232 iter.index=iter.length; 1233 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4); 1234 iter.index=iter.length; 1235 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4); 1236 1237 /* try without neededToNormalize */ 1238 errorCode=U_ZERO_ERROR; 1239 buffer[0]=5; 1240 iter.index=1; 1241 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1242 UNORM_NFD, 0, TRUE, NULL, 1243 &errorCode); 1244 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) { 1245 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode)); 1246 return; 1247 } 1248 1249 /* preflight */ 1250 neededToNormalize=9; 1251 iter.index=1; 1252 length=unorm_next(&iter, NULL, 0, 1253 UNORM_NFD, 0, TRUE, &neededToNormalize, 1254 &errorCode); 1255 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) { 1256 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode)); 1257 return; 1258 } 1259 1260 errorCode=U_ZERO_ERROR; 1261 buffer[0]=buffer[1]=5; 1262 neededToNormalize=9; 1263 iter.index=1; 1264 length=unorm_next(&iter, buffer, 1, 1265 UNORM_NFD, 0, TRUE, &neededToNormalize, 1266 &errorCode); 1267 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) { 1268 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode)); 1269 return; 1270 } 1271 1272 /* no iterator */ 1273 errorCode=U_ZERO_ERROR; 1274 buffer[0]=buffer[1]=5; 1275 neededToNormalize=9; 1276 iter.index=1; 1277 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1278 UNORM_NFD, 0, TRUE, &neededToNormalize, 1279 &errorCode); 1280 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1281 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode)); 1282 return; 1283 } 1284 1285 /* illegal mode */ 1286 buffer[0]=buffer[1]=5; 1287 neededToNormalize=9; 1288 iter.index=1; 1289 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1290 (UNormalizationMode)0, 0, TRUE, &neededToNormalize, 1291 &errorCode); 1292 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1293 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode)); 1294 return; 1295 } 1296 1297 /* error coming in */ 1298 errorCode=U_MISPLACED_QUANTIFIER; 1299 buffer[0]=5; 1300 iter.index=1; 1301 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, 1302 UNORM_NFD, 0, TRUE, NULL, 1303 &errorCode); 1304 if(errorCode!=U_MISPLACED_QUANTIFIER) { 1305 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode)); 1306 return; 1307 } 1308 } 1309 1310 static void 1311 TestFCNFKCClosure(void) { 1312 static const struct { 1313 UChar32 c; 1314 const UChar s[6]; 1315 } tests[]={ 1316 { 0x00C4, { 0 } }, 1317 { 0x00E4, { 0 } }, 1318 { 0x037A, { 0x0020, 0x03B9, 0 } }, 1319 { 0x03D2, { 0x03C5, 0 } }, 1320 { 0x20A8, { 0x0072, 0x0073, 0 } }, 1321 { 0x210B, { 0x0068, 0 } }, 1322 { 0x210C, { 0x0068, 0 } }, 1323 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } }, 1324 { 0x2122, { 0x0074, 0x006D, 0 } }, 1325 { 0x2128, { 0x007A, 0 } }, 1326 { 0x1D5DB, { 0x0068, 0 } }, 1327 { 0x1D5ED, { 0x007A, 0 } }, 1328 { 0x0061, { 0 } } 1329 }; 1330 1331 UChar buffer[8]; 1332 UErrorCode errorCode; 1333 int32_t i, length; 1334 1335 for(i=0; i<LENGTHOF(tests); ++i) { 1336 errorCode=U_ZERO_ERROR; 1337 length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode); 1338 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) { 1339 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode)); 1340 } 1341 } 1342 1343 /* error handling */ 1344 errorCode=U_ZERO_ERROR; 1345 length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode); 1346 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1347 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode)); 1348 } 1349 1350 length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode); 1351 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1352 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode)); 1353 } 1354 } 1355 1356 static void 1357 TestQuickCheckPerCP() { 1358 UErrorCode errorCode; 1359 UChar32 c, lead, trail; 1360 UChar s[U16_MAX_LENGTH], nfd[16]; 1361 int32_t length, lccc1, lccc2, tccc1, tccc2; 1362 int32_t qc1, qc2; 1363 1364 if( 1365 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES || 1366 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES || 1367 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || 1368 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || 1369 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) || 1370 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) 1371 ) { 1372 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n"); 1373 } 1374 1375 /* 1376 * compare the quick check property values for some code points 1377 * to the quick check results for checking same-code point strings 1378 */ 1379 errorCode=U_ZERO_ERROR; 1380 c=0; 1381 while(c<0x110000) { 1382 length=0; 1383 U16_APPEND_UNSAFE(s, length, c); 1384 1385 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK); 1386 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode); 1387 if(qc1!=qc2) { 1388 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1389 } 1390 1391 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK); 1392 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode); 1393 if(qc1!=qc2) { 1394 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1395 } 1396 1397 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK); 1398 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode); 1399 if(qc1!=qc2) { 1400 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1401 } 1402 1403 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK); 1404 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode); 1405 if(qc1!=qc2) { 1406 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1407 } 1408 1409 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode); 1410 /* length-length == 0 is used to get around a compiler warning. */ 1411 U16_GET(nfd, 0, length-length, length, lead); 1412 U16_GET(nfd, 0, length-1, length, trail); 1413 1414 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS); 1415 lccc2=u_getCombiningClass(lead); 1416 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS); 1417 tccc2=u_getCombiningClass(trail); 1418 1419 if(lccc1!=lccc2) { 1420 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n", 1421 lccc1, lccc2, c); 1422 } 1423 if(tccc1!=tccc2) { 1424 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n", 1425 tccc1, tccc2, c); 1426 } 1427 1428 /* skip some code points */ 1429 c=(20*c)/19+1; 1430 } 1431 } 1432 1433 static void 1434 TestComposition(void) { 1435 static const struct { 1436 UNormalizationMode mode; 1437 uint32_t options; 1438 UChar input[12]; 1439 UChar expect[12]; 1440 } cases[]={ 1441 /* 1442 * special cases for UAX #15 bug 1443 * see Unicode Corrigendum #5: Normalization Idempotency 1444 * at http://unicode.org/versions/corrigendum5.html 1445 * (was Public Review Issue #29) 1446 */ 1447 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } }, 1448 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } }, 1449 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } }, 1450 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } }, 1451 1452 /* TODO: add test cases for UNORM_FCC here (j2151) */ 1453 }; 1454 1455 UChar output[16]; 1456 UErrorCode errorCode; 1457 int32_t i, length; 1458 1459 for(i=0; i<LENGTHOF(cases); ++i) { 1460 errorCode=U_ZERO_ERROR; 1461 length=unorm_normalize( 1462 cases[i].input, -1, 1463 cases[i].mode, cases[i].options, 1464 output, LENGTHOF(output), 1465 &errorCode); 1466 if( U_FAILURE(errorCode) || 1467 length!=u_strlen(cases[i].expect) || 1468 0!=u_memcmp(output, cases[i].expect, length) 1469 ) { 1470 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i); 1471 } 1472 } 1473 } 1474 1475 static void 1476 TestGetDecomposition() { 1477 UChar decomp[32]; 1478 int32_t length; 1479 1480 UErrorCode errorCode=U_ZERO_ERROR; 1481 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode); 1482 if(U_FAILURE(errorCode)) { 1483 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode)); 1484 return; 1485 } 1486 1487 length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode); 1488 if(U_FAILURE(errorCode) || length>=0) { 1489 log_err("unorm2_getDecomposition(fcc, space) failed\n"); 1490 } 1491 errorCode=U_ZERO_ERROR; 1492 length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode); 1493 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) { 1494 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n"); 1495 } 1496 errorCode=U_ZERO_ERROR; 1497 length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode); 1498 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) { 1499 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n"); 1500 } 1501 errorCode=U_ZERO_ERROR; 1502 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode); 1503 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { 1504 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n"); 1505 } 1506 errorCode=U_ZERO_ERROR; 1507 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode); 1508 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1509 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n"); 1510 } 1511 errorCode=U_ZERO_ERROR; 1512 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode); 1513 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1514 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n"); 1515 } 1516 } 1517 1518 static void 1519 TestGetRawDecomposition() { 1520 UChar decomp[32]; 1521 int32_t length; 1522 1523 UErrorCode errorCode=U_ZERO_ERROR; 1524 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode); 1525 if(U_FAILURE(errorCode)) { 1526 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); 1527 return; 1528 } 1529 /* 1530 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values, 1531 * without recursive decomposition. 1532 */ 1533 1534 length=unorm2_getRawDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode); 1535 if(U_FAILURE(errorCode) || length>=0) { 1536 log_err("unorm2_getDecomposition(nfkc, space) failed\n"); 1537 } 1538 errorCode=U_ZERO_ERROR; 1539 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode); 1540 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) { 1541 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n"); 1542 } 1543 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */ 1544 errorCode=U_ZERO_ERROR; 1545 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, LENGTHOF(decomp), &errorCode); 1546 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) { 1547 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n"); 1548 } 1549 /* U+212B ANGSTROM SIGN */ 1550 errorCode=U_ZERO_ERROR; 1551 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, LENGTHOF(decomp), &errorCode); 1552 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) { 1553 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n"); 1554 } 1555 errorCode=U_ZERO_ERROR; 1556 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, LENGTHOF(decomp), &errorCode); 1557 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) { 1558 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n"); 1559 } 1560 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */ 1561 errorCode=U_ZERO_ERROR; 1562 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode); 1563 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) { 1564 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n"); 1565 } 1566 errorCode=U_ZERO_ERROR; 1567 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode); 1568 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) { 1569 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n"); 1570 } 1571 errorCode=U_ZERO_ERROR; 1572 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode); 1573 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1574 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n"); 1575 } 1576 errorCode=U_ZERO_ERROR; 1577 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode); 1578 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1579 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n"); 1580 } 1581 } 1582 1583 static void 1584 TestAppendRestoreMiddle() { 1585 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */ 1586 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */ 1587 /* NFC: C5 is 'A with ring above' */ 1588 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 }; 1589 int32_t length; 1590 UErrorCode errorCode=U_ZERO_ERROR; 1591 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); 1592 if(U_FAILURE(errorCode)) { 1593 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); 1594 return; 1595 } 1596 /* 1597 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity. 1598 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A> 1599 * still fits into a[] but the full result still overflows this capacity. 1600 * (Let it modify the destination buffer before reallocating internally.) 1601 */ 1602 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode); 1603 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=LENGTHOF(expected)) { 1604 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length); 1605 return; 1606 } 1607 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */ 1608 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) { 1609 log_err("unorm2_append(overflow) modified the first string\n"); 1610 return; 1611 } 1612 errorCode=U_ZERO_ERROR; 1613 length=unorm2_append(n2, a, -1, LENGTHOF(a), b, -1, &errorCode); 1614 if(U_FAILURE(errorCode) || length!=LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) { 1615 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length); 1616 return; 1617 } 1618 } 1619 1620 static void 1621 TestGetEasyToUseInstance() { 1622 static const UChar in[]={ 1623 0xA0, /* -> <noBreak> 0020 */ 1624 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */ 1625 }; 1626 UChar out[32]; 1627 int32_t length; 1628 1629 UErrorCode errorCode=U_ZERO_ERROR; 1630 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); 1631 if(U_FAILURE(errorCode)) { 1632 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); 1633 return; 1634 } 1635 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); 1636 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) { 1637 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n", 1638 (int)length, u_errorName(errorCode)); 1639 } 1640 1641 errorCode=U_ZERO_ERROR; 1642 n2=unorm2_getNFDInstance(&errorCode); 1643 if(U_FAILURE(errorCode)) { 1644 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode)); 1645 return; 1646 } 1647 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); 1648 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { 1649 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n", 1650 (int)length, u_errorName(errorCode)); 1651 } 1652 1653 errorCode=U_ZERO_ERROR; 1654 n2=unorm2_getNFKCInstance(&errorCode); 1655 if(U_FAILURE(errorCode)) { 1656 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); 1657 return; 1658 } 1659 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); 1660 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) { 1661 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n", 1662 (int)length, u_errorName(errorCode)); 1663 } 1664 1665 errorCode=U_ZERO_ERROR; 1666 n2=unorm2_getNFKDInstance(&errorCode); 1667 if(U_FAILURE(errorCode)) { 1668 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode)); 1669 return; 1670 } 1671 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); 1672 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { 1673 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n", 1674 (int)length, u_errorName(errorCode)); 1675 } 1676 1677 errorCode=U_ZERO_ERROR; 1678 n2=unorm2_getNFKCCasefoldInstance(&errorCode); 1679 if(U_FAILURE(errorCode)) { 1680 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode)); 1681 return; 1682 } 1683 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); 1684 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) { 1685 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n", 1686 (int)length, u_errorName(errorCode)); 1687 } 1688 } 1689 1690 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 1691