1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /******************************************************************************** 9 * 10 * File CNORMTST.C 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda Ported for C API 15 * synwee added test for quick check 16 * synwee added test for checkFCD 17 *********************************************************************************/ 18 /*tests for u_normalization*/ 19 #include "unicode/utypes.h" 20 #include "unicode/unorm.h" 21 #include "unicode/utf16.h" 22 #include "cintltst.h" 23 #include "cmemory.h" 24 25 #if !UCONFIG_NO_NORMALIZATION 26 27 #include <stdlib.h> 28 #include <time.h> 29 #include "unicode/uchar.h" 30 #include "unicode/ustring.h" 31 #include "unicode/unorm.h" 32 #include "cnormtst.h" 33 34 static void 35 TestAPI(void); 36 37 static void 38 TestNormCoverage(void); 39 40 static void 41 TestConcatenate(void); 42 43 static void 44 TestNextPrevious(void); 45 46 static void TestIsNormalized(void); 47 48 static void 49 TestFCNFKCClosure(void); 50 51 static void 52 TestQuickCheckPerCP(void); 53 54 static void 55 TestComposition(void); 56 57 static void 58 TestFCD(void); 59 60 static void 61 TestGetDecomposition(void); 62 63 static void 64 TestGetRawDecomposition(void); 65 66 static void TestAppendRestoreMiddle(void); 67 static void TestGetEasyToUseInstance(void); 68 69 static const char* const canonTests[][3] = { 70 /* Input*/ /*Decomposed*/ /*Composed*/ 71 { "cat", "cat", "cat" }, 72 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", }, 73 74 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/ 75 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/ 76 77 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/ 78 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */ 79 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */ 80 81 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/ 82 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/ 83 84 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/ 85 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/ 86 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/ 87 88 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/ 89 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/ 90 91 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" }, 92 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" }, 93 94 { "Henry IV", "Henry IV", "Henry IV" }, 95 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" }, 96 97 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/ 98 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/ 99 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/ 100 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/ 101 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/ 102 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* hw_ka + ten*/ 103 { "", "", "" } 104 }; 105 106 static const char* const compatTests[][3] = { 107 /* Input*/ /*Decomposed */ /*Composed*/ 108 { "cat", "cat", "cat" }, 109 110 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/ 111 112 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" }, 113 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/ 114 115 { "Henry IV", "Henry IV", "Henry IV" }, 116 { "Henry \\u2163", "Henry IV", "Henry IV" }, 117 118 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/ 119 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/ 120 121 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/ 122 123 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/ 124 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/ 125 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/ 126 { "", "", "" } 127 }; 128 129 static const char* const fcdTests[][3] = { 130 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */ 131 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL }, /* D-caron + cedilla */ 132 { "\\u010e", "\\u010e", NULL } /* D-caron */ 133 }; 134 135 void addNormTest(TestNode** root); 136 137 void addNormTest(TestNode** root) 138 { 139 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI"); 140 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp"); 141 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp"); 142 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose"); 143 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose"); 144 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD"); 145 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull"); 146 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck"); 147 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP"); 148 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized"); 149 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD"); 150 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage"); 151 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate"); 152 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious"); 153 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure"); 154 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition"); 155 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition"); 156 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition"); 157 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle"); 158 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance"); 159 } 160 161 static const char* const modeStrings[]={ 162 "UNORM_NONE", 163 "UNORM_NFD", 164 "UNORM_NFKD", 165 "UNORM_NFC", 166 "UNORM_NFKC", 167 "UNORM_FCD", 168 "UNORM_MODE_COUNT" 169 }; 170 171 static void TestNormCases(UNormalizationMode mode, 172 const char* const cases[][3], int32_t lengthOfCases) { 173 int32_t x, neededLen, length2; 174 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1; 175 UChar *source=NULL; 176 UChar result[16]; 177 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]); 178 for(x=0; x < lengthOfCases; x++) 179 { 180 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; 181 source=CharsToUChars(cases[x][0]); 182 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status); 183 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2); 184 if(neededLen!=length2) { 185 log_err("ERROR in unorm_normalize(%s)[%d]: " 186 "preflight length/NUL %d!=%d preflight length/srcLength\n", 187 modeStrings[mode], (int)x, (int)neededLen, (int)length2); 188 } 189 if(status==U_BUFFER_OVERFLOW_ERROR) 190 { 191 status=U_ZERO_ERROR; 192 } 193 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status); 194 if(U_FAILURE(status) || neededLen!=length2) { 195 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n", 196 modeStrings[mode], austrdup(source), myErrorName(status)); 197 } else { 198 assertEqual(result, cases[x][expIndex], x); 199 } 200 length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status); 201 if(U_FAILURE(status) || neededLen!=length2) { 202 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n", 203 modeStrings[mode], austrdup(source), myErrorName(status)); 204 } else { 205 assertEqual(result, cases[x][expIndex], x); 206 } 207 free(source); 208 } 209 } 210 211 void TestDecomp() { 212 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests)); 213 } 214 215 void TestCompatDecomp() { 216 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests)); 217 } 218 219 void TestCanonDecompCompose() { 220 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests)); 221 } 222 223 void TestCompatDecompCompose() { 224 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests)); 225 } 226 227 void TestFCD() { 228 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests)); 229 } 230 231 static void assertEqual(const UChar* result, const char* expected, int32_t index) 232 { 233 UChar *expectedUni = CharsToUChars(expected); 234 if(u_strcmp(result, expectedUni)!=0){ 235 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected, 236 austrdup(result) ); 237 } 238 free(expectedUni); 239 } 240 241 static void TestNull_check(UChar *src, int32_t srcLen, 242 UChar *exp, int32_t expLen, 243 UNormalizationMode mode, 244 const char *name) 245 { 246 UErrorCode status = U_ZERO_ERROR; 247 int32_t len, i; 248 249 UChar result[50]; 250 251 252 status = U_ZERO_ERROR; 253 254 for(i=0;i<50;i++) 255 { 256 result[i] = 0xFFFD; 257 } 258 259 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); 260 261 if(U_FAILURE(status)) { 262 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status)); 263 } else if (len != expLen) { 264 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len); 265 } 266 267 { 268 for(i=0;i<len;i++){ 269 if(exp[i] != result[i]) { 270 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n", 271 name, 272 i, 273 exp[i], 274 result[i]); 275 return; 276 } 277 log_verbose(" %d: \\u%04X\n", i, result[i]); 278 } 279 } 280 281 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name); 282 } 283 284 void TestNull() 285 { 286 287 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 }; 288 int32_t source_comp_len = 4; 289 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a }; 290 int32_t expect_comp_len = 3; 291 292 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 }; 293 int32_t source_dcmp_len = 3; 294 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C }; 295 int32_t expect_dcmp_len = 5; 296 297 TestNull_check(source_comp, 298 source_comp_len, 299 expect_comp, 300 expect_comp_len, 301 UNORM_NFC, 302 "UNORM_NFC"); 303 304 TestNull_check(source_dcmp, 305 source_dcmp_len, 306 expect_dcmp, 307 expect_dcmp_len, 308 UNORM_NFD, 309 "UNORM_NFD"); 310 311 TestNull_check(source_comp, 312 source_comp_len, 313 expect_comp, 314 expect_comp_len, 315 UNORM_NFKC, 316 "UNORM_NFKC"); 317 318 319 } 320 321 static void TestQuickCheckResultNO() 322 { 323 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, 324 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E}; 325 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, 326 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E}; 327 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, 328 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; 329 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, 330 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; 331 332 333 const int SIZE = 10; 334 335 int count = 0; 336 UErrorCode error = U_ZERO_ERROR; 337 338 for (; count < SIZE; count ++) 339 { 340 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 341 UNORM_NO) 342 { 343 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); 344 return; 345 } 346 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 347 UNORM_NO) 348 { 349 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); 350 return; 351 } 352 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 353 UNORM_NO) 354 { 355 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); 356 return; 357 } 358 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 359 UNORM_NO) 360 { 361 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 362 return; 363 } 364 } 365 } 366 367 368 static void TestQuickCheckResultYES() 369 { 370 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, 371 0x2261, 0x3075, 0x4000, 0x5000, 0xF000}; 372 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, 373 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000}; 374 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, 375 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27}; 376 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, 377 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E}; 378 379 const int SIZE = 10; 380 int count = 0; 381 UErrorCode error = U_ZERO_ERROR; 382 383 UChar cp = 0; 384 while (cp < 0xA0) 385 { 386 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) 387 { 388 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp); 389 return; 390 } 391 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != 392 UNORM_YES) 393 { 394 log_err("ERROR in NFC quick check at U+%04x\n", cp); 395 return; 396 } 397 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) 398 { 399 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp); 400 return; 401 } 402 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != 403 UNORM_YES) 404 { 405 log_err("ERROR in NFKC quick check at U+%04x\n", cp); 406 return; 407 } 408 cp ++; 409 } 410 411 for (; count < SIZE; count ++) 412 { 413 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 414 UNORM_YES) 415 { 416 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); 417 return; 418 } 419 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) 420 != UNORM_YES) 421 { 422 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); 423 return; 424 } 425 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 426 UNORM_YES) 427 { 428 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); 429 return; 430 } 431 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 432 UNORM_YES) 433 { 434 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 435 return; 436 } 437 } 438 } 439 440 static void TestQuickCheckResultMAYBE() 441 { 442 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, 443 0x116A, 0x1173, 0x1175, 0x3099, 0x309A}; 444 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, 445 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099}; 446 447 448 const int SIZE = 10; 449 450 int count = 0; 451 UErrorCode error = U_ZERO_ERROR; 452 453 /* NFD and NFKD does not have any MAYBE codepoints */ 454 for (; count < SIZE; count ++) 455 { 456 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 457 UNORM_MAYBE) 458 { 459 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]); 460 return; 461 } 462 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 463 UNORM_MAYBE) 464 { 465 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); 466 return; 467 } 468 } 469 } 470 471 static void TestQuickCheckStringResult() 472 { 473 int count; 474 UChar *d = NULL; 475 UChar *c = NULL; 476 UErrorCode error = U_ZERO_ERROR; 477 478 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++) 479 { 480 d = CharsToUChars(canonTests[count][1]); 481 c = CharsToUChars(canonTests[count][2]); 482 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != 483 UNORM_YES) 484 { 485 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count); 486 return; 487 } 488 489 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == 490 UNORM_NO) 491 { 492 log_err("ERROR in NFC quick check for string at count %d\n", count); 493 return; 494 } 495 496 free(d); 497 free(c); 498 } 499 500 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++) 501 { 502 d = CharsToUChars(compatTests[count][1]); 503 c = CharsToUChars(compatTests[count][2]); 504 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != 505 UNORM_YES) 506 { 507 log_data_err("ERROR in NFKD quick check for string at count %d\n", count); 508 return; 509 } 510 511 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != 512 UNORM_YES) 513 { 514 log_err("ERROR in NFKC quick check for string at count %d\n", count); 515 return; 516 } 517 518 free(d); 519 free(c); 520 } 521 } 522 523 void TestQuickCheck() 524 { 525 TestQuickCheckResultNO(); 526 TestQuickCheckResultYES(); 527 TestQuickCheckResultMAYBE(); 528 TestQuickCheckStringResult(); 529 } 530 531 /* 532 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_ 533 * normalized, and some that are not. 534 * Here we pick some specific cases and test the C API. 535 */ 536 static void TestIsNormalized(void) { 537 static const UChar notNFC[][8]={ /* strings that are not in NFC */ 538 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */ 539 { 0xfb1d, 0 }, /* excluded from composition */ 540 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */ 541 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */ 542 }; 543 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */ 544 { 0x1100, 0x1161, 0 }, /* Jamo compose */ 545 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */ 546 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */ 547 }; 548 549 int32_t i; 550 UErrorCode errorCode; 551 552 /* API test */ 553 554 /* normal case with length>=0 (length -1 used for special cases below) */ 555 errorCode=U_ZERO_ERROR; 556 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 557 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 558 } 559 560 /* incoming U_FAILURE */ 561 errorCode=U_TRUNCATED_CHAR_FOUND; 562 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode); 563 if(errorCode!=U_TRUNCATED_CHAR_FOUND) { 564 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode)); 565 } 566 567 /* NULL source */ 568 errorCode=U_ZERO_ERROR; 569 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode); 570 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 571 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); 572 } 573 574 /* bad length */ 575 errorCode=U_ZERO_ERROR; 576 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode); 577 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 578 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); 579 } 580 581 /* specific cases */ 582 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) { 583 errorCode=U_ZERO_ERROR; 584 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 585 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 586 } 587 errorCode=U_ZERO_ERROR; 588 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 589 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 590 } 591 } 592 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) { 593 errorCode=U_ZERO_ERROR; 594 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 595 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode)); 596 } 597 } 598 } 599 600 void TestCheckFCD() 601 { 602 UErrorCode status = U_ZERO_ERROR; 603 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 604 0x0A}; 605 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301, 606 0x02B9, 0x0314, 0x0315, 0x0316}; 607 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, 608 0x0050, 0x0730, 0x09EE, 0x1E10}; 609 610 static const UChar datastr[][5] = 611 { {0x0061, 0x030A, 0x1E05, 0x0302, 0}, 612 {0x0061, 0x030A, 0x00E2, 0x0323, 0}, 613 {0x0061, 0x0323, 0x00E2, 0x0323, 0}, 614 {0x0061, 0x0323, 0x1E05, 0x0302, 0} }; 615 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES}; 616 617 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 618 0x6a, 619 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 620 0xea, 621 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 622 0x0307, 0x0308, 0x0309, 0x030a, 623 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 624 0x0327, 0x0328, 0x0329, 0x032a, 625 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 626 0x1e07, 0x1e08, 0x1e09, 0x1e0a}; 627 628 int count = 0; 629 630 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES) 631 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n"); 632 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO) 633 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n"); 634 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES) 635 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n"); 636 637 if (U_FAILURE(status)) 638 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status)); 639 640 while (count < 4) 641 { 642 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status); 643 if (U_FAILURE(status)) { 644 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count); 645 break; 646 } 647 else { 648 if (result[count] != fcdresult) { 649 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count, 650 result[count]); 651 } 652 } 653 count ++; 654 } 655 656 /* random checks of long strings */ 657 status = U_ZERO_ERROR; 658 srand((unsigned)time( NULL )); 659 660 for (count = 0; count < 50; count ++) 661 { 662 int size = 0; 663 UBool testresult = UNORM_YES; 664 UChar data[20]; 665 UChar norm[100]; 666 UChar nfd[100]; 667 int normsize = 0; 668 int nfdsize = 0; 669 670 while (size != 19) { 671 data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)]; 672 log_verbose("0x%x", data[size]); 673 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, 674 norm + normsize, 100 - normsize, &status); 675 if (U_FAILURE(status)) { 676 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n"); 677 break; 678 } 679 size ++; 680 } 681 log_verbose("\n"); 682 683 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, 684 nfd, 100, &status); 685 if (U_FAILURE(status)) { 686 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n"); 687 } 688 689 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) { 690 testresult = UNORM_NO; 691 } 692 if (testresult == UNORM_YES) { 693 log_verbose("result UNORM_YES\n"); 694 } 695 else { 696 log_verbose("result UNORM_NO\n"); 697 } 698 699 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) { 700 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult); 701 } 702 } 703 } 704 705 static void 706 TestAPI() { 707 static const UChar in[]={ 0x68, 0xe4 }; 708 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff }; 709 UErrorCode errorCode; 710 int32_t length; 711 712 /* try preflighting */ 713 errorCode=U_ZERO_ERROR; 714 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode); 715 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { 716 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 717 return; 718 } 719 720 errorCode=U_ZERO_ERROR; 721 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode); 722 if(U_FAILURE(errorCode)) { 723 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode)); 724 return; 725 } 726 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) { 727 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]); 728 return; 729 } 730 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode); 731 if(U_FAILURE(errorCode)) { 732 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode)); 733 return; 734 } 735 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode); 736 if(U_FAILURE(errorCode)) { 737 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode)); 738 return; 739 } 740 } 741 742 /* test cases to improve test code coverage */ 743 enum { 744 HANGUL_K_KIYEOK=0x3131, /* NFKD->Jamo L U+1100 */ 745 HANGUL_K_WEO=0x315d, /* NFKD->Jamo V U+116f */ 746 HANGUL_K_KIYEOK_SIOS=0x3133, /* NFKD->Jamo T U+11aa */ 747 748 HANGUL_KIYEOK=0x1100, /* Jamo L U+1100 */ 749 HANGUL_WEO=0x116f, /* Jamo V U+116f */ 750 HANGUL_KIYEOK_SIOS=0x11aa, /* Jamo T U+11aa */ 751 752 HANGUL_AC00=0xac00, /* Hangul syllable = Jamo LV U+ac00 */ 753 HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */ 754 755 MUSICAL_VOID_NOTEHEAD=0x1d157, 756 MUSICAL_HALF_NOTE=0x1d15e, /* NFC/NFD->Notehead+Stem */ 757 MUSICAL_STEM=0x1d165, /* cc=216 */ 758 MUSICAL_STACCATO=0x1d17c /* cc=220 */ 759 }; 760 761 static void 762 TestNormCoverage() { 763 UChar input[1000], expect[1000], output[1000]; 764 UErrorCode errorCode; 765 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength; 766 767 /* create a long and nasty string with NFKC-unsafe characters */ 768 inLength=0; 769 770 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */ 771 input[inLength++]=HANGUL_KIYEOK; 772 input[inLength++]=HANGUL_WEO; 773 input[inLength++]=HANGUL_KIYEOK_SIOS; 774 775 input[inLength++]=HANGUL_KIYEOK; 776 input[inLength++]=HANGUL_WEO; 777 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 778 779 input[inLength++]=HANGUL_KIYEOK; 780 input[inLength++]=HANGUL_K_WEO; 781 input[inLength++]=HANGUL_KIYEOK_SIOS; 782 783 input[inLength++]=HANGUL_KIYEOK; 784 input[inLength++]=HANGUL_K_WEO; 785 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 786 787 input[inLength++]=HANGUL_K_KIYEOK; 788 input[inLength++]=HANGUL_WEO; 789 input[inLength++]=HANGUL_KIYEOK_SIOS; 790 791 input[inLength++]=HANGUL_K_KIYEOK; 792 input[inLength++]=HANGUL_WEO; 793 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 794 795 input[inLength++]=HANGUL_K_KIYEOK; 796 input[inLength++]=HANGUL_K_WEO; 797 input[inLength++]=HANGUL_KIYEOK_SIOS; 798 799 input[inLength++]=HANGUL_K_KIYEOK; 800 input[inLength++]=HANGUL_K_WEO; 801 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 802 803 /* Hangul LV with normal/compatibility Jamo T */ 804 input[inLength++]=HANGUL_AC00; 805 input[inLength++]=HANGUL_KIYEOK_SIOS; 806 807 input[inLength++]=HANGUL_AC00; 808 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 809 810 /* compatibility Jamo L, V */ 811 input[inLength++]=HANGUL_K_KIYEOK; 812 input[inLength++]=HANGUL_K_WEO; 813 814 hangulPrefixLength=inLength; 815 816 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE); 817 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE); 818 for(i=0; i<200; ++i) { 819 input[inLength++]=U16_LEAD(MUSICAL_STACCATO); 820 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO); 821 input[inLength++]=U16_LEAD(MUSICAL_STEM); 822 input[inLength++]=U16_TRAIL(MUSICAL_STEM); 823 } 824 825 /* (compatibility) Jamo L, T do not compose */ 826 input[inLength++]=HANGUL_K_KIYEOK; 827 input[inLength++]=HANGUL_K_KIYEOK_SIOS; 828 829 /* quick checks */ 830 errorCode=U_ZERO_ERROR; 831 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) { 832 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 833 } 834 errorCode=U_ZERO_ERROR; 835 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) { 836 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 837 } 838 errorCode=U_ZERO_ERROR; 839 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) { 840 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 841 } 842 errorCode=U_ZERO_ERROR; 843 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) { 844 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 845 } 846 errorCode=U_ZERO_ERROR; 847 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) { 848 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode)); 849 } 850 851 /* NFKC */ 852 expectLength=0; 853 expect[expectLength++]=HANGUL_SYLLABLE; 854 855 expect[expectLength++]=HANGUL_SYLLABLE; 856 857 expect[expectLength++]=HANGUL_SYLLABLE; 858 859 expect[expectLength++]=HANGUL_SYLLABLE; 860 861 expect[expectLength++]=HANGUL_SYLLABLE; 862 863 expect[expectLength++]=HANGUL_SYLLABLE; 864 865 expect[expectLength++]=HANGUL_SYLLABLE; 866 867 expect[expectLength++]=HANGUL_SYLLABLE; 868 869 expect[expectLength++]=HANGUL_AC00+3; 870 871 expect[expectLength++]=HANGUL_AC00+3; 872 873 expect[expectLength++]=HANGUL_AC00+14*28; 874 875 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); 876 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); 877 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 878 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 879 for(i=0; i<200; ++i) { 880 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 881 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 882 } 883 for(i=0; i<200; ++i) { 884 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); 885 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); 886 } 887 888 expect[expectLength++]=HANGUL_KIYEOK; 889 expect[expectLength++]=HANGUL_KIYEOK_SIOS; 890 891 /* try destination overflow first */ 892 errorCode=U_ZERO_ERROR; 893 preflightLength=unorm_normalize(input, inLength, 894 UNORM_NFKC, 0, 895 output, 100, /* too short */ 896 &errorCode); 897 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { 898 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode)); 899 } 900 901 /* real NFKC */ 902 errorCode=U_ZERO_ERROR; 903 length=unorm_normalize(input, inLength, 904 UNORM_NFKC, 0, 905 output, UPRV_LENGTHOF(output), 906 &errorCode); 907 if(U_FAILURE(errorCode)) { 908 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode)); 909 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { 910 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n"); 911 for(i=0; i<length; ++i) { 912 if(output[i]!=expect[i]) { 913 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]); 914 break; 915 } 916 } 917 } 918 if(length!=preflightLength) { 919 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength); 920 } 921 922 /* FCD */ 923 u_memcpy(expect, input, hangulPrefixLength); 924 expectLength=hangulPrefixLength; 925 926 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); 927 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); 928 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 929 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 930 for(i=0; i<200; ++i) { 931 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); 932 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); 933 } 934 for(i=0; i<200; ++i) { 935 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); 936 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); 937 } 938 939 expect[expectLength++]=HANGUL_K_KIYEOK; 940 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS; 941 942 errorCode=U_ZERO_ERROR; 943 length=unorm_normalize(input, inLength, 944 UNORM_FCD, 0, 945 output, UPRV_LENGTHOF(output), 946 &errorCode); 947 if(U_FAILURE(errorCode)) { 948 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode)); 949 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { 950 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n"); 951 for(i=0; i<length; ++i) { 952 if(output[i]!=expect[i]) { 953 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]); 954 break; 955 } 956 } 957 } 958 } 959 960 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */ 961 static void 962 TestConcatenate(void) { 963 /* "re + 'sume'" */ 964 static const UChar 965 left[]={ 966 0x72, 0x65, 0 967 }, 968 right[]={ 969 0x301, 0x73, 0x75, 0x6d, 0xe9, 0 970 }, 971 expect[]={ 972 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0 973 }; 974 975 UChar buffer[100]; 976 UErrorCode errorCode; 977 int32_t length; 978 979 /* left with length, right NUL-terminated */ 980 errorCode=U_ZERO_ERROR; 981 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 982 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) { 983 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 984 } 985 986 /* preflighting */ 987 errorCode=U_ZERO_ERROR; 988 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode); 989 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) { 990 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 991 } 992 993 buffer[2]=0x5555; 994 errorCode=U_ZERO_ERROR; 995 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode); 996 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) { 997 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 998 } 999 1000 /* enter with U_FAILURE */ 1001 buffer[2]=0xaaaa; 1002 errorCode=U_UNEXPECTED_TOKEN; 1003 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 1004 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) { 1005 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode)); 1006 } 1007 1008 /* illegal arguments */ 1009 buffer[2]=0xaaaa; 1010 errorCode=U_ZERO_ERROR; 1011 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode); 1012 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) { 1013 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 1014 } 1015 1016 errorCode=U_ZERO_ERROR; 1017 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode); 1018 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1019 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); 1020 } 1021 } 1022 1023 enum { 1024 _PLUS=0x2b 1025 }; 1026 1027 static const char *const _modeString[UNORM_MODE_COUNT]={ 1028 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD" 1029 }; 1030 1031 static void 1032 _testIter(const UChar *src, int32_t srcLength, 1033 UCharIterator *iter, UNormalizationMode mode, UBool forward, 1034 const UChar *out, int32_t outLength, 1035 const int32_t *srcIndexes, int32_t srcIndexesLength) { 1036 UChar buffer[4]; 1037 const UChar *expect, *outLimit, *in; 1038 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength; 1039 UErrorCode errorCode; 1040 UBool neededToNormalize, expectNeeded; 1041 1042 errorCode=U_ZERO_ERROR; 1043 outLimit=out+outLength; 1044 if(forward) { 1045 expect=out; 1046 i=index=0; 1047 } else { 1048 expect=outLimit; 1049 i=srcIndexesLength-2; 1050 index=srcLength; 1051 } 1052 1053 for(;;) { 1054 prevIndex=index; 1055 if(forward) { 1056 if(!iter->hasNext(iter)) { 1057 return; 1058 } 1059 length=unorm_next(iter, 1060 buffer, UPRV_LENGTHOF(buffer), 1061 mode, 0, 1062 (UBool)(out!=NULL), &neededToNormalize, 1063 &errorCode); 1064 expectIndex=srcIndexes[i+1]; 1065 in=src+prevIndex; 1066 inLength=expectIndex-prevIndex; 1067 1068 if(out!=NULL) { 1069 /* get output piece from between plus signs */ 1070 expectLength=0; 1071 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) { 1072 ++expectLength; 1073 } 1074 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); 1075 } else { 1076 expect=in; 1077 expectLength=inLength; 1078 expectNeeded=FALSE; 1079 } 1080 } else { 1081 if(!iter->hasPrevious(iter)) { 1082 return; 1083 } 1084 length=unorm_previous(iter, 1085 buffer, UPRV_LENGTHOF(buffer), 1086 mode, 0, 1087 (UBool)(out!=NULL), &neededToNormalize, 1088 &errorCode); 1089 expectIndex=srcIndexes[i]; 1090 in=src+expectIndex; 1091 inLength=prevIndex-expectIndex; 1092 1093 if(out!=NULL) { 1094 /* get output piece from between plus signs */ 1095 expectLength=0; 1096 while(expect!=out && expect[-1]!=_PLUS) { 1097 ++expectLength; 1098 --expect; 1099 } 1100 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); 1101 } else { 1102 expect=in; 1103 expectLength=inLength; 1104 expectNeeded=FALSE; 1105 } 1106 } 1107 index=iter->getIndex(iter, UITER_CURRENT); 1108 1109 if(U_FAILURE(errorCode)) { 1110 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n", 1111 forward, _modeString[mode], i, u_errorName(errorCode)); 1112 return; 1113 } 1114 if(expectIndex!=index) { 1115 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n", 1116 forward, _modeString[mode], i, index, expectIndex); 1117 return; 1118 } 1119 if(expectLength!=length) { 1120 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n", 1121 forward, _modeString[mode], i, length, expectLength); 1122 return; 1123 } 1124 if(0!=u_memcmp(expect, buffer, length)) { 1125 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n", 1126 forward, _modeString[mode], i); 1127 return; 1128 } 1129 if(neededToNormalize!=expectNeeded) { 1130 } 1131 1132 if(forward) { 1133 expect+=expectLength+1; /* go after the + */ 1134 ++i; 1135 } else { 1136 --expect; /* go before the + */ 1137 --i; 1138 } 1139 } 1140 } 1141 1142 static void 1143 TestNextPrevious() { 1144 static const UChar 1145 src[]={ /* input string */ 1146 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133 1147 }, 1148 nfd[]={ /* + separates expected output pieces */ 1149 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133 1150 }, 1151 nfkd[]={ 1152 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa 1153 }, 1154 nfc[]={ 1155 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 1156 }, 1157 nfkc[]={ 1158 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03 1159 }, 1160 fcd[]={ 1161 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 1162 }; 1163 1164 /* expected iterator indexes in the source string for each iteration piece */ 1165 static const int32_t 1166 nfdIndexes[]={ 1167 0, 1, 2, 5, 6, 7 1168 }, 1169 nfkdIndexes[]={ 1170 0, 1, 2, 5, 6, 7 1171 }, 1172 nfcIndexes[]={ 1173 0, 1, 2, 5, 6, 7 1174 }, 1175 nfkcIndexes[]={ 1176 0, 1, 2, 5, 7 1177 }, 1178 fcdIndexes[]={ 1179 0, 1, 2, 5, 6, 7 1180 }; 1181 1182 UCharIterator iter; 1183 1184 UChar buffer[4]; 1185 int32_t length; 1186 1187 UBool neededToNormalize; 1188 UErrorCode errorCode; 1189 1190 uiter_setString(&iter, src, UPRV_LENGTHOF(src)); 1191 1192 /* test iteration with doNormalize */ 1193 iter.index=0; 1194 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4); 1195 iter.index=0; 1196 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4); 1197 iter.index=0; 1198 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4); 1199 iter.index=0; 1200 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4); 1201 iter.index=0; 1202 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4); 1203 1204 iter.index=iter.length; 1205 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4); 1206 iter.index=iter.length; 1207 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4); 1208 iter.index=iter.length; 1209 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4); 1210 iter.index=iter.length; 1211 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4); 1212 iter.index=iter.length; 1213 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4); 1214 1215 /* test iteration without doNormalize */ 1216 iter.index=0; 1217 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4); 1218 iter.index=0; 1219 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4); 1220 iter.index=0; 1221 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4); 1222 iter.index=0; 1223 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4); 1224 iter.index=0; 1225 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4); 1226 1227 iter.index=iter.length; 1228 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4); 1229 iter.index=iter.length; 1230 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4); 1231 iter.index=iter.length; 1232 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4); 1233 iter.index=iter.length; 1234 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4); 1235 iter.index=iter.length; 1236 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4); 1237 1238 /* try without neededToNormalize */ 1239 errorCode=U_ZERO_ERROR; 1240 buffer[0]=5; 1241 iter.index=1; 1242 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer), 1243 UNORM_NFD, 0, TRUE, NULL, 1244 &errorCode); 1245 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) { 1246 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode)); 1247 return; 1248 } 1249 1250 /* preflight */ 1251 neededToNormalize=9; 1252 iter.index=1; 1253 length=unorm_next(&iter, NULL, 0, 1254 UNORM_NFD, 0, TRUE, &neededToNormalize, 1255 &errorCode); 1256 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) { 1257 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode)); 1258 return; 1259 } 1260 1261 errorCode=U_ZERO_ERROR; 1262 buffer[0]=buffer[1]=5; 1263 neededToNormalize=9; 1264 iter.index=1; 1265 length=unorm_next(&iter, buffer, 1, 1266 UNORM_NFD, 0, TRUE, &neededToNormalize, 1267 &errorCode); 1268 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) { 1269 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode)); 1270 return; 1271 } 1272 1273 /* no iterator */ 1274 errorCode=U_ZERO_ERROR; 1275 buffer[0]=buffer[1]=5; 1276 neededToNormalize=9; 1277 iter.index=1; 1278 length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer), 1279 UNORM_NFD, 0, TRUE, &neededToNormalize, 1280 &errorCode); 1281 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1282 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode)); 1283 return; 1284 } 1285 1286 /* illegal mode */ 1287 buffer[0]=buffer[1]=5; 1288 neededToNormalize=9; 1289 iter.index=1; 1290 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer), 1291 (UNormalizationMode)0, 0, TRUE, &neededToNormalize, 1292 &errorCode); 1293 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1294 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode)); 1295 return; 1296 } 1297 1298 /* error coming in */ 1299 errorCode=U_MISPLACED_QUANTIFIER; 1300 buffer[0]=5; 1301 iter.index=1; 1302 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer), 1303 UNORM_NFD, 0, TRUE, NULL, 1304 &errorCode); 1305 if(errorCode!=U_MISPLACED_QUANTIFIER) { 1306 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode)); 1307 return; 1308 } 1309 } 1310 1311 static void 1312 TestFCNFKCClosure(void) { 1313 static const struct { 1314 UChar32 c; 1315 const UChar s[6]; 1316 } tests[]={ 1317 { 0x00C4, { 0 } }, 1318 { 0x00E4, { 0 } }, 1319 { 0x037A, { 0x0020, 0x03B9, 0 } }, 1320 { 0x03D2, { 0x03C5, 0 } }, 1321 { 0x20A8, { 0x0072, 0x0073, 0 } }, 1322 { 0x210B, { 0x0068, 0 } }, 1323 { 0x210C, { 0x0068, 0 } }, 1324 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } }, 1325 { 0x2122, { 0x0074, 0x006D, 0 } }, 1326 { 0x2128, { 0x007A, 0 } }, 1327 { 0x1D5DB, { 0x0068, 0 } }, 1328 { 0x1D5ED, { 0x007A, 0 } }, 1329 { 0x0061, { 0 } } 1330 }; 1331 1332 UChar buffer[8]; 1333 UErrorCode errorCode; 1334 int32_t i, length; 1335 1336 for(i=0; i<UPRV_LENGTHOF(tests); ++i) { 1337 errorCode=U_ZERO_ERROR; 1338 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode); 1339 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) { 1340 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode)); 1341 } 1342 } 1343 1344 /* error handling */ 1345 errorCode=U_ZERO_ERROR; 1346 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode); 1347 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1348 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode)); 1349 } 1350 1351 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode); 1352 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1353 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode)); 1354 } 1355 } 1356 1357 static void 1358 TestQuickCheckPerCP() { 1359 UErrorCode errorCode; 1360 UChar32 c, lead, trail; 1361 UChar s[U16_MAX_LENGTH], nfd[16]; 1362 int32_t length, lccc1, lccc2, tccc1, tccc2; 1363 int32_t qc1, qc2; 1364 1365 if( 1366 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES || 1367 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES || 1368 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || 1369 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || 1370 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) || 1371 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) 1372 ) { 1373 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n"); 1374 } 1375 1376 /* 1377 * compare the quick check property values for some code points 1378 * to the quick check results for checking same-code point strings 1379 */ 1380 errorCode=U_ZERO_ERROR; 1381 c=0; 1382 while(c<0x110000) { 1383 length=0; 1384 U16_APPEND_UNSAFE(s, length, c); 1385 1386 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK); 1387 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode); 1388 if(qc1!=qc2) { 1389 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1390 } 1391 1392 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK); 1393 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode); 1394 if(qc1!=qc2) { 1395 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1396 } 1397 1398 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK); 1399 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode); 1400 if(qc1!=qc2) { 1401 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1402 } 1403 1404 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK); 1405 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode); 1406 if(qc1!=qc2) { 1407 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); 1408 } 1409 1410 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode); 1411 /* length-length == 0 is used to get around a compiler warning. */ 1412 U16_GET(nfd, 0, length-length, length, lead); 1413 U16_GET(nfd, 0, length-1, length, trail); 1414 1415 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS); 1416 lccc2=u_getCombiningClass(lead); 1417 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS); 1418 tccc2=u_getCombiningClass(trail); 1419 1420 if(lccc1!=lccc2) { 1421 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n", 1422 lccc1, lccc2, c); 1423 } 1424 if(tccc1!=tccc2) { 1425 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n", 1426 tccc1, tccc2, c); 1427 } 1428 1429 /* skip some code points */ 1430 c=(20*c)/19+1; 1431 } 1432 } 1433 1434 static void 1435 TestComposition(void) { 1436 static const struct { 1437 UNormalizationMode mode; 1438 uint32_t options; 1439 UChar input[12]; 1440 UChar expect[12]; 1441 } cases[]={ 1442 /* 1443 * special cases for UAX #15 bug 1444 * see Unicode Corrigendum #5: Normalization Idempotency 1445 * at http://unicode.org/versions/corrigendum5.html 1446 * (was Public Review Issue #29) 1447 */ 1448 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } }, 1449 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } }, 1450 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } }, 1451 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } }, 1452 1453 /* TODO: add test cases for UNORM_FCC here (j2151) */ 1454 }; 1455 1456 UChar output[16]; 1457 UErrorCode errorCode; 1458 int32_t i, length; 1459 1460 for(i=0; i<UPRV_LENGTHOF(cases); ++i) { 1461 errorCode=U_ZERO_ERROR; 1462 length=unorm_normalize( 1463 cases[i].input, -1, 1464 cases[i].mode, cases[i].options, 1465 output, UPRV_LENGTHOF(output), 1466 &errorCode); 1467 if( U_FAILURE(errorCode) || 1468 length!=u_strlen(cases[i].expect) || 1469 0!=u_memcmp(output, cases[i].expect, length) 1470 ) { 1471 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i); 1472 } 1473 } 1474 } 1475 1476 static void 1477 TestGetDecomposition() { 1478 UChar decomp[32]; 1479 int32_t length; 1480 1481 UErrorCode errorCode=U_ZERO_ERROR; 1482 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode); 1483 if(U_FAILURE(errorCode)) { 1484 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode)); 1485 return; 1486 } 1487 1488 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1489 if(U_FAILURE(errorCode) || length>=0) { 1490 log_err("unorm2_getDecomposition(fcc, space) failed\n"); 1491 } 1492 errorCode=U_ZERO_ERROR; 1493 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1494 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) { 1495 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n"); 1496 } 1497 errorCode=U_ZERO_ERROR; 1498 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1499 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) { 1500 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n"); 1501 } 1502 errorCode=U_ZERO_ERROR; 1503 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode); 1504 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { 1505 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n"); 1506 } 1507 errorCode=U_ZERO_ERROR; 1508 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode); 1509 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1510 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n"); 1511 } 1512 errorCode=U_ZERO_ERROR; 1513 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode); 1514 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1515 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n"); 1516 } 1517 } 1518 1519 static void 1520 TestGetRawDecomposition() { 1521 UChar decomp[32]; 1522 int32_t length; 1523 1524 UErrorCode errorCode=U_ZERO_ERROR; 1525 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode); 1526 if(U_FAILURE(errorCode)) { 1527 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); 1528 return; 1529 } 1530 /* 1531 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values, 1532 * without recursive decomposition. 1533 */ 1534 1535 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1536 if(U_FAILURE(errorCode) || length>=0) { 1537 log_err("unorm2_getDecomposition(nfkc, space) failed\n"); 1538 } 1539 errorCode=U_ZERO_ERROR; 1540 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1541 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) { 1542 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n"); 1543 } 1544 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */ 1545 errorCode=U_ZERO_ERROR; 1546 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1547 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) { 1548 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n"); 1549 } 1550 /* U+212B ANGSTROM SIGN */ 1551 errorCode=U_ZERO_ERROR; 1552 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1553 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) { 1554 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n"); 1555 } 1556 errorCode=U_ZERO_ERROR; 1557 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1558 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) { 1559 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n"); 1560 } 1561 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */ 1562 errorCode=U_ZERO_ERROR; 1563 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode); 1564 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) { 1565 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n"); 1566 } 1567 errorCode=U_ZERO_ERROR; 1568 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode); 1569 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) { 1570 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n"); 1571 } 1572 errorCode=U_ZERO_ERROR; 1573 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode); 1574 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1575 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n"); 1576 } 1577 errorCode=U_ZERO_ERROR; 1578 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode); 1579 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 1580 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n"); 1581 } 1582 } 1583 1584 static void 1585 TestAppendRestoreMiddle() { 1586 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */ 1587 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */ 1588 /* NFC: C5 is 'A with ring above' */ 1589 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 }; 1590 int32_t length; 1591 UErrorCode errorCode=U_ZERO_ERROR; 1592 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); 1593 if(U_FAILURE(errorCode)) { 1594 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); 1595 return; 1596 } 1597 /* 1598 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity. 1599 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A> 1600 * still fits into a[] but the full result still overflows this capacity. 1601 * (Let it modify the destination buffer before reallocating internally.) 1602 */ 1603 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode); 1604 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) { 1605 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length); 1606 return; 1607 } 1608 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */ 1609 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) { 1610 log_err("unorm2_append(overflow) modified the first string\n"); 1611 return; 1612 } 1613 errorCode=U_ZERO_ERROR; 1614 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode); 1615 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) { 1616 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length); 1617 return; 1618 } 1619 } 1620 1621 static void 1622 TestGetEasyToUseInstance() { 1623 static const UChar in[]={ 1624 0xA0, /* -> <noBreak> 0020 */ 1625 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */ 1626 }; 1627 UChar out[32]; 1628 int32_t length; 1629 1630 UErrorCode errorCode=U_ZERO_ERROR; 1631 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); 1632 if(U_FAILURE(errorCode)) { 1633 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); 1634 return; 1635 } 1636 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1637 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) { 1638 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n", 1639 (int)length, u_errorName(errorCode)); 1640 } 1641 1642 errorCode=U_ZERO_ERROR; 1643 n2=unorm2_getNFDInstance(&errorCode); 1644 if(U_FAILURE(errorCode)) { 1645 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode)); 1646 return; 1647 } 1648 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1649 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { 1650 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n", 1651 (int)length, u_errorName(errorCode)); 1652 } 1653 1654 errorCode=U_ZERO_ERROR; 1655 n2=unorm2_getNFKCInstance(&errorCode); 1656 if(U_FAILURE(errorCode)) { 1657 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); 1658 return; 1659 } 1660 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1661 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) { 1662 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n", 1663 (int)length, u_errorName(errorCode)); 1664 } 1665 1666 errorCode=U_ZERO_ERROR; 1667 n2=unorm2_getNFKDInstance(&errorCode); 1668 if(U_FAILURE(errorCode)) { 1669 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode)); 1670 return; 1671 } 1672 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1673 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { 1674 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n", 1675 (int)length, u_errorName(errorCode)); 1676 } 1677 1678 errorCode=U_ZERO_ERROR; 1679 n2=unorm2_getNFKCCasefoldInstance(&errorCode); 1680 if(U_FAILURE(errorCode)) { 1681 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode)); 1682 return; 1683 } 1684 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode); 1685 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) { 1686 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n", 1687 (int)length, u_errorName(errorCode)); 1688 } 1689 } 1690 1691 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 1692