Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File CNORMTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *     Madhu Katragadda            Ported for C API
     13 *     synwee                      added test for quick check
     14 *     synwee                      added test for checkFCD
     15 *********************************************************************************/
     16 /*tests for u_normalization*/
     17 #include "unicode/utypes.h"
     18 #include "unicode/unorm.h"
     19 #include "unicode/utf16.h"
     20 #include "cintltst.h"
     21 #include "cmemory.h"
     22 
     23 #if !UCONFIG_NO_NORMALIZATION
     24 
     25 #include <stdlib.h>
     26 #include <time.h>
     27 #include "unicode/uchar.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/unorm.h"
     30 #include "cnormtst.h"
     31 
     32 static void
     33 TestAPI(void);
     34 
     35 static void
     36 TestNormCoverage(void);
     37 
     38 static void
     39 TestConcatenate(void);
     40 
     41 static void
     42 TestNextPrevious(void);
     43 
     44 static void TestIsNormalized(void);
     45 
     46 static void
     47 TestFCNFKCClosure(void);
     48 
     49 static void
     50 TestQuickCheckPerCP(void);
     51 
     52 static void
     53 TestComposition(void);
     54 
     55 static void
     56 TestFCD(void);
     57 
     58 static void
     59 TestGetDecomposition(void);
     60 
     61 static void
     62 TestGetRawDecomposition(void);
     63 
     64 static void TestAppendRestoreMiddle(void);
     65 static void TestGetEasyToUseInstance(void);
     66 
     67 static const char* const canonTests[][3] = {
     68     /* Input*/                    /*Decomposed*/                /*Composed*/
     69     { "cat",                    "cat",                        "cat"                    },
     70     { "\\u00e0ardvark",            "a\\u0300ardvark",            "\\u00e0ardvark",        },
     71 
     72     { "\\u1e0a",                "D\\u0307",                    "\\u1e0a"                }, /* D-dot_above*/
     73     { "D\\u0307",                "D\\u0307",                    "\\u1e0a"                }, /* D dot_above*/
     74 
     75     { "\\u1e0c\\u0307",            "D\\u0323\\u0307",            "\\u1e0c\\u0307"        }, /* D-dot_below dot_above*/
     76     { "\\u1e0a\\u0323",            "D\\u0323\\u0307",            "\\u1e0c\\u0307"        }, /* D-dot_above dot_below */
     77     { "D\\u0307\\u0323",        "D\\u0323\\u0307",            "\\u1e0c\\u0307"        }, /* D dot_below dot_above */
     78 
     79     { "\\u1e10\\u0307\\u0323",    "D\\u0327\\u0323\\u0307",    "\\u1e10\\u0323\\u0307"    }, /*D dot_below cedilla dot_above*/
     80     { "D\\u0307\\u0328\\u0323",    "D\\u0328\\u0323\\u0307",    "\\u1e0c\\u0328\\u0307"    }, /* D dot_above ogonek dot_below*/
     81 
     82     { "\\u1E14",                "E\\u0304\\u0300",            "\\u1E14"                }, /* E-macron-grave*/
     83     { "\\u0112\\u0300",            "E\\u0304\\u0300",            "\\u1E14"                }, /* E-macron + grave*/
     84     { "\\u00c8\\u0304",            "E\\u0300\\u0304",            "\\u00c8\\u0304"        }, /* E-grave + macron*/
     85 
     86     { "\\u212b",                "A\\u030a",                    "\\u00c5"                }, /* angstrom_sign*/
     87     { "\\u00c5",                "A\\u030a",                    "\\u00c5"                }, /* A-ring*/
     88 
     89     { "\\u00C4ffin",            "A\\u0308ffin",                "\\u00C4ffin"                    },
     90     { "\\u00C4\\uFB03n",        "A\\u0308\\uFB03n",            "\\u00C4\\uFB03n"                },
     91 
     92     { "Henry IV",                "Henry IV",                    "Henry IV"                },
     93     { "Henry \\u2163",            "Henry \\u2163",            "Henry \\u2163"            },
     94 
     95     { "\\u30AC",                "\\u30AB\\u3099",            "\\u30AC"                }, /* ga (Katakana)*/
     96     { "\\u30AB\\u3099",            "\\u30AB\\u3099",            "\\u30AC"                }, /*ka + ten*/
     97     { "\\uFF76\\uFF9E",            "\\uFF76\\uFF9E",            "\\uFF76\\uFF9E"        }, /* hw_ka + hw_ten*/
     98     { "\\u30AB\\uFF9E",            "\\u30AB\\uFF9E",            "\\u30AB\\uFF9E"        }, /* ka + hw_ten*/
     99     { "\\uFF76\\u3099",            "\\uFF76\\u3099",            "\\uFF76\\u3099"        },  /* hw_ka + ten*/
    100     { "A\\u0300\\u0316",           "A\\u0316\\u0300",           "\\u00C0\\u0316"        },  /* hw_ka + ten*/
    101     { "", "", "" }
    102 };
    103 
    104 static const char* const compatTests[][3] = {
    105     /* Input*/                        /*Decomposed    */                /*Composed*/
    106     { "cat",                        "cat",                            "cat"                },
    107 
    108     { "\\uFB4f",                    "\\u05D0\\u05DC",                "\\u05D0\\u05DC"    }, /* Alef-Lamed vs. Alef, Lamed*/
    109 
    110     { "\\u00C4ffin",                "A\\u0308ffin",                    "\\u00C4ffin"             },
    111     { "\\u00C4\\uFB03n",            "A\\u0308ffin",                    "\\u00C4ffin"                }, /* ffi ligature -> f + f + i*/
    112 
    113     { "Henry IV",                    "Henry IV",                        "Henry IV"            },
    114     { "Henry \\u2163",                "Henry IV",                        "Henry IV"            },
    115 
    116     { "\\u30AC",                    "\\u30AB\\u3099",                "\\u30AC"            }, /* ga (Katakana)*/
    117     { "\\u30AB\\u3099",                "\\u30AB\\u3099",                "\\u30AC"            }, /*ka + ten*/
    118 
    119     { "\\uFF76\\u3099",                "\\u30AB\\u3099",                "\\u30AC"            }, /* hw_ka + ten*/
    120 
    121     /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
    122     { "\\uFF76\\uFF9E",                "\\u30AB\\u3099",                "\\u30AC"            }, /* hw_ka + hw_ten*/
    123     { "\\u30AB\\uFF9E",                "\\u30AB\\u3099",                "\\u30AC"            }, /* ka + hw_ten*/
    124     { "", "", "" }
    125 };
    126 
    127 static const char* const fcdTests[][3] = {
    128     /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    129     { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },  /* D-caron + cedilla */
    130     { "\\u010e", "\\u010e", NULL }  /* D-caron */
    131 };
    132 
    133 void addNormTest(TestNode** root);
    134 
    135 void addNormTest(TestNode** root)
    136 {
    137     addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
    138     addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
    139     addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
    140     addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
    141     addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
    142     addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
    143     addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
    144     addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
    145     addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
    146     addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
    147     addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
    148     addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
    149     addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
    150     addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
    151     addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
    152     addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
    153     addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
    154     addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
    155     addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
    156     addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
    157 }
    158 
    159 static const char* const modeStrings[]={
    160     "UNORM_NONE",
    161     "UNORM_NFD",
    162     "UNORM_NFKD",
    163     "UNORM_NFC",
    164     "UNORM_NFKC",
    165     "UNORM_FCD",
    166     "UNORM_MODE_COUNT"
    167 };
    168 
    169 static void TestNormCases(UNormalizationMode mode,
    170                           const char* const cases[][3], int32_t lengthOfCases) {
    171     int32_t x, neededLen, length2;
    172     int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    173     UChar *source=NULL;
    174     UChar result[16];
    175     log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    176     for(x=0; x < lengthOfCases; x++)
    177     {
    178         UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
    179         source=CharsToUChars(cases[x][0]);
    180         neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
    181         length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
    182         if(neededLen!=length2) {
    183           log_err("ERROR in unorm_normalize(%s)[%d]: "
    184                   "preflight length/NUL %d!=%d preflight length/srcLength\n",
    185                   modeStrings[mode], (int)x, (int)neededLen, (int)length2);
    186         }
    187         if(status==U_BUFFER_OVERFLOW_ERROR)
    188         {
    189             status=U_ZERO_ERROR;
    190         }
    191         length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
    192         if(U_FAILURE(status) || neededLen!=length2) {
    193             log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
    194                          modeStrings[mode], austrdup(source), myErrorName(status));
    195         } else {
    196             assertEqual(result, cases[x][expIndex], x);
    197         }
    198         length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
    199         if(U_FAILURE(status) || neededLen!=length2) {
    200             log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
    201                          modeStrings[mode], austrdup(source), myErrorName(status));
    202         } else {
    203             assertEqual(result, cases[x][expIndex], x);
    204         }
    205         free(source);
    206     }
    207 }
    208 
    209 void TestDecomp() {
    210     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
    211 }
    212 
    213 void TestCompatDecomp() {
    214     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
    215 }
    216 
    217 void TestCanonDecompCompose() {
    218     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
    219 }
    220 
    221 void TestCompatDecompCompose() {
    222     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
    223 }
    224 
    225 void TestFCD() {
    226     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
    227 }
    228 
    229 static void assertEqual(const UChar* result, const char* expected, int32_t index)
    230 {
    231     UChar *expectedUni = CharsToUChars(expected);
    232     if(u_strcmp(result, expectedUni)!=0){
    233         log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
    234             austrdup(result) );
    235     }
    236     free(expectedUni);
    237 }
    238 
    239 static void TestNull_check(UChar *src, int32_t srcLen,
    240                     UChar *exp, int32_t expLen,
    241                     UNormalizationMode mode,
    242                     const char *name)
    243 {
    244     UErrorCode status = U_ZERO_ERROR;
    245     int32_t len, i;
    246 
    247     UChar   result[50];
    248 
    249 
    250     status = U_ZERO_ERROR;
    251 
    252     for(i=0;i<50;i++)
    253       {
    254         result[i] = 0xFFFD;
    255       }
    256 
    257     len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
    258 
    259     if(U_FAILURE(status)) {
    260       log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
    261     } else if (len != expLen) {
    262       log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
    263     }
    264 
    265     {
    266       for(i=0;i<len;i++){
    267         if(exp[i] != result[i]) {
    268           log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
    269                   name,
    270                   i,
    271                   exp[i],
    272                   result[i]);
    273           return;
    274         }
    275         log_verbose("     %d: \\u%04X\n", i, result[i]);
    276       }
    277     }
    278 
    279     log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    280 }
    281 
    282 void TestNull()
    283 {
    284 
    285     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
    286     int32_t source_comp_len = 4;
    287     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
    288     int32_t expect_comp_len = 3;
    289 
    290     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
    291     int32_t source_dcmp_len = 3;
    292     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
    293     int32_t expect_dcmp_len = 5;
    294 
    295     TestNull_check(source_comp,
    296                    source_comp_len,
    297                    expect_comp,
    298                    expect_comp_len,
    299                    UNORM_NFC,
    300                    "UNORM_NFC");
    301 
    302     TestNull_check(source_dcmp,
    303                    source_dcmp_len,
    304                    expect_dcmp,
    305                    expect_dcmp_len,
    306                    UNORM_NFD,
    307                    "UNORM_NFD");
    308 
    309     TestNull_check(source_comp,
    310                    source_comp_len,
    311                    expect_comp,
    312                    expect_comp_len,
    313                    UNORM_NFKC,
    314                    "UNORM_NFKC");
    315 
    316 
    317 }
    318 
    319 static void TestQuickCheckResultNO()
    320 {
    321   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
    322                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
    323   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
    324                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
    325   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
    326                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
    327   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
    328                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
    329 
    330 
    331   const int SIZE = 10;
    332 
    333   int count = 0;
    334   UErrorCode error = U_ZERO_ERROR;
    335 
    336   for (; count < SIZE; count ++)
    337   {
    338     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
    339                                                               UNORM_NO)
    340     {
    341       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
    342       return;
    343     }
    344     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
    345                                                               UNORM_NO)
    346     {
    347       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
    348       return;
    349     }
    350     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
    351                                                               UNORM_NO)
    352     {
    353       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
    354       return;
    355     }
    356     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
    357                                                               UNORM_NO)
    358     {
    359       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
    360       return;
    361     }
    362   }
    363 }
    364 
    365 
    366 static void TestQuickCheckResultYES()
    367 {
    368   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
    369                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
    370   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
    371                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
    372   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
    373                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
    374   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
    375                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
    376 
    377   const int SIZE = 10;
    378   int count = 0;
    379   UErrorCode error = U_ZERO_ERROR;
    380 
    381   UChar cp = 0;
    382   while (cp < 0xA0)
    383   {
    384     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
    385     {
    386       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
    387       return;
    388     }
    389     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
    390                                                              UNORM_YES)
    391     {
    392       log_err("ERROR in NFC quick check at U+%04x\n", cp);
    393       return;
    394     }
    395     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
    396     {
    397       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
    398       return;
    399     }
    400     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
    401                                                              UNORM_YES)
    402     {
    403       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
    404       return;
    405     }
    406     cp ++;
    407   }
    408 
    409   for (; count < SIZE; count ++)
    410   {
    411     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
    412                                                              UNORM_YES)
    413     {
    414       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
    415       return;
    416     }
    417     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
    418                                                           != UNORM_YES)
    419     {
    420       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
    421       return;
    422     }
    423     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
    424                                                              UNORM_YES)
    425     {
    426       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
    427       return;
    428     }
    429     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
    430                                                              UNORM_YES)
    431     {
    432       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
    433       return;
    434     }
    435   }
    436 }
    437 
    438 static void TestQuickCheckResultMAYBE()
    439 {
    440   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
    441                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
    442   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
    443                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
    444 
    445 
    446   const int SIZE = 10;
    447 
    448   int count = 0;
    449   UErrorCode error = U_ZERO_ERROR;
    450 
    451   /* NFD and NFKD does not have any MAYBE codepoints */
    452   for (; count < SIZE; count ++)
    453   {
    454     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
    455                                                            UNORM_MAYBE)
    456     {
    457       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
    458       return;
    459     }
    460     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
    461                                                            UNORM_MAYBE)
    462     {
    463       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
    464       return;
    465     }
    466   }
    467 }
    468 
    469 static void TestQuickCheckStringResult()
    470 {
    471   int count;
    472   UChar *d = NULL;
    473   UChar *c = NULL;
    474   UErrorCode error = U_ZERO_ERROR;
    475 
    476   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
    477   {
    478     d = CharsToUChars(canonTests[count][1]);
    479     c = CharsToUChars(canonTests[count][2]);
    480     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
    481                                                             UNORM_YES)
    482     {
    483       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
    484       return;
    485     }
    486 
    487     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
    488                                                             UNORM_NO)
    489     {
    490       log_err("ERROR in NFC quick check for string at count %d\n", count);
    491       return;
    492     }
    493 
    494     free(d);
    495     free(c);
    496   }
    497 
    498   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
    499   {
    500     d = CharsToUChars(compatTests[count][1]);
    501     c = CharsToUChars(compatTests[count][2]);
    502     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
    503                                                             UNORM_YES)
    504     {
    505       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
    506       return;
    507     }
    508 
    509     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
    510                                                             UNORM_YES)
    511     {
    512       log_err("ERROR in NFKC quick check for string at count %d\n", count);
    513       return;
    514     }
    515 
    516     free(d);
    517     free(c);
    518   }
    519 }
    520 
    521 void TestQuickCheck()
    522 {
    523   TestQuickCheckResultNO();
    524   TestQuickCheckResultYES();
    525   TestQuickCheckResultMAYBE();
    526   TestQuickCheckStringResult();
    527 }
    528 
    529 /*
    530  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
    531  * normalized, and some that are not.
    532  * Here we pick some specific cases and test the C API.
    533  */
    534 static void TestIsNormalized(void) {
    535     static const UChar notNFC[][8]={            /* strings that are not in NFC */
    536         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
    537         { 0xfb1d, 0 },                          /* excluded from composition */
    538         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
    539         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
    540     };
    541     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
    542         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
    543         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
    544         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
    545     };
    546 
    547     int32_t i;
    548     UErrorCode errorCode;
    549 
    550     /* API test */
    551 
    552     /* normal case with length>=0 (length -1 used for special cases below) */
    553     errorCode=U_ZERO_ERROR;
    554     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
    555         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
    556     }
    557 
    558     /* incoming U_FAILURE */
    559     errorCode=U_TRUNCATED_CHAR_FOUND;
    560     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
    561     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
    562         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
    563     }
    564 
    565     /* NULL source */
    566     errorCode=U_ZERO_ERROR;
    567     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
    568     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
    569         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
    570     }
    571 
    572     /* bad length */
    573     errorCode=U_ZERO_ERROR;
    574     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
    575     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
    576         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
    577     }
    578 
    579     /* specific cases */
    580     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
    581         errorCode=U_ZERO_ERROR;
    582         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
    583             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
    584         }
    585         errorCode=U_ZERO_ERROR;
    586         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
    587             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
    588         }
    589     }
    590     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
    591         errorCode=U_ZERO_ERROR;
    592         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
    593             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
    594         }
    595     }
    596 }
    597 
    598 void TestCheckFCD()
    599 {
    600   UErrorCode status = U_ZERO_ERROR;
    601   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
    602                          0x0A};
    603   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
    604                           0x02B9, 0x0314, 0x0315, 0x0316};
    605   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
    606                          0x0050, 0x0730, 0x09EE, 0x1E10};
    607 
    608   static const UChar datastr[][5] =
    609   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
    610     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
    611     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
    612     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
    613   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
    614 
    615   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    616                             0x6a,
    617                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    618                             0xea,
    619                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
    620                             0x0307, 0x0308, 0x0309, 0x030a,
    621                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
    622                             0x0327, 0x0328, 0x0329, 0x032a,
    623                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
    624                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
    625 
    626   int count = 0;
    627 
    628   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
    629     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
    630   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
    631     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
    632   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
    633     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
    634 
    635   if (U_FAILURE(status))
    636     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
    637 
    638   while (count < 4)
    639   {
    640     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
    641     if (U_FAILURE(status)) {
    642       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
    643       break;
    644     }
    645     else {
    646       if (result[count] != fcdresult) {
    647         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
    648                  result[count]);
    649       }
    650     }
    651     count ++;
    652   }
    653 
    654   /* random checks of long strings */
    655   status = U_ZERO_ERROR;
    656   srand((unsigned)time( NULL ));
    657 
    658   for (count = 0; count < 50; count ++)
    659   {
    660     int size = 0;
    661     UBool testresult = UNORM_YES;
    662     UChar data[20];
    663     UChar norm[100];
    664     UChar nfd[100];
    665     int normsize = 0;
    666     int nfdsize = 0;
    667 
    668     while (size != 19) {
    669       data[size] = datachar[(rand() * 50) / RAND_MAX];
    670       log_verbose("0x%x", data[size]);
    671       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
    672                                   norm + normsize, 100 - normsize, &status);
    673       if (U_FAILURE(status)) {
    674         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
    675         break;
    676       }
    677       size ++;
    678     }
    679     log_verbose("\n");
    680 
    681     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
    682                               nfd, 100, &status);
    683     if (U_FAILURE(status)) {
    684       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
    685     }
    686 
    687     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
    688       testresult = UNORM_NO;
    689     }
    690     if (testresult == UNORM_YES) {
    691       log_verbose("result UNORM_YES\n");
    692     }
    693     else {
    694       log_verbose("result UNORM_NO\n");
    695     }
    696 
    697     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
    698       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
    699     }
    700   }
    701 }
    702 
    703 static void
    704 TestAPI() {
    705     static const UChar in[]={ 0x68, 0xe4 };
    706     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
    707     UErrorCode errorCode;
    708     int32_t length;
    709 
    710     /* try preflighting */
    711     errorCode=U_ZERO_ERROR;
    712     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
    713     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
    714         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
    715         return;
    716     }
    717 
    718     errorCode=U_ZERO_ERROR;
    719     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
    720     if(U_FAILURE(errorCode)) {
    721         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
    722         return;
    723     }
    724     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
    725         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
    726         return;
    727     }
    728     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
    729     if(U_FAILURE(errorCode)) {
    730         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
    731         return;
    732     }
    733     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
    734     if(U_FAILURE(errorCode)) {
    735         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
    736         return;
    737     }
    738 }
    739 
    740 /* test cases to improve test code coverage */
    741 enum {
    742     HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    743     HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    744     HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */
    745 
    746     HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    747     HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    748     HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */
    749 
    750     HANGUL_AC00=0xac00,             /* Hangul syllable = Jamo LV U+ac00 */
    751     HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
    752 
    753     MUSICAL_VOID_NOTEHEAD=0x1d157,
    754     MUSICAL_HALF_NOTE=0x1d15e,  /* NFC/NFD->Notehead+Stem */
    755     MUSICAL_STEM=0x1d165,       /* cc=216 */
    756     MUSICAL_STACCATO=0x1d17c    /* cc=220 */
    757 };
    758 
    759 static void
    760 TestNormCoverage() {
    761     UChar input[1000], expect[1000], output[1000];
    762     UErrorCode errorCode;
    763     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
    764 
    765     /* create a long and nasty string with NFKC-unsafe characters */
    766     inLength=0;
    767 
    768     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
    769     input[inLength++]=HANGUL_KIYEOK;
    770     input[inLength++]=HANGUL_WEO;
    771     input[inLength++]=HANGUL_KIYEOK_SIOS;
    772 
    773     input[inLength++]=HANGUL_KIYEOK;
    774     input[inLength++]=HANGUL_WEO;
    775     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
    776 
    777     input[inLength++]=HANGUL_KIYEOK;
    778     input[inLength++]=HANGUL_K_WEO;
    779     input[inLength++]=HANGUL_KIYEOK_SIOS;
    780 
    781     input[inLength++]=HANGUL_KIYEOK;
    782     input[inLength++]=HANGUL_K_WEO;
    783     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
    784 
    785     input[inLength++]=HANGUL_K_KIYEOK;
    786     input[inLength++]=HANGUL_WEO;
    787     input[inLength++]=HANGUL_KIYEOK_SIOS;
    788 
    789     input[inLength++]=HANGUL_K_KIYEOK;
    790     input[inLength++]=HANGUL_WEO;
    791     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
    792 
    793     input[inLength++]=HANGUL_K_KIYEOK;
    794     input[inLength++]=HANGUL_K_WEO;
    795     input[inLength++]=HANGUL_KIYEOK_SIOS;
    796 
    797     input[inLength++]=HANGUL_K_KIYEOK;
    798     input[inLength++]=HANGUL_K_WEO;
    799     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
    800 
    801     /* Hangul LV with normal/compatibility Jamo T */
    802     input[inLength++]=HANGUL_AC00;
    803     input[inLength++]=HANGUL_KIYEOK_SIOS;
    804 
    805     input[inLength++]=HANGUL_AC00;
    806     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
    807 
    808     /* compatibility Jamo L, V */
    809     input[inLength++]=HANGUL_K_KIYEOK;
    810     input[inLength++]=HANGUL_K_WEO;
    811 
    812     hangulPrefixLength=inLength;
    813 
    814     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
    815     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
    816     for(i=0; i<200; ++i) {
    817         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
    818         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
    819         input[inLength++]=U16_LEAD(MUSICAL_STEM);
    820         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
    821     }
    822 
    823     /* (compatibility) Jamo L, T do not compose */
    824     input[inLength++]=HANGUL_K_KIYEOK;
    825     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
    826 
    827     /* quick checks */
    828     errorCode=U_ZERO_ERROR;
    829     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
    830         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
    831     }
    832     errorCode=U_ZERO_ERROR;
    833     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
    834         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
    835     }
    836     errorCode=U_ZERO_ERROR;
    837     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
    838         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
    839     }
    840     errorCode=U_ZERO_ERROR;
    841     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
    842         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
    843     }
    844     errorCode=U_ZERO_ERROR;
    845     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
    846         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
    847     }
    848 
    849     /* NFKC */
    850     expectLength=0;
    851     expect[expectLength++]=HANGUL_SYLLABLE;
    852 
    853     expect[expectLength++]=HANGUL_SYLLABLE;
    854 
    855     expect[expectLength++]=HANGUL_SYLLABLE;
    856 
    857     expect[expectLength++]=HANGUL_SYLLABLE;
    858 
    859     expect[expectLength++]=HANGUL_SYLLABLE;
    860 
    861     expect[expectLength++]=HANGUL_SYLLABLE;
    862 
    863     expect[expectLength++]=HANGUL_SYLLABLE;
    864 
    865     expect[expectLength++]=HANGUL_SYLLABLE;
    866 
    867     expect[expectLength++]=HANGUL_AC00+3;
    868 
    869     expect[expectLength++]=HANGUL_AC00+3;
    870 
    871     expect[expectLength++]=HANGUL_AC00+14*28;
    872 
    873     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
    874     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
    875     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
    876     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
    877     for(i=0; i<200; ++i) {
    878         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
    879         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
    880     }
    881     for(i=0; i<200; ++i) {
    882         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
    883         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
    884     }
    885 
    886     expect[expectLength++]=HANGUL_KIYEOK;
    887     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
    888 
    889     /* try destination overflow first */
    890     errorCode=U_ZERO_ERROR;
    891     preflightLength=unorm_normalize(input, inLength,
    892                            UNORM_NFKC, 0,
    893                            output, 100, /* too short */
    894                            &errorCode);
    895     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
    896         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
    897     }
    898 
    899     /* real NFKC */
    900     errorCode=U_ZERO_ERROR;
    901     length=unorm_normalize(input, inLength,
    902                            UNORM_NFKC, 0,
    903                            output, sizeof(output)/U_SIZEOF_UCHAR,
    904                            &errorCode);
    905     if(U_FAILURE(errorCode)) {
    906         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
    907     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
    908         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
    909         for(i=0; i<length; ++i) {
    910             if(output[i]!=expect[i]) {
    911                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
    912                 break;
    913             }
    914         }
    915     }
    916     if(length!=preflightLength) {
    917         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
    918     }
    919 
    920     /* FCD */
    921     u_memcpy(expect, input, hangulPrefixLength);
    922     expectLength=hangulPrefixLength;
    923 
    924     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
    925     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
    926     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
    927     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
    928     for(i=0; i<200; ++i) {
    929         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
    930         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
    931     }
    932     for(i=0; i<200; ++i) {
    933         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
    934         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
    935     }
    936 
    937     expect[expectLength++]=HANGUL_K_KIYEOK;
    938     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
    939 
    940     errorCode=U_ZERO_ERROR;
    941     length=unorm_normalize(input, inLength,
    942                            UNORM_FCD, 0,
    943                            output, sizeof(output)/U_SIZEOF_UCHAR,
    944                            &errorCode);
    945     if(U_FAILURE(errorCode)) {
    946         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
    947     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
    948         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
    949         for(i=0; i<length; ++i) {
    950             if(output[i]!=expect[i]) {
    951                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
    952                 break;
    953             }
    954         }
    955     }
    956 }
    957 
    958 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
    959 static void
    960 TestConcatenate(void) {
    961     /* "re + 'sume'" */
    962     static const UChar
    963     left[]={
    964         0x72, 0x65, 0
    965     },
    966     right[]={
    967         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
    968     },
    969     expect[]={
    970         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
    971     };
    972 
    973     UChar buffer[100];
    974     UErrorCode errorCode;
    975     int32_t length;
    976 
    977     /* left with length, right NUL-terminated */
    978     errorCode=U_ZERO_ERROR;
    979     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
    980     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
    981         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
    982     }
    983 
    984     /* preflighting */
    985     errorCode=U_ZERO_ERROR;
    986     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
    987     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
    988         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
    989     }
    990 
    991     buffer[2]=0x5555;
    992     errorCode=U_ZERO_ERROR;
    993     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
    994     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
    995         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
    996     }
    997 
    998     /* enter with U_FAILURE */
    999     buffer[2]=0xaaaa;
   1000     errorCode=U_UNEXPECTED_TOKEN;
   1001     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
   1002     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
   1003         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
   1004     }
   1005 
   1006     /* illegal arguments */
   1007     buffer[2]=0xaaaa;
   1008     errorCode=U_ZERO_ERROR;
   1009     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
   1010     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
   1011         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
   1012     }
   1013 
   1014     errorCode=U_ZERO_ERROR;
   1015     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
   1016     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1017         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
   1018     }
   1019 }
   1020 
   1021 enum {
   1022     _PLUS=0x2b
   1023 };
   1024 
   1025 static const char *const _modeString[UNORM_MODE_COUNT]={
   1026     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
   1027 };
   1028 
   1029 static void
   1030 _testIter(const UChar *src, int32_t srcLength,
   1031           UCharIterator *iter, UNormalizationMode mode, UBool forward,
   1032           const UChar *out, int32_t outLength,
   1033           const int32_t *srcIndexes, int32_t srcIndexesLength) {
   1034     UChar buffer[4];
   1035     const UChar *expect, *outLimit, *in;
   1036     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
   1037     UErrorCode errorCode;
   1038     UBool neededToNormalize, expectNeeded;
   1039 
   1040     errorCode=U_ZERO_ERROR;
   1041     outLimit=out+outLength;
   1042     if(forward) {
   1043         expect=out;
   1044         i=index=0;
   1045     } else {
   1046         expect=outLimit;
   1047         i=srcIndexesLength-2;
   1048         index=srcLength;
   1049     }
   1050 
   1051     for(;;) {
   1052         prevIndex=index;
   1053         if(forward) {
   1054             if(!iter->hasNext(iter)) {
   1055                 return;
   1056             }
   1057             length=unorm_next(iter,
   1058                               buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
   1059                               mode, 0,
   1060                               (UBool)(out!=NULL), &neededToNormalize,
   1061                               &errorCode);
   1062             expectIndex=srcIndexes[i+1];
   1063             in=src+prevIndex;
   1064             inLength=expectIndex-prevIndex;
   1065 
   1066             if(out!=NULL) {
   1067                 /* get output piece from between plus signs */
   1068                 expectLength=0;
   1069                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
   1070                     ++expectLength;
   1071                 }
   1072                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
   1073             } else {
   1074                 expect=in;
   1075                 expectLength=inLength;
   1076                 expectNeeded=FALSE;
   1077             }
   1078         } else {
   1079             if(!iter->hasPrevious(iter)) {
   1080                 return;
   1081             }
   1082             length=unorm_previous(iter,
   1083                                   buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
   1084                                   mode, 0,
   1085                                   (UBool)(out!=NULL), &neededToNormalize,
   1086                                   &errorCode);
   1087             expectIndex=srcIndexes[i];
   1088             in=src+expectIndex;
   1089             inLength=prevIndex-expectIndex;
   1090 
   1091             if(out!=NULL) {
   1092                 /* get output piece from between plus signs */
   1093                 expectLength=0;
   1094                 while(expect!=out && expect[-1]!=_PLUS) {
   1095                     ++expectLength;
   1096                     --expect;
   1097                 }
   1098                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
   1099             } else {
   1100                 expect=in;
   1101                 expectLength=inLength;
   1102                 expectNeeded=FALSE;
   1103             }
   1104         }
   1105         index=iter->getIndex(iter, UITER_CURRENT);
   1106 
   1107         if(U_FAILURE(errorCode)) {
   1108             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
   1109                     forward, _modeString[mode], i, u_errorName(errorCode));
   1110             return;
   1111         }
   1112         if(expectIndex!=index) {
   1113             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
   1114                     forward, _modeString[mode], i, index, expectIndex);
   1115             return;
   1116         }
   1117         if(expectLength!=length) {
   1118             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
   1119                     forward, _modeString[mode], i, length, expectLength);
   1120             return;
   1121         }
   1122         if(0!=u_memcmp(expect, buffer, length)) {
   1123             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
   1124                     forward, _modeString[mode], i);
   1125             return;
   1126         }
   1127         if(neededToNormalize!=expectNeeded) {
   1128         }
   1129 
   1130         if(forward) {
   1131             expect+=expectLength+1; /* go after the + */
   1132             ++i;
   1133         } else {
   1134             --expect; /* go before the + */
   1135             --i;
   1136         }
   1137     }
   1138 }
   1139 
   1140 static void
   1141 TestNextPrevious() {
   1142     static const UChar
   1143     src[]={ /* input string */
   1144         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
   1145     },
   1146     nfd[]={ /* + separates expected output pieces */
   1147         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
   1148     },
   1149     nfkd[]={
   1150         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
   1151     },
   1152     nfc[]={
   1153         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
   1154     },
   1155     nfkc[]={
   1156         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
   1157     },
   1158     fcd[]={
   1159         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
   1160     };
   1161 
   1162     /* expected iterator indexes in the source string for each iteration piece */
   1163     static const int32_t
   1164     nfdIndexes[]={
   1165         0, 1, 2, 5, 6, 7
   1166     },
   1167     nfkdIndexes[]={
   1168         0, 1, 2, 5, 6, 7
   1169     },
   1170     nfcIndexes[]={
   1171         0, 1, 2, 5, 6, 7
   1172     },
   1173     nfkcIndexes[]={
   1174         0, 1, 2, 5, 7
   1175     },
   1176     fcdIndexes[]={
   1177         0, 1, 2, 5, 6, 7
   1178     };
   1179 
   1180     UCharIterator iter;
   1181 
   1182     UChar buffer[4];
   1183     int32_t length;
   1184 
   1185     UBool neededToNormalize;
   1186     UErrorCode errorCode;
   1187 
   1188     uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
   1189 
   1190     /* test iteration with doNormalize */
   1191     iter.index=0;
   1192     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
   1193     iter.index=0;
   1194     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
   1195     iter.index=0;
   1196     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
   1197     iter.index=0;
   1198     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
   1199     iter.index=0;
   1200     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
   1201 
   1202     iter.index=iter.length;
   1203     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
   1204     iter.index=iter.length;
   1205     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
   1206     iter.index=iter.length;
   1207     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
   1208     iter.index=iter.length;
   1209     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
   1210     iter.index=iter.length;
   1211     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
   1212 
   1213     /* test iteration without doNormalize */
   1214     iter.index=0;
   1215     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
   1216     iter.index=0;
   1217     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
   1218     iter.index=0;
   1219     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
   1220     iter.index=0;
   1221     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
   1222     iter.index=0;
   1223     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
   1224 
   1225     iter.index=iter.length;
   1226     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
   1227     iter.index=iter.length;
   1228     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
   1229     iter.index=iter.length;
   1230     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
   1231     iter.index=iter.length;
   1232     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
   1233     iter.index=iter.length;
   1234     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
   1235 
   1236     /* try without neededToNormalize */
   1237     errorCode=U_ZERO_ERROR;
   1238     buffer[0]=5;
   1239     iter.index=1;
   1240     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
   1241                       UNORM_NFD, 0, TRUE, NULL,
   1242                       &errorCode);
   1243     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
   1244         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
   1245         return;
   1246     }
   1247 
   1248     /* preflight */
   1249     neededToNormalize=9;
   1250     iter.index=1;
   1251     length=unorm_next(&iter, NULL, 0,
   1252                       UNORM_NFD, 0, TRUE, &neededToNormalize,
   1253                       &errorCode);
   1254     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
   1255         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
   1256         return;
   1257     }
   1258 
   1259     errorCode=U_ZERO_ERROR;
   1260     buffer[0]=buffer[1]=5;
   1261     neededToNormalize=9;
   1262     iter.index=1;
   1263     length=unorm_next(&iter, buffer, 1,
   1264                       UNORM_NFD, 0, TRUE, &neededToNormalize,
   1265                       &errorCode);
   1266     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
   1267         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
   1268         return;
   1269     }
   1270 
   1271     /* no iterator */
   1272     errorCode=U_ZERO_ERROR;
   1273     buffer[0]=buffer[1]=5;
   1274     neededToNormalize=9;
   1275     iter.index=1;
   1276     length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
   1277                       UNORM_NFD, 0, TRUE, &neededToNormalize,
   1278                       &errorCode);
   1279     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1280         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
   1281         return;
   1282     }
   1283 
   1284     /* illegal mode */
   1285     buffer[0]=buffer[1]=5;
   1286     neededToNormalize=9;
   1287     iter.index=1;
   1288     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
   1289                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
   1290                       &errorCode);
   1291     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1292         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
   1293         return;
   1294     }
   1295 
   1296     /* error coming in */
   1297     errorCode=U_MISPLACED_QUANTIFIER;
   1298     buffer[0]=5;
   1299     iter.index=1;
   1300     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
   1301                       UNORM_NFD, 0, TRUE, NULL,
   1302                       &errorCode);
   1303     if(errorCode!=U_MISPLACED_QUANTIFIER) {
   1304         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
   1305         return;
   1306     }
   1307 }
   1308 
   1309 static void
   1310 TestFCNFKCClosure(void) {
   1311     static const struct {
   1312         UChar32 c;
   1313         const UChar s[6];
   1314     } tests[]={
   1315         { 0x00C4, { 0 } },
   1316         { 0x00E4, { 0 } },
   1317         { 0x037A, { 0x0020, 0x03B9, 0 } },
   1318         { 0x03D2, { 0x03C5, 0 } },
   1319         { 0x20A8, { 0x0072, 0x0073, 0 } },
   1320         { 0x210B, { 0x0068, 0 } },
   1321         { 0x210C, { 0x0068, 0 } },
   1322         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
   1323         { 0x2122, { 0x0074, 0x006D, 0 } },
   1324         { 0x2128, { 0x007A, 0 } },
   1325         { 0x1D5DB, { 0x0068, 0 } },
   1326         { 0x1D5ED, { 0x007A, 0 } },
   1327         { 0x0061, { 0 } }
   1328     };
   1329 
   1330     UChar buffer[8];
   1331     UErrorCode errorCode;
   1332     int32_t i, length;
   1333 
   1334     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
   1335         errorCode=U_ZERO_ERROR;
   1336         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
   1337         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
   1338             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
   1339         }
   1340     }
   1341 
   1342     /* error handling */
   1343     errorCode=U_ZERO_ERROR;
   1344     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
   1345     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1346         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
   1347     }
   1348 
   1349     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
   1350     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1351         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
   1352     }
   1353 }
   1354 
   1355 static void
   1356 TestQuickCheckPerCP() {
   1357     UErrorCode errorCode;
   1358     UChar32 c, lead, trail;
   1359     UChar s[U16_MAX_LENGTH], nfd[16];
   1360     int32_t length, lccc1, lccc2, tccc1, tccc2;
   1361     int32_t qc1, qc2;
   1362 
   1363     if(
   1364         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
   1365         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
   1366         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
   1367         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
   1368         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
   1369         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
   1370     ) {
   1371         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
   1372     }
   1373 
   1374     /*
   1375      * compare the quick check property values for some code points
   1376      * to the quick check results for checking same-code point strings
   1377      */
   1378     errorCode=U_ZERO_ERROR;
   1379     c=0;
   1380     while(c<0x110000) {
   1381         length=0;
   1382         U16_APPEND_UNSAFE(s, length, c);
   1383 
   1384         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
   1385         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
   1386         if(qc1!=qc2) {
   1387             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
   1388         }
   1389 
   1390         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
   1391         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
   1392         if(qc1!=qc2) {
   1393             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
   1394         }
   1395 
   1396         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
   1397         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
   1398         if(qc1!=qc2) {
   1399             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
   1400         }
   1401 
   1402         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
   1403         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
   1404         if(qc1!=qc2) {
   1405             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
   1406         }
   1407 
   1408         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
   1409         /* length-length == 0 is used to get around a compiler warning. */
   1410         U16_GET(nfd, 0, length-length, length, lead);
   1411         U16_GET(nfd, 0, length-1, length, trail);
   1412 
   1413         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
   1414         lccc2=u_getCombiningClass(lead);
   1415         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
   1416         tccc2=u_getCombiningClass(trail);
   1417 
   1418         if(lccc1!=lccc2) {
   1419             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
   1420                     lccc1, lccc2, c);
   1421         }
   1422         if(tccc1!=tccc2) {
   1423             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
   1424                     tccc1, tccc2, c);
   1425         }
   1426 
   1427         /* skip some code points */
   1428         c=(20*c)/19+1;
   1429     }
   1430 }
   1431 
   1432 static void
   1433 TestComposition(void) {
   1434     static const struct {
   1435         UNormalizationMode mode;
   1436         uint32_t options;
   1437         UChar input[12];
   1438         UChar expect[12];
   1439     } cases[]={
   1440         /*
   1441          * special cases for UAX #15 bug
   1442          * see Unicode Corrigendum #5: Normalization Idempotency
   1443          * at http://unicode.org/versions/corrigendum5.html
   1444          * (was Public Review Issue #29)
   1445          */
   1446         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
   1447         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
   1448         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
   1449         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
   1450 
   1451         /* TODO: add test cases for UNORM_FCC here (j2151) */
   1452     };
   1453 
   1454     UChar output[16];
   1455     UErrorCode errorCode;
   1456     int32_t i, length;
   1457 
   1458     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
   1459         errorCode=U_ZERO_ERROR;
   1460         length=unorm_normalize(
   1461                     cases[i].input, -1,
   1462                     cases[i].mode, cases[i].options,
   1463                     output, UPRV_LENGTHOF(output),
   1464                     &errorCode);
   1465         if( U_FAILURE(errorCode) ||
   1466             length!=u_strlen(cases[i].expect) ||
   1467             0!=u_memcmp(output, cases[i].expect, length)
   1468         ) {
   1469             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
   1470         }
   1471     }
   1472 }
   1473 
   1474 static void
   1475 TestGetDecomposition() {
   1476     UChar decomp[32];
   1477     int32_t length;
   1478 
   1479     UErrorCode errorCode=U_ZERO_ERROR;
   1480     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
   1481     if(U_FAILURE(errorCode)) {
   1482         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
   1483         return;
   1484     }
   1485 
   1486     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1487     if(U_FAILURE(errorCode) || length>=0) {
   1488         log_err("unorm2_getDecomposition(fcc, space) failed\n");
   1489     }
   1490     errorCode=U_ZERO_ERROR;
   1491     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1492     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
   1493         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
   1494     }
   1495     errorCode=U_ZERO_ERROR;
   1496     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1497     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
   1498         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
   1499     }
   1500     errorCode=U_ZERO_ERROR;
   1501     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
   1502     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
   1503         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
   1504     }
   1505     errorCode=U_ZERO_ERROR;
   1506     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
   1507     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1508         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
   1509     }
   1510     errorCode=U_ZERO_ERROR;
   1511     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
   1512     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1513         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
   1514     }
   1515 }
   1516 
   1517 static void
   1518 TestGetRawDecomposition() {
   1519     UChar decomp[32];
   1520     int32_t length;
   1521 
   1522     UErrorCode errorCode=U_ZERO_ERROR;
   1523     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
   1524     if(U_FAILURE(errorCode)) {
   1525         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
   1526         return;
   1527     }
   1528     /*
   1529      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
   1530      * without recursive decomposition.
   1531      */
   1532 
   1533     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1534     if(U_FAILURE(errorCode) || length>=0) {
   1535         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
   1536     }
   1537     errorCode=U_ZERO_ERROR;
   1538     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1539     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
   1540         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
   1541     }
   1542     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
   1543     errorCode=U_ZERO_ERROR;
   1544     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1545     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
   1546         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
   1547     }
   1548     /* U+212B ANGSTROM SIGN */
   1549     errorCode=U_ZERO_ERROR;
   1550     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1551     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
   1552         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
   1553     }
   1554     errorCode=U_ZERO_ERROR;
   1555     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1556     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
   1557         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
   1558     }
   1559     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
   1560     errorCode=U_ZERO_ERROR;
   1561     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
   1562     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
   1563         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
   1564     }
   1565     errorCode=U_ZERO_ERROR;
   1566     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
   1567     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
   1568         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
   1569     }
   1570     errorCode=U_ZERO_ERROR;
   1571     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
   1572     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1573         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
   1574     }
   1575     errorCode=U_ZERO_ERROR;
   1576     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
   1577     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
   1578         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
   1579     }
   1580 }
   1581 
   1582 static void
   1583 TestAppendRestoreMiddle() {
   1584     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
   1585     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
   1586     /* NFC: C5 is 'A with ring above' */
   1587     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
   1588     int32_t length;
   1589     UErrorCode errorCode=U_ZERO_ERROR;
   1590     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
   1591     if(U_FAILURE(errorCode)) {
   1592         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
   1593         return;
   1594     }
   1595     /*
   1596      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
   1597      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
   1598      * still fits into a[] but the full result still overflows this capacity.
   1599      * (Let it modify the destination buffer before reallocating internally.)
   1600      */
   1601     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
   1602     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
   1603         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
   1604         return;
   1605     }
   1606     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
   1607     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
   1608         log_err("unorm2_append(overflow) modified the first string\n");
   1609         return;
   1610     }
   1611     errorCode=U_ZERO_ERROR;
   1612     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
   1613     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
   1614         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
   1615         return;
   1616     }
   1617 }
   1618 
   1619 static void
   1620 TestGetEasyToUseInstance() {
   1621     static const UChar in[]={
   1622         0xA0,  /* -> <noBreak> 0020 */
   1623         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
   1624     };
   1625     UChar out[32];
   1626     int32_t length;
   1627 
   1628     UErrorCode errorCode=U_ZERO_ERROR;
   1629     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
   1630     if(U_FAILURE(errorCode)) {
   1631         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
   1632         return;
   1633     }
   1634     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
   1635     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
   1636         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
   1637                 (int)length, u_errorName(errorCode));
   1638     }
   1639 
   1640     errorCode=U_ZERO_ERROR;
   1641     n2=unorm2_getNFDInstance(&errorCode);
   1642     if(U_FAILURE(errorCode)) {
   1643         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
   1644         return;
   1645     }
   1646     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
   1647     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
   1648         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
   1649                 (int)length, u_errorName(errorCode));
   1650     }
   1651 
   1652     errorCode=U_ZERO_ERROR;
   1653     n2=unorm2_getNFKCInstance(&errorCode);
   1654     if(U_FAILURE(errorCode)) {
   1655         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
   1656         return;
   1657     }
   1658     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
   1659     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
   1660         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
   1661                 (int)length, u_errorName(errorCode));
   1662     }
   1663 
   1664     errorCode=U_ZERO_ERROR;
   1665     n2=unorm2_getNFKDInstance(&errorCode);
   1666     if(U_FAILURE(errorCode)) {
   1667         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
   1668         return;
   1669     }
   1670     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
   1671     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
   1672         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
   1673                 (int)length, u_errorName(errorCode));
   1674     }
   1675 
   1676     errorCode=U_ZERO_ERROR;
   1677     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
   1678     if(U_FAILURE(errorCode)) {
   1679         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
   1680         return;
   1681     }
   1682     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
   1683     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
   1684         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
   1685                 (int)length, u_errorName(errorCode));
   1686     }
   1687 }
   1688 
   1689 #endif /* #if !UCONFIG_NO_NORMALIZATION */
   1690