Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File CITERTST.C
      9 *
     10 * Modification History:
     11 * Date      Name               Description
     12 *           Madhu Katragadda   Ported for C API
     13 * 02/19/01  synwee             Modified test case for new collation iterator
     14 *********************************************************************************/
     15 /*
     16  * Collation Iterator tests.
     17  * (Let me reiterate my position...)
     18  */
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION
     23 
     24 #include "unicode/ucol.h"
     25 #include "unicode/uloc.h"
     26 #include "unicode/uchar.h"
     27 #include "unicode/ustring.h"
     28 #include "unicode/putil.h"
     29 #include "callcoll.h"
     30 #include "cmemory.h"
     31 #include "cintltst.h"
     32 #include "citertst.h"
     33 #include "ccolltst.h"
     34 #include "filestrm.h"
     35 #include "cstring.h"
     36 #include "ucol_imp.h"
     37 #include "ucol_tok.h"
     38 #include "uparse.h"
     39 #include <stdio.h>
     40 
/* Forward declaration of ucol_uprv_getCaseBits(); its definition is not in the
 * part of the file shown here.
 * NOTE(review): presumably declared locally because none of the headers
 * included above exposes it -- confirm. */
     41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
     42 
     43 void addCollIterTest(TestNode** root)
     44 {
     45     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
     46     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
     47     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
     48     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
     49     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
     50     addTest(root, &TestNormalizedUnicodeChar,
     51                                 "tscoll/citertst/TestNormalizedUnicodeChar");
     52     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
     53     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
     54     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
     55     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
     56     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
     57     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
     58     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
     59     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
     60     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
     61 }
     62 
     63 /* Locales opened in turn by TestBug672 and TestBug672Normalize below; both
         tests index this table with 0..2 and expect the same iterator offsets
         for every entry. */
     64 
     65 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
     66 
     67 static void TestBug672() {
     68     UErrorCode  status = U_ZERO_ERROR;
     69     UChar       pattern[20];
     70     UChar       text[50];
     71     int         i;
     72     int         result[3][3];
     73 
     74     u_uastrcpy(pattern, "resume");
     75     u_uastrcpy(text, "Time to resume updating my resume.");
     76 
     77     for (i = 0; i < 3; ++ i) {
     78         UCollator          *coll = ucol_open(LOCALES[i], &status);
     79         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
     80                                                      &status);
     81         UCollationElements *titer = ucol_openElements(coll, text, -1,
     82                                                      &status);
     83         if (U_FAILURE(status)) {
     84             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
     85                     myErrorName(status));
     86             return;
     87         }
     88 
     89         log_verbose("locale tested %s\n", LOCALES[i]);
     90 
     91         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
     92                U_SUCCESS(status)) {
     93         }
     94         if (U_FAILURE(status)) {
     95             log_err("ERROR: reversing collation iterator :%s\n",
     96                     myErrorName(status));
     97             return;
     98         }
     99         ucol_reset(pitr);
    100 
    101         ucol_setOffset(titer, u_strlen(pattern), &status);
    102         if (U_FAILURE(status)) {
    103             log_err("ERROR: setting offset in collator :%s\n",
    104                     myErrorName(status));
    105             return;
    106         }
    107         result[i][0] = ucol_getOffset(titer);
    108         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    109 
    110         /* Use previous() */
    111         ucol_previous(titer, &status);
    112         result[i][1] = ucol_getOffset(titer);
    113         log_verbose("Current offset %d after previous\n", result[i][1]);
    114 
    115         /* Add one to index */
    116         log_verbose("Adding one to current offset...\n");
    117         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    118         if (U_FAILURE(status)) {
    119             log_err("ERROR: setting offset in collator :%s\n",
    120                     myErrorName(status));
    121             return;
    122         }
    123         result[i][2] = ucol_getOffset(titer);
    124         log_verbose("Current offset in text = %d\n", result[i][2]);
    125         ucol_closeElements(pitr);
    126         ucol_closeElements(titer);
    127         ucol_close(coll);
    128     }
    129 
    130     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    131         uprv_memcmp(result[1], result[2], 3) != 0) {
    132         log_err("ERROR: Different locales have different offsets at the same character\n");
    133     }
    134 }
    135 
    136 
    137 
    138 /*  Running this test with normalization enabled showed up a bug in the incremental
    139     normalization code. */
    140 static void TestBug672Normalize() {
    141     UErrorCode  status = U_ZERO_ERROR;
    142     UChar       pattern[20];
    143     UChar       text[50];
    144     int         i;
    145     int         result[3][3];
    146 
    147     u_uastrcpy(pattern, "resume");
    148     u_uastrcpy(text, "Time to resume updating my resume.");
    149 
    150     for (i = 0; i < 3; ++ i) {
    151         UCollator          *coll = ucol_open(LOCALES[i], &status);
    152         UCollationElements *pitr = NULL;
    153         UCollationElements *titer = NULL;
    154 
    155         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    156 
    157         pitr = ucol_openElements(coll, pattern, -1, &status);
    158         titer = ucol_openElements(coll, text, -1, &status);
    159         if (U_FAILURE(status)) {
    160             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
    161                     myErrorName(status));
    162             return;
    163         }
    164 
    165         log_verbose("locale tested %s\n", LOCALES[i]);
    166 
    167         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
    168                U_SUCCESS(status)) {
    169         }
    170         if (U_FAILURE(status)) {
    171             log_err("ERROR: reversing collation iterator :%s\n",
    172                     myErrorName(status));
    173             return;
    174         }
    175         ucol_reset(pitr);
    176 
    177         ucol_setOffset(titer, u_strlen(pattern), &status);
    178         if (U_FAILURE(status)) {
    179             log_err("ERROR: setting offset in collator :%s\n",
    180                     myErrorName(status));
    181             return;
    182         }
    183         result[i][0] = ucol_getOffset(titer);
    184         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    185 
    186         /* Use previous() */
    187         ucol_previous(titer, &status);
    188         result[i][1] = ucol_getOffset(titer);
    189         log_verbose("Current offset %d after previous\n", result[i][1]);
    190 
    191         /* Add one to index */
    192         log_verbose("Adding one to current offset...\n");
    193         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    194         if (U_FAILURE(status)) {
    195             log_err("ERROR: setting offset in collator :%s\n",
    196                     myErrorName(status));
    197             return;
    198         }
    199         result[i][2] = ucol_getOffset(titer);
    200         log_verbose("Current offset in text = %d\n", result[i][2]);
    201         ucol_closeElements(pitr);
    202         ucol_closeElements(titer);
    203         ucol_close(coll);
    204     }
    205 
    206     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    207         uprv_memcmp(result[1], result[2], 3) != 0) {
    208         log_err("ERROR: Different locales have different offsets at the same character\n");
    209     }
    210 }
    211 
    212 
    213 
    214 
    215 /**
    216  * Test for CollationElementIterator previous and next for the whole set of
    217  * unicode characters.
    218  */
    219 static void TestUnicodeChar()
    220 {
    221     UChar source[0x100];
    222     UCollator *en_us;
    223     UCollationElements *iter;
    224     UErrorCode status = U_ZERO_ERROR;
    225     UChar codepoint;
    226 
    227     UChar *test;
    228     en_us = ucol_open("en_US", &status);
    229     if (U_FAILURE(status)){
    230        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
    231               myErrorName(status));
    232        return;
    233     }
    234 
    235     for (codepoint = 1; codepoint < 0xFFFE;)
    236     {
    237       test = source;
    238 
    239       while (codepoint % 0xFF != 0)
    240       {
    241         if (u_isdefined(codepoint))
    242           *(test ++) = codepoint;
    243         codepoint ++;
    244       }
    245 
    246       if (u_isdefined(codepoint))
    247         *(test ++) = codepoint;
    248 
    249       if (codepoint != 0xFFFF)
    250         codepoint ++;
    251 
    252       *test = 0;
    253       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
    254       if(U_FAILURE(status)){
    255           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    256               myErrorName(status));
    257           ucol_close(en_us);
    258           return;
    259       }
    260       /* A basic test to see if it's working at all */
    261       log_verbose("codepoint testing %x\n", codepoint);
    262       backAndForth(iter);
    263       ucol_closeElements(iter);
    264 
    265       /* null termination test */
    266       iter=ucol_openElements(en_us, source, -1, &status);
    267       if(U_FAILURE(status)){
    268           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    269               myErrorName(status));
    270           ucol_close(en_us);
    271           return;
    272       }
    273       /* A basic test to see if it's working at all */
    274       backAndForth(iter);
    275       ucol_closeElements(iter);
    276     }
    277 
    278     ucol_close(en_us);
    279 }
    280 
    281 /**
    282  * Test for CollationElementIterator previous and next for the whole set of
    283  * unicode characters with normalization on.
    284  */
    285 static void TestNormalizedUnicodeChar()
    286 {
    287     UChar source[0x100];
    288     UCollator *th_th;
    289     UCollationElements *iter;
    290     UErrorCode status = U_ZERO_ERROR;
    291     UChar codepoint;
    292 
    293     UChar *test;
    294     /* thai should have normalization on */
    295     th_th = ucol_open("th_TH", &status);
    296     if (U_FAILURE(status)){
    297         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
    298               myErrorName(status));
    299         return;
    300     }
    301 
    302     for (codepoint = 1; codepoint < 0xFFFE;)
    303     {
    304       test = source;
    305 
    306       while (codepoint % 0xFF != 0)
    307       {
    308         if (u_isdefined(codepoint))
    309           *(test ++) = codepoint;
    310         codepoint ++;
    311       }
    312 
    313       if (u_isdefined(codepoint))
    314         *(test ++) = codepoint;
    315 
    316       if (codepoint != 0xFFFF)
    317         codepoint ++;
    318 
    319       *test = 0;
    320       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
    321       if(U_FAILURE(status)){
    322           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    323               myErrorName(status));
    324             ucol_close(th_th);
    325           return;
    326       }
    327 
    328       backAndForth(iter);
    329       ucol_closeElements(iter);
    330 
    331       iter=ucol_openElements(th_th, source, -1, &status);
    332       if(U_FAILURE(status)){
    333           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    334               myErrorName(status));
    335             ucol_close(th_th);
    336           return;
    337       }
    338 
    339       backAndForth(iter);
    340       ucol_closeElements(iter);
    341     }
    342 
    343     ucol_close(th_th);
    344 }
    345 
    346 /**
    347 * Test the incremental normalization
    348 */
    349 static void TestNormalization()
    350 {
    351           UErrorCode          status = U_ZERO_ERROR;
    352     const char               *str    =
    353                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
    354           UCollator          *coll;
    355           UChar               rule[50];
    356           int                 rulelen = u_unescape(str, rule, 50);
    357           int                 count = 0;
    358     const char                *testdata[] =
    359                         {"\\u1ED9", "o\\u0323\\u0302",
    360                         "\\u0300\\u0315", "\\u0315\\u0300",
    361                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
    362                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
    363                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
    364                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
    365                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
    366     int32_t   srclen;
    367     UChar source[10];
    368     UCollationElements *iter;
    369 
    370     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
    371     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    372     if (U_FAILURE(status)){
    373         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
    374               myErrorName(status));
    375         return;
    376     }
    377 
    378     srclen = u_unescape(testdata[0], source, 10);
    379     iter = ucol_openElements(coll, source, srclen, &status);
    380     backAndForth(iter);
    381     ucol_closeElements(iter);
    382 
    383     srclen = u_unescape(testdata[1], source, 10);
    384     iter = ucol_openElements(coll, source, srclen, &status);
    385     backAndForth(iter);
    386     ucol_closeElements(iter);
    387 
    388     while (count < 12) {
    389         srclen = u_unescape(testdata[count], source, 10);
    390         iter = ucol_openElements(coll, source, srclen, &status);
    391 
    392         if (U_FAILURE(status)){
    393             log_err("ERROR: in creation of collator element iterator\n %s\n",
    394                   myErrorName(status));
    395             return;
    396         }
    397         backAndForth(iter);
    398         ucol_closeElements(iter);
    399 
    400         iter = ucol_openElements(coll, source, -1, &status);
    401 
    402         if (U_FAILURE(status)){
    403             log_err("ERROR: in creation of collator element iterator\n %s\n",
    404                   myErrorName(status));
    405             return;
    406         }
    407         backAndForth(iter);
    408         ucol_closeElements(iter);
    409         count ++;
    410     }
    411     ucol_close(coll);
    412 }
    413 
    414 /**
    415  * Test for CollationElementIterator.previous()
    416  *
    417  * @bug 4108758 - Make sure it works with contracting characters
    418  *
    419  */
    420 static void TestPrevious()
    421 {
    422     UCollator *coll=NULL;
    423     UChar rule[50];
    424     UChar *source;
    425     UCollator *c1, *c2, *c3;
    426     UCollationElements *iter;
    427     UErrorCode status = U_ZERO_ERROR;
    428     UChar test1[50];
    429     UChar test2[50];
    430 
    431     u_uastrcpy(test1, "What subset of all possible test cases?");
    432     u_uastrcpy(test2, "has the highest probability of detecting");
    433     coll = ucol_open("en_US", &status);
    434 
    435     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
    436     log_verbose("English locale testing back and forth\n");
    437     if(U_FAILURE(status)){
    438         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    439             myErrorName(status));
    440         ucol_close(coll);
    441         return;
    442     }
    443     /* A basic test to see if it's working at all */
    444     backAndForth(iter);
    445     ucol_closeElements(iter);
    446     ucol_close(coll);
    447 
    448     /* Test with a contracting character sequence */
    449     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    450     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    451 
    452     log_verbose("Contraction rule testing back and forth with no normalization\n");
    453 
    454     if (c1 == NULL || U_FAILURE(status))
    455     {
    456         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
    457             myErrorName(status));
    458         return;
    459     }
    460     source=(UChar*)malloc(sizeof(UChar) * 20);
    461     u_uastrcpy(source, "abchdcba");
    462     iter=ucol_openElements(c1, source, u_strlen(source), &status);
    463     if(U_FAILURE(status)){
    464         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    465             myErrorName(status));
    466         return;
    467     }
    468     backAndForth(iter);
    469     ucol_closeElements(iter);
    470     ucol_close(c1);
    471 
    472     /* Test with an expanding character sequence */
    473     u_uastrcpy(rule, "&a < b < c/abd < d");
    474     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    475     log_verbose("Expansion rule testing back and forth with no normalization\n");
    476     if (c2 == NULL || U_FAILURE(status))
    477     {
    478         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    479             myErrorName(status));
    480         return;
    481     }
    482     u_uastrcpy(source, "abcd");
    483     iter=ucol_openElements(c2, source, u_strlen(source), &status);
    484     if(U_FAILURE(status)){
    485         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    486             myErrorName(status));
    487         return;
    488     }
    489     backAndForth(iter);
    490     ucol_closeElements(iter);
    491     ucol_close(c2);
    492     /* Now try both */
    493     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
    494     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
    495     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
    496 
    497     if (c3 == NULL || U_FAILURE(status))
    498     {
    499         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    500             myErrorName(status));
    501         return;
    502     }
    503     u_uastrcpy(source, "abcdbchdc");
    504     iter=ucol_openElements(c3, source, u_strlen(source), &status);
    505     if(U_FAILURE(status)){
    506         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    507             myErrorName(status));
    508         return;
    509     }
    510     backAndForth(iter);
    511     ucol_closeElements(iter);
    512     ucol_close(c3);
    513     source[0] = 0x0e41;
    514     source[1] = 0x0e02;
    515     source[2] = 0x0e41;
    516     source[3] = 0x0e02;
    517     source[4] = 0x0e27;
    518     source[5] = 0x61;
    519     source[6] = 0x62;
    520     source[7] = 0x63;
    521     source[8] = 0;
    522 
    523     coll = ucol_open("th_TH", &status);
    524     log_verbose("Thai locale testing back and forth with normalization\n");
    525     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    526     if(U_FAILURE(status)){
    527         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    528             myErrorName(status));
    529         return;
    530     }
    531     backAndForth(iter);
    532     ucol_closeElements(iter);
    533     ucol_close(coll);
    534 
    535     /* prev test */
    536     source[0] = 0x0061;
    537     source[1] = 0x30CF;
    538     source[2] = 0x3099;
    539     source[3] = 0x30FC;
    540     source[4] = 0;
    541 
    542     coll = ucol_open("ja_JP", &status);
    543     log_verbose("Japanese locale testing back and forth with normalization\n");
    544     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    545     if(U_FAILURE(status)){
    546         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    547             myErrorName(status));
    548         return;
    549     }
    550     backAndForth(iter);
    551     ucol_closeElements(iter);
    552     ucol_close(coll);
    553 
    554     free(source);
    555 }
    556 
    557 /**
    558  * Test for getOffset() and setOffset()
    559  */
    560 static void TestOffset()
    561 {
    562     UErrorCode status= U_ZERO_ERROR;
    563     UCollator *en_us=NULL;
    564     UCollationElements *iter, *pristine;
    565     int32_t offset;
    566     OrderAndOffset *orders;
    567     int32_t orderLength=0;
    568     int     count = 0;
    569     UChar test1[50];
    570     UChar test2[50];
    571 
    572     u_uastrcpy(test1, "What subset of all possible test cases?");
    573     u_uastrcpy(test2, "has the highest probability of detecting");
    574     en_us = ucol_open("en_US", &status);
    575     log_verbose("Testing getOffset and setOffset for collations\n");
    576     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
    577     if(U_FAILURE(status)){
    578         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    579             myErrorName(status));
    580         ucol_close(en_us);
    581         return;
    582     }
    583 
    584     /* testing boundaries */
    585     ucol_setOffset(iter, 0, &status);
    586     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
    587         log_err("Error: After setting offset to 0, we should be at the end "
    588                 "of the backwards iteration");
    589     }
    590     ucol_setOffset(iter, u_strlen(test1), &status);
    591     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
    592         log_err("Error: After setting offset to end of the string, we should "
    593                 "be at the end of the backwards iteration");
    594     }
    595 
    596     /* Run all the way through the iterator, then get the offset */
    597 
    598     orders = getOrders(iter, &orderLength);
    599 
    600     offset = ucol_getOffset(iter);
    601 
    602     if (offset != u_strlen(test1))
    603     {
    604         log_err("offset at end != length %d vs %d\n", offset,
    605             u_strlen(test1) );
    606     }
    607 
    608     /* Now set the offset back to the beginning and see if it works */
    609     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    610     if(U_FAILURE(status)){
    611         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    612             myErrorName(status));
    613     ucol_close(en_us);
    614         return;
    615     }
    616     status = U_ZERO_ERROR;
    617 
    618     ucol_setOffset(iter, 0, &status);
    619     if (U_FAILURE(status))
    620     {
    621         log_err("setOffset failed. %s\n",    myErrorName(status));
    622     }
    623     else
    624     {
    625         assertEqual(iter, pristine);
    626     }
    627 
    628     ucol_closeElements(pristine);
    629     ucol_closeElements(iter);
    630     free(orders);
    631 
    632     /* testing offsets in normalization buffer */
    633     test1[0] = 0x61;
    634     test1[1] = 0x300;
    635     test1[2] = 0x316;
    636     test1[3] = 0x62;
    637     test1[4] = 0;
    638     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    639     iter = ucol_openElements(en_us, test1, 4, &status);
    640     if(U_FAILURE(status)){
    641         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    642             myErrorName(status));
    643         ucol_close(en_us);
    644         return;
    645     }
    646 
    647     count = 0;
    648     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
    649         U_SUCCESS(status)) {
    650         switch (count) {
    651         case 0:
    652             if (ucol_getOffset(iter) != 1) {
    653                 log_err("ERROR: Offset of iteration should be 1\n");
    654             }
    655             break;
    656         case 3:
    657             if (ucol_getOffset(iter) != 4) {
    658                 log_err("ERROR: Offset of iteration should be 4\n");
    659             }
    660             break;
    661         default:
    662             if (ucol_getOffset(iter) != 3) {
    663                 log_err("ERROR: Offset of iteration should be 3\n");
    664             }
    665         }
    666         count ++;
    667     }
    668 
    669     ucol_reset(iter);
    670     count = 0;
    671     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
    672         U_SUCCESS(status)) {
    673         switch (count) {
    674         case 0:
    675         case 1:
    676             if (ucol_getOffset(iter) != 3) {
    677                 log_err("ERROR: Offset of iteration should be 3\n");
    678             }
    679             break;
    680         case 2:
    681             if (ucol_getOffset(iter) != 1) {
    682                 log_err("ERROR: Offset of iteration should be 1\n");
    683             }
    684             break;
    685         default:
    686             if (ucol_getOffset(iter) != 0) {
    687                 log_err("ERROR: Offset of iteration should be 0\n");
    688             }
    689         }
    690         count ++;
    691     }
    692 
    693     if(U_FAILURE(status)){
    694         log_err("ERROR: in iterating collation elements %s\n",
    695             myErrorName(status));
    696     }
    697 
    698     ucol_closeElements(iter);
    699     ucol_close(en_us);
    700 }
    701 
    702 /**
    703  * Test for setText()
    704  */
    705 static void TestSetText()
    706 {
    707     int32_t c,i;
    708     UErrorCode status = U_ZERO_ERROR;
    709     UCollator *en_us=NULL;
    710     UCollationElements *iter1, *iter2;
    711     UChar test1[50];
    712     UChar test2[50];
    713 
    714     u_uastrcpy(test1, "What subset of all possible test cases?");
    715     u_uastrcpy(test2, "has the highest probability of detecting");
    716     en_us = ucol_open("en_US", &status);
    717     log_verbose("testing setText for Collation elements\n");
    718     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    719     if(U_FAILURE(status)){
    720         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
    721             myErrorName(status));
    722     ucol_close(en_us);
    723         return;
    724     }
    725     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
    726     if(U_FAILURE(status)){
    727         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
    728             myErrorName(status));
    729     ucol_close(en_us);
    730         return;
    731     }
    732 
    733     /* Run through the second iterator just to exercise it */
    734     c = ucol_next(iter2, &status);
    735     i = 0;
    736 
    737     while ( ++i < 10 && (c != UCOL_NULLORDER))
    738     {
    739         if (U_FAILURE(status))
    740         {
    741             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
    742             ucol_closeElements(iter2);
    743             ucol_closeElements(iter1);
    744     ucol_close(en_us);
    745             return;
    746         }
    747 
    748         c = ucol_next(iter2, &status);
    749     }
    750 
    751     /* Now set it to point to the same string as the first iterator */
    752     ucol_setText(iter2, test1, u_strlen(test1), &status);
    753     if (U_FAILURE(status))
    754     {
    755         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
    756     }
    757     else
    758     {
    759         assertEqual(iter1, iter2);
    760     }
    761 
    762     /* Now set it to point to a null string with fake length*/
    763     ucol_setText(iter2, NULL, 2, &status);
    764     if (U_FAILURE(status))
    765     {
    766         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
    767     }
    768     else
    769     {
    770         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
    771             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
    772         }
    773     }
    774 
    775     ucol_closeElements(iter2);
    776     ucol_closeElements(iter1);
    777     ucol_close(en_us);
    778 }
    779 
    780 /** @bug 4108762
    781  * Test for getMaxExpansion()
    782  */
    783 static void TestMaxExpansion()
    784 {
    785     UErrorCode          status = U_ZERO_ERROR;
    786     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    787     UChar               ch     = 0;
    788     UChar32             unassigned = 0xEFFFD;
    789     UChar               supplementary[2];
    790     uint32_t            stringOffset = 0;
    791     UBool               isError = FALSE;
    792     uint32_t            sorder = 0;
    793     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    794     uint32_t            temporder = 0;
    795 
    796     UChar rule[256];
    797     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    798     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    799         UCOL_DEFAULT_STRENGTH,NULL, &status);
    800     if(U_SUCCESS(status) && coll) {
    801       iter = ucol_openElements(coll, &ch, 1, &status);
    802 
    803       while (ch < 0xFFFF && U_SUCCESS(status)) {
    804           int      count = 1;
    805           uint32_t order;
    806           int32_t  size = 0;
    807 
    808           ch ++;
    809 
    810           ucol_setText(iter, &ch, 1, &status);
    811           order = ucol_previous(iter, &status);
    812 
    813           /* thai management */
    814           if (order == 0)
    815               order = ucol_previous(iter, &status);
    816 
    817           while (U_SUCCESS(status) &&
    818               ucol_previous(iter, &status) != UCOL_NULLORDER) {
    819               count ++;
    820           }
    821 
    822           size = ucol_getMaxExpansion(iter, order);
    823           if (U_FAILURE(status) || size < count) {
    824               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    825                   ch, count);
    826           }
    827       }
    828 
    829       /* testing for exact max expansion */
    830       ch = 0;
    831       while (ch < 0x61) {
    832           uint32_t order;
    833           int32_t  size;
    834           ucol_setText(iter, &ch, 1, &status);
    835           order = ucol_previous(iter, &status);
    836           size  = ucol_getMaxExpansion(iter, order);
    837           if (U_FAILURE(status) || size != 1) {
    838               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    839                   ch, 1);
    840           }
    841           ch ++;
    842       }
    843 
    844       ch = 0x63;
    845       ucol_setText(iter, &ch, 1, &status);
    846       temporder = ucol_previous(iter, &status);
    847 
    848       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
    849           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    850                   ch, 3);
    851       }
    852 
    853       ch = 0x64;
    854       ucol_setText(iter, &ch, 1, &status);
    855       temporder = ucol_previous(iter, &status);
    856 
    857       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
    858           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    859                   ch, 3);
    860       }
    861 
    862       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
    863       ucol_setText(iter, supplementary, 2, &status);
    864       sorder = ucol_previous(iter, &status);
    865 
    866       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
    867           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    868                   ch, 2);
    869       }
    870 
    871       /* testing jamo */
    872       ch = 0x1165;
    873 
    874       ucol_setText(iter, &ch, 1, &status);
    875       temporder = ucol_previous(iter, &status);
    876       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
    877           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    878                   ch, 3);
    879       }
    880 
    881       ucol_closeElements(iter);
    882       ucol_close(coll);
    883 
    884       /* testing special jamo &a<\u1160 */
    885       rule[0] = 0x26;
    886       rule[1] = 0x71;
    887       rule[2] = 0x3c;
    888       rule[3] = 0x1165;
    889       rule[4] = 0x2f;
    890       rule[5] = 0x71;
    891       rule[6] = 0x71;
    892       rule[7] = 0x71;
    893       rule[8] = 0x71;
    894       rule[9] = 0;
    895 
    896       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    897           UCOL_DEFAULT_STRENGTH,NULL, &status);
    898       iter = ucol_openElements(coll, &ch, 1, &status);
    899 
    900       temporder = ucol_previous(iter, &status);
    901       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
    902           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    903                   ch, 5);
    904       }
    905 
    906       ucol_closeElements(iter);
    907       ucol_close(coll);
    908     } else {
    909       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    910     }
    911 
    912 }
    913 
    914 
    915 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
    916 {
    917     int32_t c1, c2;
    918     int32_t count = 0;
    919     UErrorCode status = U_ZERO_ERROR;
    920 
    921     do
    922     {
    923         c1 = ucol_next(i1, &status);
    924         c2 = ucol_next(i2, &status);
    925 
    926         if (c1 != c2)
    927         {
    928             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
    929             break;
    930         }
    931 
    932         count += 1;
    933     }
    934     while (c1 != UCOL_NULLORDER);
    935 }
    936 
    937 /**
    938  * Testing iterators with extremely small buffers
    939  */
    940 static void TestSmallBuffer()
    941 {
    942     UErrorCode          status = U_ZERO_ERROR;
    943     UCollator          *coll;
    944     UCollationElements *testiter,
    945                        *iter;
    946     int32_t             count = 0;
    947     OrderAndOffset     *testorders,
    948                        *orders;
    949 
    950     UChar teststr[500];
    951     UChar str[] = {0x300, 0x31A, 0};
    952     /*
    953     creating a long string of decomposable characters,
    954     since by default the writable buffer is of size 256
    955     */
    956     while (count < 500) {
    957         if ((count & 1) == 0) {
    958             teststr[count ++] = 0x300;
    959         }
    960         else {
    961             teststr[count ++] = 0x31A;
    962         }
    963     }
    964 
    965     coll = ucol_open("th_TH", &status);
    966     if(U_SUCCESS(status) && coll) {
    967       testiter = ucol_openElements(coll, teststr, 500, &status);
    968       iter = ucol_openElements(coll, str, 2, &status);
    969 
    970       orders     = getOrders(iter, &count);
    971       if (count != 2) {
    972           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
    973       }
    974 
    975       /*
    976       this will rearrange the string data to 250 characters of 0x300 first then
    977       250 characters of 0x031A
    978       */
    979       testorders = getOrders(testiter, &count);
    980 
    981       if (count != 500) {
    982           log_err("Error decomposition does not give the right sized collation elements\n");
    983       }
    984 
    985       while (count != 0) {
    986           /* UCA collation element for 0x0F76 */
    987           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
    988               (count <= 250 && testorders[-- count].order != orders[0].order)) {
    989               log_err("Error decomposition does not give the right collation element at %d count\n", count);
    990               break;
    991           }
    992       }
    993 
    994       free(testorders);
    995       free(orders);
    996 
    997       ucol_reset(testiter);
    998 
    999       /* ensures closing of elements done properly to clear writable buffer */
   1000       ucol_next(testiter, &status);
   1001       ucol_next(testiter, &status);
   1002       ucol_closeElements(testiter);
   1003       ucol_closeElements(iter);
   1004       ucol_close(coll);
   1005     } else {
   1006       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   1007     }
   1008 }
   1009 
   1010 /**
   1011 * Sniplets of code from genuca
   1012 */
   1013 static int32_t hex2num(char hex) {
   1014     if(hex>='0' && hex <='9') {
   1015         return hex-'0';
   1016     } else if(hex>='a' && hex<='f') {
   1017         return hex-'a'+10;
   1018     } else if(hex>='A' && hex<='F') {
   1019         return hex-'A'+10;
   1020     } else {
   1021         return 0;
   1022     }
   1023 }
   1024 
   1025 /**
   1026 * Getting codepoints from a string
   1027 * @param str character string contain codepoints seperated by space and ended
   1028 *        by a semicolon
   1029 * @param codepoints array for storage, assuming size > 5
   1030 * @return position at the end of the codepoint section
   1031 */
   1032 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
   1033     UErrorCode errorCode = U_ZERO_ERROR;
   1034     char *semi = uprv_strchr(str, ';');
   1035     char *pipe = uprv_strchr(str, '|');
   1036     char *s;
   1037     *codepoints = 0;
   1038     *contextCPs = 0;
   1039     if(semi == NULL) {
   1040         log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
   1041         return str;
   1042     }
   1043     if(pipe != NULL) {
   1044         int32_t contextLength;
   1045         *pipe = 0;
   1046         contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
   1047         *pipe = '|';
   1048         if(U_FAILURE(errorCode)) {
   1049             log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
   1050             return str;
   1051         }
   1052         /* prepend the precontext string to the codepoints */
   1053         u_memcpy(codepoints, contextCPs, contextLength);
   1054         codepoints += contextLength;
   1055         /* start of the code point string */
   1056         s = pipe + 1;
   1057     } else {
   1058         s = str;
   1059     }
   1060     u_parseString(s, codepoints, 99, NULL, &errorCode);
   1061     if(U_FAILURE(errorCode)) {
   1062         log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
   1063         return str;
   1064     }
   1065     return semi + 1;
   1066 }
   1067 
   1068 /**
   1069 * Sniplets of code from genuca
   1070 */
   1071 static int32_t
   1072 readElement(char **from, char *to, char separator, UErrorCode *status)
   1073 {
   1074     if (U_SUCCESS(*status)) {
   1075         char    buffer[1024];
   1076         int32_t i = 0;
   1077         while (**from != separator) {
   1078             if (**from != ' ') {
   1079                 *(buffer+i++) = **from;
   1080             }
   1081             (*from)++;
   1082         }
   1083         (*from)++;
   1084         *(buffer + i) = 0;
   1085         strcpy(to, buffer);
   1086         return i/2;
   1087     }
   1088 
   1089     return 0;
   1090 }
   1091 
   1092 /**
   1093 * Sniplets of code from genuca
   1094 */
   1095 static uint32_t
   1096 getSingleCEValue(char *primary, char *secondary, char *tertiary,
   1097                           UErrorCode *status)
   1098 {
   1099     if (U_SUCCESS(*status)) {
   1100         uint32_t  value    = 0;
   1101         char      primsave = '\0';
   1102         char      secsave  = '\0';
   1103         char      tersave  = '\0';
   1104         char     *primend  = primary+4;
   1105         char     *secend   = secondary+2;
   1106         char     *terend   = tertiary+2;
   1107         uint32_t  primvalue;
   1108         uint32_t  secvalue;
   1109         uint32_t  tervalue;
   1110 
   1111         if (uprv_strlen(primary) > 4) {
   1112             primsave = *primend;
   1113             *primend = '\0';
   1114         }
   1115 
   1116         if (uprv_strlen(secondary) > 2) {
   1117             secsave = *secend;
   1118             *secend = '\0';
   1119         }
   1120 
   1121         if (uprv_strlen(tertiary) > 2) {
   1122             tersave = *terend;
   1123             *terend = '\0';
   1124         }
   1125 
   1126         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
   1127         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
   1128         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
   1129         if(primvalue <= 0xFF) {
   1130           primvalue <<= 8;
   1131         }
   1132 
   1133         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
   1134            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
   1135            | (tervalue & UCOL_TERTIARYORDERMASK);
   1136 
   1137         if(primsave!='\0') {
   1138             *primend = primsave;
   1139         }
   1140         if(secsave!='\0') {
   1141             *secend = secsave;
   1142         }
   1143         if(tersave!='\0') {
   1144             *terend = tersave;
   1145         }
   1146         return value;
   1147     }
   1148     return 0;
   1149 }
   1150 
   1151 /**
   1152 * Getting collation elements generated from a string
   1153 * @param str character string contain collation elements contained in [] and
   1154 *        seperated by space
   1155 * @param ce array for storage, assuming size > 20
   1156 * @param status error status
   1157 * @return position at the end of the codepoint section
   1158 */
   1159 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
   1160     char       *pStartCP     = uprv_strchr(str, '[');
   1161     int         count        = 0;
   1162     char       *pEndCP;
   1163     char        primary[100];
   1164     char        secondary[100];
   1165     char        tertiary[100];
   1166 
   1167     while (*pStartCP == '[') {
   1168         uint32_t primarycount   = 0;
   1169         uint32_t secondarycount = 0;
   1170         uint32_t tertiarycount  = 0;
   1171         uint32_t CEi = 1;
   1172         pEndCP = strchr(pStartCP, ']');
   1173         if(pEndCP == NULL) {
   1174             break;
   1175         }
   1176         pStartCP ++;
   1177 
   1178         primarycount   = readElement(&pStartCP, primary, ',', status);
   1179         secondarycount = readElement(&pStartCP, secondary, ',', status);
   1180         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
   1181 
   1182         /* I want to get the CEs entered right here, including continuation */
   1183         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
   1184         if (U_FAILURE(*status)) {
   1185             break;
   1186         }
   1187 
   1188         while (2 * CEi < primarycount || CEi < secondarycount ||
   1189                CEi < tertiarycount) {
   1190             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
   1191             if (2 * CEi < primarycount) {
   1192                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
   1193                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
   1194             }
   1195 
   1196             if (2 * CEi + 1 < primarycount) {
   1197                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
   1198                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
   1199             }
   1200 
   1201             if (CEi < secondarycount) {
   1202                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
   1203                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
   1204             }
   1205 
   1206             if (CEi < tertiarycount) {
   1207                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
   1208                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
   1209             }
   1210 
   1211             CEi ++;
   1212             ces[count ++] = value;
   1213         }
   1214 
   1215       pStartCP = pEndCP + 1;
   1216     }
   1217     ces[count] = 0;
   1218     return pStartCP;
   1219 }
   1220 
   1221 /**
   1222 * Getting the FractionalUCA.txt file stream
   1223 */
   1224 static FileStream * getFractionalUCA(void)
   1225 {
   1226     char        newPath[256];
   1227     char        backupPath[256];
   1228     FileStream *result = NULL;
   1229 
   1230     /* Look inside ICU_DATA first */
   1231     uprv_strcpy(newPath, ctest_dataSrcDir());
   1232     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
   1233     uprv_strcat(newPath, "FractionalUCA.txt");
   1234 
   1235     /* As a fallback, try to guess where the source data was located
   1236      *   at the time ICU was built, and look there.
   1237      */
   1238 #if defined (U_TOPSRCDIR)
   1239     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
   1240 #else
   1241     {
   1242         UErrorCode errorCode = U_ZERO_ERROR;
   1243         strcpy(backupPath, loadTestData(&errorCode));
   1244         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
   1245     }
   1246 #endif
   1247     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
   1248 
   1249     result = T_FileStream_open(newPath, "rb");
   1250 
   1251     if (result == NULL) {
   1252         result = T_FileStream_open(backupPath, "rb");
   1253         if (result == NULL) {
   1254             log_err("Failed to open either %s or %s\n", newPath, backupPath);
   1255         }
   1256     }
   1257     return result;
   1258 }
   1259 
   1260 /**
   1261 * Testing the CEs returned by the iterator
   1262 */
   1263 static void TestCEs() {
   1264     FileStream *file = NULL;
   1265     char        line[2048];
   1266     char       *str;
   1267     UChar       codepoints[10];
   1268     uint32_t    ces[20];
   1269     UErrorCode  status = U_ZERO_ERROR;
   1270     UCollator          *coll = ucol_open("", &status);
   1271     uint32_t lineNo = 0;
   1272     UChar       contextCPs[5];
   1273 
   1274     if (U_FAILURE(status)) {
   1275         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
   1276         return;
   1277     }
   1278 
   1279     file = getFractionalUCA();
   1280 
   1281     if (file == NULL) {
   1282         log_err("*** unable to open input FractionalUCA.txt file ***\n");
   1283         return;
   1284     }
   1285 
   1286 
   1287     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1288         int                 count = 0;
   1289         UCollationElements *iter;
   1290         int32_t            preContextCeLen=0;
   1291         lineNo++;
   1292         /* skip this line if it is empty or a comment or is a return value
   1293         or start of some variable section */
   1294         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1295             line[0] == 0x000D || line[0] == '[') {
   1296             continue;
   1297         }
   1298 
   1299         str = getCodePoints(line, codepoints, contextCPs);
   1300 
   1301         /* these are 'fake' codepoints in the fractional UCA, and are used just
   1302          * for positioning of indirect values. They should not go through this
   1303          * test.
   1304          */
   1305         if(*codepoints == 0xFDD0) {
   1306           continue;
   1307         }
   1308         if (*contextCPs != 0) {
   1309             iter = ucol_openElements(coll, contextCPs, -1, &status);
   1310             if (U_FAILURE(status)) {
   1311                 log_err("Error in opening collation elements\n");
   1312                 break;
   1313             }
   1314             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
   1315                 preContextCeLen++;
   1316             }
   1317             ucol_closeElements(iter);
   1318         }
   1319 
   1320         getCEs(str, ces+preContextCeLen, &status);
   1321         if (U_FAILURE(status)) {
   1322             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
   1323             break;
   1324         }
   1325         iter = ucol_openElements(coll, codepoints, -1, &status);
   1326         if (U_FAILURE(status)) {
   1327             log_err("Error in opening collation elements\n");
   1328             break;
   1329         }
   1330         for (;;) {
   1331             uint32_t ce = (uint32_t)ucol_next(iter, &status);
   1332             if (ce == 0xFFFFFFFF) {
   1333                 ce = 0;
   1334             }
   1335             /* we now unconditionally reorder Thai/Lao prevowels, so this
   1336              * test would fail if we don't skip here.
   1337              */
   1338             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
   1339               continue;
   1340             }
   1341             if (ce != ces[count] || U_FAILURE(status)) {
   1342                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
   1343                 break;
   1344             }
   1345             if (ces[count] == 0) {
   1346                 break;
   1347             }
   1348             count ++;
   1349         }
   1350         ucol_closeElements(iter);
   1351     }
   1352 
   1353     T_FileStream_close(file);
   1354     ucol_close(coll);
   1355 }
   1356 
   1357 /**
   1358 * Testing the discontigous contractions
   1359 */
   1360 static void TestDiscontiguos() {
   1361     const char               *rulestr    =
   1362                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
   1363           UChar               rule[50];
   1364           int                 rulelen = u_unescape(rulestr, rule, 50);
   1365     const char               *src[] = {
   1366      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
   1367     /* base character blocked */
   1368      "XD\\u0300", "XD\\u0300\\u0315",
   1369     /* non blocking combining character */
   1370      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
   1371      /* blocking combining character */
   1372      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
   1373      /* contraction prefix */
   1374      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
   1375      "X\\u0300\\u031A\\u0315",
   1376      /* ends not with a contraction character */
   1377      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
   1378      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
   1379     };
   1380     const char               *tgt[] = {
   1381      /* non blocking combining character */
   1382      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
   1383     /* base character blocked */
   1384      "X D \\u0300", "X D \\u0300\\u0315",
   1385     /* non blocking combining character */
   1386      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
   1387      /* blocking combining character */
   1388      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
   1389      /* contraction prefix */
   1390      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
   1391      "X\\u0300 \\u031A \\u0315",
   1392      /* ends not with a contraction character */
   1393      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
   1394      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
   1395     };
   1396           int                 size   = 20;
   1397           UCollator          *coll;
   1398           UErrorCode          status    = U_ZERO_ERROR;
   1399           int                 count     = 0;
   1400           UCollationElements *iter;
   1401           UCollationElements *resultiter;
   1402 
   1403     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
   1404     iter       = ucol_openElements(coll, rule, 1, &status);
   1405     resultiter = ucol_openElements(coll, rule, 1, &status);
   1406 
   1407     if (U_FAILURE(status)) {
   1408         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
   1409         return;
   1410     }
   1411 
   1412     while (count < size) {
   1413         UChar  str[20];
   1414         UChar  tstr[20];
   1415         int    strLen = u_unescape(src[count], str, 20);
   1416         UChar *s;
   1417 
   1418         ucol_setText(iter, str, strLen, &status);
   1419         if (U_FAILURE(status)) {
   1420             log_err("Error opening collation iterator\n");
   1421             return;
   1422         }
   1423 
   1424         u_unescape(tgt[count], tstr, 20);
   1425         s = tstr;
   1426 
   1427         log_verbose("count %d\n", count);
   1428 
   1429         for (;;) {
   1430             uint32_t  ce;
   1431             UChar    *e = u_strchr(s, 0x20);
   1432             if (e == 0) {
   1433                 e = u_strchr(s, 0);
   1434             }
   1435             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
   1436             ce = ucol_next(resultiter, &status);
   1437             if (U_FAILURE(status)) {
   1438                 log_err("Error manipulating collation iterator\n");
   1439                 return;
   1440             }
   1441             while (ce != UCOL_NULLORDER) {
   1442                 if (ce != (uint32_t)ucol_next(iter, &status) ||
   1443                     U_FAILURE(status)) {
   1444                     log_err("Discontiguos contraction test mismatch\n");
   1445                     return;
   1446                 }
   1447                 ce = ucol_next(resultiter, &status);
   1448                 if (U_FAILURE(status)) {
   1449                     log_err("Error getting next collation element\n");
   1450                     return;
   1451                 }
   1452             }
   1453             s = e + 1;
   1454             if (*e == 0) {
   1455                 break;
   1456             }
   1457         }
   1458         ucol_reset(iter);
   1459         backAndForth(iter);
   1460         count ++;
   1461     }
   1462     ucol_closeElements(resultiter);
   1463     ucol_closeElements(iter);
   1464     ucol_close(coll);
   1465 }
   1466 
   1467 static void TestCEBufferOverflow()
   1468 {
   1469     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
   1470     UErrorCode          status = U_ZERO_ERROR;
   1471     UChar               rule[10];
   1472     UCollator          *coll;
   1473     UCollationElements *iter;
   1474 
   1475     u_uastrcpy(rule, "&z < AB");
   1476     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
   1477     if (U_FAILURE(status)) {
   1478         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
   1479         return;
   1480     }
   1481 
   1482     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
   1483     test. this will cause an overflow in getPrev */
   1484     str[0] = 0x0041;    /* 'A' */
   1485     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
   1486     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
   1487     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
   1488     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
   1489                              &status);
   1490     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
   1491         status == U_BUFFER_OVERFLOW_ERROR) {
   1492         log_err("CE buffer should not overflow with long string of trail surrogates\n");
   1493     }
   1494     ucol_closeElements(iter);
   1495     ucol_close(coll);
   1496 }
   1497 
   1498 /**
   1499 * Checking collation element validity.
   1500 */
   1501 #define MAX_CODEPOINTS_TO_SHOW 10
   1502 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
   1503     int i, lengthToUse = length;
   1504     if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
   1505         lengthToUse = MAX_CODEPOINTS_TO_SHOW;
   1506     }
   1507     for (i = 0; i < lengthToUse; ++i) {
   1508         int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
   1509         if (bytesWritten <= 0) {
   1510             break;
   1511         }
   1512         codepointText += bytesWritten;
   1513     }
   1514     if (i < length) {
   1515         sprintf(codepointText, " ...");
   1516     }
   1517 }
   1518 
   1519 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
   1520                              int length)
   1521 {
   1522     UErrorCode          status = U_ZERO_ERROR;
   1523     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
   1524                                                   &status);
   1525     UBool result = FALSE;
   1526     UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
   1527     const char * collLocale;
   1528 
   1529     if (U_FAILURE(status)) {
   1530         log_err("Error creating iterator for testing validity\n");
   1531         return FALSE;
   1532     }
   1533     collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
   1534     if (U_FAILURE(status) || collLocale==NULL) {
   1535         status = U_ZERO_ERROR;
   1536         collLocale = "?";
   1537     }
   1538 
   1539     for (;;) {
   1540         uint32_t ce = ucol_next(iter, &status);
   1541         uint32_t primary, p1, p2, secondary, tertiary;
   1542         if (ce == UCOL_NULLORDER) {
   1543             result = TRUE;
   1544             break;
   1545         }
   1546         if (ce == 0) {
   1547             continue;
   1548         }
   1549         if (ce == 0x02000202) {
   1550             /* special CE for merge-sort character */
   1551             if (*codepoints == 0xFFFE /* && length == 1 */) {
   1552                 /*
   1553                  * Note: We should check for length==1 but the token parser appears
   1554                  * to give us trailing NUL characters.
   1555                  * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
   1556                  *                     rather than the internal collation rule parser
   1557                  */
   1558                 continue;
   1559             } else {
   1560                 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
   1561                         (int)*codepoints, (int)length);
   1562                 break;
   1563             }
   1564         }
   1565         primary   = UCOL_PRIMARYORDER(ce);
   1566         p1 = primary >> 8;
   1567         p2 = primary & 0xFF;
   1568         secondary = UCOL_SECONDARYORDER(ce);
   1569         tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
   1570 
   1571         if (!isContinuation(ce)) {
   1572             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1573                 log_err("Empty CE %08lX except for case bits\n", (long)ce);
   1574                 break;
   1575             }
   1576             if (p1 == 0) {
   1577                 if (p2 != 0) {
   1578                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1579                     break;
   1580                 }
   1581                 primaryDone = TRUE;
   1582             } else {
   1583                 if (p1 <= 2 || p1 >= 0xF0) {
   1584                     /* Primary first bytes F0..FF are specials. */
   1585                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1586                     break;
   1587                 }
   1588                 if (p2 == 0) {
   1589                     primaryDone = TRUE;
   1590                 } else {
   1591                     if (p2 <= 3 || p2 >= 0xFF) {
   1592                         /* Primary second bytes 03 and FF are sort key compression terminators. */
   1593                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1594                         break;
   1595                     }
   1596                     primaryDone = FALSE;
   1597                 }
   1598             }
   1599             if (secondary == 0) {
   1600                 if (primary != 0) {
   1601                     log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
   1602                     break;
   1603                 }
   1604                 secondaryDone = TRUE;
   1605             } else {
   1606                 if (secondary <= 2 ||
   1607                     (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
   1608                 ) {
   1609                     /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
   1610                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1611                     break;
   1612                 }
   1613                 secondaryDone = FALSE;
   1614             }
   1615             if (tertiary == 0) {
   1616                 /* We know that ce != 0. */
   1617                 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
   1618                 break;
   1619             }
   1620             if (tertiary <= 2) {
   1621                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1622                 break;
   1623             }
   1624             tertiaryDone = FALSE;
   1625         } else {
   1626             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1627                 log_err("Empty continuation %08lX\n", (long)ce);
   1628                 break;
   1629             }
   1630             if (primaryDone && primary != 0) {
   1631                 log_err("Primary was done but continues in %08lX\n", (long)ce);
   1632                 break;
   1633             }
   1634             if (p1 == 0) {
   1635                 if (p2 != 0) {
   1636                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1637                     break;
   1638                 }
   1639                 primaryDone = TRUE;
   1640             } else {
   1641                 if (p1 <= 2) {
   1642                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1643                     break;
   1644                 }
   1645                 if (p2 == 0) {
   1646                     primaryDone = TRUE;
   1647                 } else {
   1648                     if (p2 <= 3) {
   1649                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1650                         break;
   1651                     }
   1652                 }
   1653             }
   1654             if (secondaryDone && secondary != 0) {
   1655                 log_err("Secondary was done but continues in %08lX\n", (long)ce);
   1656                 break;
   1657             }
   1658             if (secondary == 0) {
   1659                 secondaryDone = TRUE;
   1660             } else {
   1661                 if (secondary <= 2) {
   1662                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1663                     break;
   1664                 }
   1665             }
   1666             if (tertiaryDone && tertiary != 0) {
   1667                 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
   1668                 break;
   1669             }
   1670             if (tertiary == 0) {
   1671                 tertiaryDone = TRUE;
   1672             } else if (tertiary <= 2) {
   1673                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1674                 break;
   1675             }
   1676         }
   1677     }
   1678     if (!result) {
   1679         char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
   1680         showCodepoints(codepoints, length, codepointText);
   1681         log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
   1682     }
   1683     ucol_closeElements(iter);
   1684     return result;
   1685 }
   1686 
   1687 static void TestCEValidity()
   1688 {
   1689     /* testing UCA collation elements */
   1690     UErrorCode  status      = U_ZERO_ERROR;
   1691     /* en_US has no tailorings */
   1692     UCollator  *coll        = ucol_open("root", &status);
   1693     /* tailored locales */
   1694     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
   1695     const char *loc;
   1696     FileStream *file = NULL;
   1697     char        line[2048];
   1698     UChar       codepoints[11];
   1699     int         count = 0;
   1700     int         maxCount = 0;
   1701     UChar       contextCPs[3];
   1702     UChar32     c;
   1703     UParseError parseError;
   1704     if (U_FAILURE(status)) {
   1705         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1706         return;
   1707     }
   1708     log_verbose("Testing UCA elements\n");
   1709     file = getFractionalUCA();
   1710     if (file == NULL) {
   1711         log_err("Fractional UCA data can not be opened\n");
   1712         return;
   1713     }
   1714 
   1715     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1716         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1717             line[0] == 0x000D || line[0] == '[') {
   1718             continue;
   1719         }
   1720 
   1721         getCodePoints(line, codepoints, contextCPs);
   1722         checkCEValidity(coll, codepoints, u_strlen(codepoints));
   1723     }
   1724 
   1725     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1726     for (c = 0; c <= 0xffff; ++c) {
   1727         if (u_isdefined(c)) {
   1728             codepoints[0] = (UChar)c;
   1729             checkCEValidity(coll, codepoints, 1);
   1730         }
   1731     }
   1732     for (; c <= 0x10ffff; ++c) {
   1733         if (u_isdefined(c)) {
   1734             int32_t i = 0;
   1735             U16_APPEND_UNSAFE(codepoints, i, c);
   1736             checkCEValidity(coll, codepoints, i);
   1737         }
   1738     }
   1739 
   1740     ucol_close(coll);
   1741 
   1742     /* testing tailored collation elements */
   1743     log_verbose("Testing tailored elements\n");
   1744     if(getTestOption(QUICK_OPTION)) {
   1745         maxCount = sizeof(locale)/sizeof(locale[0]);
   1746     } else {
   1747         maxCount = uloc_countAvailable();
   1748     }
   1749     while (count < maxCount) {
   1750         const UChar *rules = NULL,
   1751                     *current = NULL;
   1752         UChar *rulesCopy = NULL;
   1753         int32_t ruleLen = 0;
   1754 
   1755         uint32_t chOffset = 0;
   1756         uint32_t chLen = 0;
   1757         uint32_t exOffset = 0;
   1758         uint32_t exLen = 0;
   1759         uint32_t prefixOffset = 0;
   1760         uint32_t prefixLen = 0;
   1761         UBool    startOfRules = TRUE;
   1762         UColOptionSet opts;
   1763 
   1764         UColTokenParser src;
   1765         uint32_t strength = 0;
   1766         uint16_t specs = 0;
   1767         if(getTestOption(QUICK_OPTION)) {
   1768             loc = locale[count];
   1769         } else {
   1770             loc = uloc_getAvailable(count);
   1771             if(!hasCollationElements(loc)) {
   1772                 count++;
   1773                 continue;
   1774             }
   1775         }
   1776 
   1777         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1778 
   1779         log_verbose("Testing CEs for %s\n", loc);
   1780 
   1781         coll      = ucol_open(loc, &status);
   1782         if (U_FAILURE(status)) {
   1783             log_err("%s collator creation failed\n", loc);
   1784             return;
   1785         }
   1786 
   1787         src.opts = &opts;
   1788         rules = ucol_getRules(coll, &ruleLen);
   1789 
   1790         if (ruleLen > 0) {
   1791             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   1792                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1793             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1794             src.current = src.source = rulesCopy;
   1795             src.end = rulesCopy + ruleLen;
   1796             src.extraCurrent = src.end;
   1797             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1798 
   1799 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1800 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1801             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   1802               strength = src.parsedToken.strength;
   1803               chOffset = src.parsedToken.charsOffset;
   1804               chLen = src.parsedToken.charsLen;
   1805               exOffset = src.parsedToken.extensionOffset;
   1806               exLen = src.parsedToken.extensionLen;
   1807               prefixOffset = src.parsedToken.prefixOffset;
   1808               prefixLen = src.parsedToken.prefixLen;
   1809               specs = src.parsedToken.flags;
   1810 
   1811                 startOfRules = FALSE;
   1812                 uprv_memcpy(codepoints, src.source + chOffset,
   1813                                                        chLen * sizeof(UChar));
   1814                 codepoints[chLen] = 0;
   1815                 checkCEValidity(coll, codepoints, chLen);
   1816             }
   1817             uprv_free(src.source);
   1818         }
   1819 
   1820         ucol_close(coll);
   1821         count ++;
   1822     }
   1823     T_FileStream_close(file);
   1824 }
   1825 
   1826 static void printSortKeyError(const UChar   *codepoints, int length,
   1827                                     uint8_t *sortkey, int sklen)
   1828 {
   1829     int count = 0;
   1830     log_err("Sortkey not valid for ");
   1831     while (length > 0) {
   1832         log_err("0x%04x ", *codepoints);
   1833         length --;
   1834         codepoints ++;
   1835     }
   1836     log_err("\nSortkey : ");
   1837     while (count < sklen) {
   1838         log_err("0x%02x ", sortkey[count]);
   1839         count ++;
   1840     }
   1841     log_err("\n");
   1842 }
   1843 
   1844 /**
   1845 * Checking sort key validity for all levels
   1846 */
   1847 static UBool checkSortKeyValidity(UCollator *coll,
   1848                                   const UChar *codepoints,
   1849                                   int length)
   1850 {
   1851     UErrorCode status  = U_ZERO_ERROR;
   1852     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
   1853                                       UCOL_TERTIARY, UCOL_QUATERNARY,
   1854                                       UCOL_IDENTICAL};
   1855     int        strengthlen = 5;
   1856     int        strengthIndex = 0;
   1857     int        caselevel   = 0;
   1858 
   1859     while (caselevel < 1) {
   1860         if (caselevel == 0) {
   1861             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
   1862         }
   1863         else {
   1864             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
   1865         }
   1866 
   1867         while (strengthIndex < strengthlen) {
   1868             int        count01 = 0;
   1869             uint32_t   count   = 0;
   1870             uint8_t    sortkey[128];
   1871             uint32_t   sklen;
   1872 
   1873             ucol_setStrength(coll, strength[strengthIndex]);
   1874             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
   1875             while (sortkey[count] != 0) {
   1876                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
   1877                     printSortKeyError(codepoints, length, sortkey, sklen);
   1878                     return FALSE;
   1879                 }
   1880                 if (sortkey[count] == 1) {
   1881                     count01 ++;
   1882                 }
   1883                 count ++;
   1884             }
   1885 
   1886             if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
   1887                 printSortKeyError(codepoints, length, sortkey, sklen);
   1888                 return FALSE;
   1889             }
   1890             strengthIndex ++;
   1891         }
   1892         caselevel ++;
   1893     }
   1894     return TRUE;
   1895 }
   1896 
   1897 static void TestSortKeyValidity(void)
   1898 {
   1899     /* testing UCA collation elements */
   1900     UErrorCode  status      = U_ZERO_ERROR;
   1901     /* en_US has no tailorings */
   1902     UCollator  *coll        = ucol_open("en_US", &status);
   1903     /* tailored locales */
   1904     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
   1905     FileStream *file = NULL;
   1906     char        line[2048];
   1907     UChar       codepoints[10];
   1908     int         count = 0;
   1909     UChar       contextCPs[5];
   1910     UParseError parseError;
   1911     if (U_FAILURE(status)) {
   1912         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1913         return;
   1914     }
   1915     log_verbose("Testing UCA elements\n");
   1916     file = getFractionalUCA();
   1917     if (file == NULL) {
   1918         log_err("Fractional UCA data can not be opened\n");
   1919         return;
   1920     }
   1921 
   1922     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1923         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1924             line[0] == 0x000D || line[0] == '[') {
   1925             continue;
   1926         }
   1927 
   1928         getCodePoints(line, codepoints, contextCPs);
   1929         if(codepoints[0] == 0xFFFE) {
   1930             /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   1931             continue;
   1932         }
   1933         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
   1934     }
   1935 
   1936     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1937     codepoints[0] = 0;
   1938 
   1939     while (codepoints[0] < 0xFFFF) {
   1940         if (u_isdefined((UChar32)codepoints[0])) {
   1941             checkSortKeyValidity(coll, codepoints, 1);
   1942         }
   1943         codepoints[0] ++;
   1944     }
   1945 
   1946     ucol_close(coll);
   1947 
   1948     /* testing tailored collation elements */
   1949     log_verbose("Testing tailored elements\n");
   1950     while (count < 5) {
   1951         const UChar *rules = NULL,
   1952                     *current = NULL;
   1953         UChar *rulesCopy = NULL;
   1954         int32_t ruleLen = 0;
   1955 
   1956         uint32_t chOffset = 0;
   1957         uint32_t chLen = 0;
   1958         uint32_t exOffset = 0;
   1959         uint32_t exLen = 0;
   1960         uint32_t prefixOffset = 0;
   1961         uint32_t prefixLen = 0;
   1962         UBool    startOfRules = TRUE;
   1963         UColOptionSet opts;
   1964 
   1965         UColTokenParser src;
   1966         uint32_t strength = 0;
   1967         uint16_t specs = 0;
   1968 
   1969         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1970 
   1971         coll      = ucol_open(locale[count], &status);
   1972         if (U_FAILURE(status)) {
   1973             log_err("%s collator creation failed\n", locale[count]);
   1974             return;
   1975         }
   1976 
   1977         src.opts = &opts;
   1978         rules = ucol_getRules(coll, &ruleLen);
   1979 
   1980         if (ruleLen > 0) {
   1981             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   1982                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1983             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1984             src.current = src.source = rulesCopy;
   1985             src.end = rulesCopy + ruleLen;
   1986             src.extraCurrent = src.end;
   1987             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1988 
   1989 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1990 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1991             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
   1992                 strength = src.parsedToken.strength;
   1993                 chOffset = src.parsedToken.charsOffset;
   1994                 chLen = src.parsedToken.charsLen;
   1995                 exOffset = src.parsedToken.extensionOffset;
   1996                 exLen = src.parsedToken.extensionLen;
   1997                 prefixOffset = src.parsedToken.prefixOffset;
   1998                 prefixLen = src.parsedToken.prefixLen;
   1999                 specs = src.parsedToken.flags;
   2000 
   2001                 startOfRules = FALSE;
   2002                 uprv_memcpy(codepoints, src.source + chOffset,
   2003                                                        chLen * sizeof(UChar));
   2004                 codepoints[chLen] = 0;
   2005                 if(codepoints[0] == 0xFFFE) {
   2006                     /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   2007                     continue;
   2008                 }
   2009                 checkSortKeyValidity(coll, codepoints, chLen);
   2010             }
   2011             uprv_free(src.source);
   2012         }
   2013 
   2014         ucol_close(coll);
   2015         count ++;
   2016     }
   2017     T_FileStream_close(file);
   2018 }
   2019 
   2020 #endif /* #if !UCONFIG_NO_COLLATION */
   2021