Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2013, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File CITERTST.C
      9 *
     10 * Modification History:
     11 * Date      Name               Description
     12 *           Madhu Katragadda   Ported for C API
     13 * 02/19/01  synwee             Modified test case for new collation iterator
     14 *********************************************************************************/
     15 /*
     16  * Collation Iterator tests.
     17  * (Let me reiterate my position...)
     18  */
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION
     23 
     24 #include "unicode/ucol.h"
     25 #include "unicode/ucoleitr.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/putil.h"
     30 #include "callcoll.h"
     31 #include "cmemory.h"
     32 #include "cintltst.h"
     33 #include "citertst.h"
     34 #include "ccolltst.h"
     35 #include "filestrm.h"
     36 #include "cstring.h"
     37 #include "ucol_imp.h"
     38 #include "ucol_tok.h"
     39 #include "uparse.h"
     40 #include <stdio.h>
     41 
     42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
     43 
     44 void addCollIterTest(TestNode** root)
     45 {
     46     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
     47     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
     48     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
     49     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
     50     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
     51     addTest(root, &TestNormalizedUnicodeChar,
     52                                 "tscoll/citertst/TestNormalizedUnicodeChar");
     53     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
     54     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
     55     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
     56     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
     57     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
     58     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
     59     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
     60     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
     61     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
     62     addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
     63 }
     64 
     65 /* The locales we support */
     66 
     67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
     68 
     69 static void TestBug672() {
     70     UErrorCode  status = U_ZERO_ERROR;
     71     UChar       pattern[20];
     72     UChar       text[50];
     73     int         i;
     74     int         result[3][3];
     75 
     76     u_uastrcpy(pattern, "resume");
     77     u_uastrcpy(text, "Time to resume updating my resume.");
     78 
     79     for (i = 0; i < 3; ++ i) {
     80         UCollator          *coll = ucol_open(LOCALES[i], &status);
     81         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
     82                                                      &status);
     83         UCollationElements *titer = ucol_openElements(coll, text, -1,
     84                                                      &status);
     85         if (U_FAILURE(status)) {
     86             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
     87                     myErrorName(status));
     88             return;
     89         }
     90 
     91         log_verbose("locale tested %s\n", LOCALES[i]);
     92 
     93         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
     94                U_SUCCESS(status)) {
     95         }
     96         if (U_FAILURE(status)) {
     97             log_err("ERROR: reversing collation iterator :%s\n",
     98                     myErrorName(status));
     99             return;
    100         }
    101         ucol_reset(pitr);
    102 
    103         ucol_setOffset(titer, u_strlen(pattern), &status);
    104         if (U_FAILURE(status)) {
    105             log_err("ERROR: setting offset in collator :%s\n",
    106                     myErrorName(status));
    107             return;
    108         }
    109         result[i][0] = ucol_getOffset(titer);
    110         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    111 
    112         /* Use previous() */
    113         ucol_previous(titer, &status);
    114         result[i][1] = ucol_getOffset(titer);
    115         log_verbose("Current offset %d after previous\n", result[i][1]);
    116 
    117         /* Add one to index */
    118         log_verbose("Adding one to current offset...\n");
    119         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    120         if (U_FAILURE(status)) {
    121             log_err("ERROR: setting offset in collator :%s\n",
    122                     myErrorName(status));
    123             return;
    124         }
    125         result[i][2] = ucol_getOffset(titer);
    126         log_verbose("Current offset in text = %d\n", result[i][2]);
    127         ucol_closeElements(pitr);
    128         ucol_closeElements(titer);
    129         ucol_close(coll);
    130     }
    131 
    132     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    133         uprv_memcmp(result[1], result[2], 3) != 0) {
    134         log_err("ERROR: Different locales have different offsets at the same character\n");
    135     }
    136 }
    137 
    138 
    139 
    140 /*  Running this test with normalization enabled showed up a bug in the incremental
    141     normalization code. */
    142 static void TestBug672Normalize() {
    143     UErrorCode  status = U_ZERO_ERROR;
    144     UChar       pattern[20];
    145     UChar       text[50];
    146     int         i;
    147     int         result[3][3];
    148 
    149     u_uastrcpy(pattern, "resume");
    150     u_uastrcpy(text, "Time to resume updating my resume.");
    151 
    152     for (i = 0; i < 3; ++ i) {
    153         UCollator          *coll = ucol_open(LOCALES[i], &status);
    154         UCollationElements *pitr = NULL;
    155         UCollationElements *titer = NULL;
    156 
    157         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    158 
    159         pitr = ucol_openElements(coll, pattern, -1, &status);
    160         titer = ucol_openElements(coll, text, -1, &status);
    161         if (U_FAILURE(status)) {
    162             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
    163                     myErrorName(status));
    164             return;
    165         }
    166 
    167         log_verbose("locale tested %s\n", LOCALES[i]);
    168 
    169         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
    170                U_SUCCESS(status)) {
    171         }
    172         if (U_FAILURE(status)) {
    173             log_err("ERROR: reversing collation iterator :%s\n",
    174                     myErrorName(status));
    175             return;
    176         }
    177         ucol_reset(pitr);
    178 
    179         ucol_setOffset(titer, u_strlen(pattern), &status);
    180         if (U_FAILURE(status)) {
    181             log_err("ERROR: setting offset in collator :%s\n",
    182                     myErrorName(status));
    183             return;
    184         }
    185         result[i][0] = ucol_getOffset(titer);
    186         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    187 
    188         /* Use previous() */
    189         ucol_previous(titer, &status);
    190         result[i][1] = ucol_getOffset(titer);
    191         log_verbose("Current offset %d after previous\n", result[i][1]);
    192 
    193         /* Add one to index */
    194         log_verbose("Adding one to current offset...\n");
    195         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    196         if (U_FAILURE(status)) {
    197             log_err("ERROR: setting offset in collator :%s\n",
    198                     myErrorName(status));
    199             return;
    200         }
    201         result[i][2] = ucol_getOffset(titer);
    202         log_verbose("Current offset in text = %d\n", result[i][2]);
    203         ucol_closeElements(pitr);
    204         ucol_closeElements(titer);
    205         ucol_close(coll);
    206     }
    207 
    208     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    209         uprv_memcmp(result[1], result[2], 3) != 0) {
    210         log_err("ERROR: Different locales have different offsets at the same character\n");
    211     }
    212 }
    213 
    214 
    215 
    216 
    217 /**
    218  * Test for CollationElementIterator previous and next for the whole set of
    219  * unicode characters.
    220  */
    221 static void TestUnicodeChar()
    222 {
    223     UChar source[0x100];
    224     UCollator *en_us;
    225     UCollationElements *iter;
    226     UErrorCode status = U_ZERO_ERROR;
    227     UChar codepoint;
    228 
    229     UChar *test;
    230     en_us = ucol_open("en_US", &status);
    231     if (U_FAILURE(status)){
    232        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
    233               myErrorName(status));
    234        return;
    235     }
    236 
    237     for (codepoint = 1; codepoint < 0xFFFE;)
    238     {
    239       test = source;
    240 
    241       while (codepoint % 0xFF != 0)
    242       {
    243         if (u_isdefined(codepoint))
    244           *(test ++) = codepoint;
    245         codepoint ++;
    246       }
    247 
    248       if (u_isdefined(codepoint))
    249         *(test ++) = codepoint;
    250 
    251       if (codepoint != 0xFFFF)
    252         codepoint ++;
    253 
    254       *test = 0;
    255       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
    256       if(U_FAILURE(status)){
    257           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    258               myErrorName(status));
    259           ucol_close(en_us);
    260           return;
    261       }
    262       /* A basic test to see if it's working at all */
    263       log_verbose("codepoint testing %x\n", codepoint);
    264       backAndForth(iter);
    265       ucol_closeElements(iter);
    266 
    267       /* null termination test */
    268       iter=ucol_openElements(en_us, source, -1, &status);
    269       if(U_FAILURE(status)){
    270           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    271               myErrorName(status));
    272           ucol_close(en_us);
    273           return;
    274       }
    275       /* A basic test to see if it's working at all */
    276       backAndForth(iter);
    277       ucol_closeElements(iter);
    278     }
    279 
    280     ucol_close(en_us);
    281 }
    282 
    283 /**
    284  * Test for CollationElementIterator previous and next for the whole set of
    285  * unicode characters with normalization on.
    286  */
    287 static void TestNormalizedUnicodeChar()
    288 {
    289     UChar source[0x100];
    290     UCollator *th_th;
    291     UCollationElements *iter;
    292     UErrorCode status = U_ZERO_ERROR;
    293     UChar codepoint;
    294 
    295     UChar *test;
    296     /* thai should have normalization on */
    297     th_th = ucol_open("th_TH", &status);
    298     if (U_FAILURE(status)){
    299         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
    300               myErrorName(status));
    301         return;
    302     }
    303 
    304     for (codepoint = 1; codepoint < 0xFFFE;)
    305     {
    306       test = source;
    307 
    308       while (codepoint % 0xFF != 0)
    309       {
    310         if (u_isdefined(codepoint))
    311           *(test ++) = codepoint;
    312         codepoint ++;
    313       }
    314 
    315       if (u_isdefined(codepoint))
    316         *(test ++) = codepoint;
    317 
    318       if (codepoint != 0xFFFF)
    319         codepoint ++;
    320 
    321       *test = 0;
    322       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
    323       if(U_FAILURE(status)){
    324           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    325               myErrorName(status));
    326             ucol_close(th_th);
    327           return;
    328       }
    329 
    330       backAndForth(iter);
    331       ucol_closeElements(iter);
    332 
    333       iter=ucol_openElements(th_th, source, -1, &status);
    334       if(U_FAILURE(status)){
    335           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    336               myErrorName(status));
    337             ucol_close(th_th);
    338           return;
    339       }
    340 
    341       backAndForth(iter);
    342       ucol_closeElements(iter);
    343     }
    344 
    345     ucol_close(th_th);
    346 }
    347 
    348 /**
    349 * Test the incremental normalization
    350 */
    351 static void TestNormalization()
    352 {
    353           UErrorCode          status = U_ZERO_ERROR;
    354     const char               *str    =
    355                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
    356           UCollator          *coll;
    357           UChar               rule[50];
    358           int                 rulelen = u_unescape(str, rule, 50);
    359           int                 count = 0;
    360     const char                *testdata[] =
    361                         {"\\u1ED9", "o\\u0323\\u0302",
    362                         "\\u0300\\u0315", "\\u0315\\u0300",
    363                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
    364                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
    365                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
    366                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
    367                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
    368     int32_t   srclen;
    369     UChar source[10];
    370     UCollationElements *iter;
    371 
    372     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
    373     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    374     if (U_FAILURE(status)){
    375         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
    376               myErrorName(status));
    377         return;
    378     }
    379 
    380     srclen = u_unescape(testdata[0], source, 10);
    381     iter = ucol_openElements(coll, source, srclen, &status);
    382     backAndForth(iter);
    383     ucol_closeElements(iter);
    384 
    385     srclen = u_unescape(testdata[1], source, 10);
    386     iter = ucol_openElements(coll, source, srclen, &status);
    387     backAndForth(iter);
    388     ucol_closeElements(iter);
    389 
    390     while (count < 12) {
    391         srclen = u_unescape(testdata[count], source, 10);
    392         iter = ucol_openElements(coll, source, srclen, &status);
    393 
    394         if (U_FAILURE(status)){
    395             log_err("ERROR: in creation of collator element iterator\n %s\n",
    396                   myErrorName(status));
    397             return;
    398         }
    399         backAndForth(iter);
    400         ucol_closeElements(iter);
    401 
    402         iter = ucol_openElements(coll, source, -1, &status);
    403 
    404         if (U_FAILURE(status)){
    405             log_err("ERROR: in creation of collator element iterator\n %s\n",
    406                   myErrorName(status));
    407             return;
    408         }
    409         backAndForth(iter);
    410         ucol_closeElements(iter);
    411         count ++;
    412     }
    413     ucol_close(coll);
    414 }
    415 
    416 /**
    417  * Test for CollationElementIterator.previous()
    418  *
    419  * @bug 4108758 - Make sure it works with contracting characters
    420  *
    421  */
    422 static void TestPrevious()
    423 {
    424     UCollator *coll=NULL;
    425     UChar rule[50];
    426     UChar *source;
    427     UCollator *c1, *c2, *c3;
    428     UCollationElements *iter;
    429     UErrorCode status = U_ZERO_ERROR;
    430     UChar test1[50];
    431     UChar test2[50];
    432 
    433     u_uastrcpy(test1, "What subset of all possible test cases?");
    434     u_uastrcpy(test2, "has the highest probability of detecting");
    435     coll = ucol_open("en_US", &status);
    436 
    437     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
    438     log_verbose("English locale testing back and forth\n");
    439     if(U_FAILURE(status)){
    440         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    441             myErrorName(status));
    442         ucol_close(coll);
    443         return;
    444     }
    445     /* A basic test to see if it's working at all */
    446     backAndForth(iter);
    447     ucol_closeElements(iter);
    448     ucol_close(coll);
    449 
    450     /* Test with a contracting character sequence */
    451     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    452     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    453 
    454     log_verbose("Contraction rule testing back and forth with no normalization\n");
    455 
    456     if (c1 == NULL || U_FAILURE(status))
    457     {
    458         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
    459             myErrorName(status));
    460         return;
    461     }
    462     source=(UChar*)malloc(sizeof(UChar) * 20);
    463     u_uastrcpy(source, "abchdcba");
    464     iter=ucol_openElements(c1, source, u_strlen(source), &status);
    465     if(U_FAILURE(status)){
    466         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    467             myErrorName(status));
    468         return;
    469     }
    470     backAndForth(iter);
    471     ucol_closeElements(iter);
    472     ucol_close(c1);
    473 
    474     /* Test with an expanding character sequence */
    475     u_uastrcpy(rule, "&a < b < c/abd < d");
    476     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    477     log_verbose("Expansion rule testing back and forth with no normalization\n");
    478     if (c2 == NULL || U_FAILURE(status))
    479     {
    480         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    481             myErrorName(status));
    482         return;
    483     }
    484     u_uastrcpy(source, "abcd");
    485     iter=ucol_openElements(c2, source, u_strlen(source), &status);
    486     if(U_FAILURE(status)){
    487         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    488             myErrorName(status));
    489         return;
    490     }
    491     backAndForth(iter);
    492     ucol_closeElements(iter);
    493     ucol_close(c2);
    494     /* Now try both */
    495     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
    496     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
    497     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
    498 
    499     if (c3 == NULL || U_FAILURE(status))
    500     {
    501         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    502             myErrorName(status));
    503         return;
    504     }
    505     u_uastrcpy(source, "abcdbchdc");
    506     iter=ucol_openElements(c3, source, u_strlen(source), &status);
    507     if(U_FAILURE(status)){
    508         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    509             myErrorName(status));
    510         return;
    511     }
    512     backAndForth(iter);
    513     ucol_closeElements(iter);
    514     ucol_close(c3);
    515     source[0] = 0x0e41;
    516     source[1] = 0x0e02;
    517     source[2] = 0x0e41;
    518     source[3] = 0x0e02;
    519     source[4] = 0x0e27;
    520     source[5] = 0x61;
    521     source[6] = 0x62;
    522     source[7] = 0x63;
    523     source[8] = 0;
    524 
    525     coll = ucol_open("th_TH", &status);
    526     log_verbose("Thai locale testing back and forth with normalization\n");
    527     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    528     if(U_FAILURE(status)){
    529         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    530             myErrorName(status));
    531         return;
    532     }
    533     backAndForth(iter);
    534     ucol_closeElements(iter);
    535     ucol_close(coll);
    536 
    537     /* prev test */
    538     source[0] = 0x0061;
    539     source[1] = 0x30CF;
    540     source[2] = 0x3099;
    541     source[3] = 0x30FC;
    542     source[4] = 0;
    543 
    544     coll = ucol_open("ja_JP", &status);
    545     log_verbose("Japanese locale testing back and forth with normalization\n");
    546     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    547     if(U_FAILURE(status)){
    548         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    549             myErrorName(status));
    550         return;
    551     }
    552     backAndForth(iter);
    553     ucol_closeElements(iter);
    554     ucol_close(coll);
    555 
    556     free(source);
    557 }
    558 
    559 /**
    560  * Test for getOffset() and setOffset()
    561  */
    562 static void TestOffset()
    563 {
    564     UErrorCode status= U_ZERO_ERROR;
    565     UCollator *en_us=NULL;
    566     UCollationElements *iter, *pristine;
    567     int32_t offset;
    568     OrderAndOffset *orders;
    569     int32_t orderLength=0;
    570     int     count = 0;
    571     UChar test1[50];
    572     UChar test2[50];
    573 
    574     u_uastrcpy(test1, "What subset of all possible test cases?");
    575     u_uastrcpy(test2, "has the highest probability of detecting");
    576     en_us = ucol_open("en_US", &status);
    577     log_verbose("Testing getOffset and setOffset for collations\n");
    578     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
    579     if(U_FAILURE(status)){
    580         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    581             myErrorName(status));
    582         ucol_close(en_us);
    583         return;
    584     }
    585 
    586     /* testing boundaries */
    587     ucol_setOffset(iter, 0, &status);
    588     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
    589         log_err("Error: After setting offset to 0, we should be at the end "
    590                 "of the backwards iteration");
    591     }
    592     ucol_setOffset(iter, u_strlen(test1), &status);
    593     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
    594         log_err("Error: After setting offset to end of the string, we should "
    595                 "be at the end of the backwards iteration");
    596     }
    597 
    598     /* Run all the way through the iterator, then get the offset */
    599 
    600     orders = getOrders(iter, &orderLength);
    601 
    602     offset = ucol_getOffset(iter);
    603 
    604     if (offset != u_strlen(test1))
    605     {
    606         log_err("offset at end != length %d vs %d\n", offset,
    607             u_strlen(test1) );
    608     }
    609 
    610     /* Now set the offset back to the beginning and see if it works */
    611     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    612     if(U_FAILURE(status)){
    613         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    614             myErrorName(status));
    615     ucol_close(en_us);
    616         return;
    617     }
    618     status = U_ZERO_ERROR;
    619 
    620     ucol_setOffset(iter, 0, &status);
    621     if (U_FAILURE(status))
    622     {
    623         log_err("setOffset failed. %s\n",    myErrorName(status));
    624     }
    625     else
    626     {
    627         assertEqual(iter, pristine);
    628     }
    629 
    630     ucol_closeElements(pristine);
    631     ucol_closeElements(iter);
    632     free(orders);
    633 
    634     /* testing offsets in normalization buffer */
    635     test1[0] = 0x61;
    636     test1[1] = 0x300;
    637     test1[2] = 0x316;
    638     test1[3] = 0x62;
    639     test1[4] = 0;
    640     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    641     iter = ucol_openElements(en_us, test1, 4, &status);
    642     if(U_FAILURE(status)){
    643         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    644             myErrorName(status));
    645         ucol_close(en_us);
    646         return;
    647     }
    648 
    649     count = 0;
    650     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
    651         U_SUCCESS(status)) {
    652         switch (count) {
    653         case 0:
    654             if (ucol_getOffset(iter) != 1) {
    655                 log_err("ERROR: Offset of iteration should be 1\n");
    656             }
    657             break;
    658         case 3:
    659             if (ucol_getOffset(iter) != 4) {
    660                 log_err("ERROR: Offset of iteration should be 4\n");
    661             }
    662             break;
    663         default:
    664             if (ucol_getOffset(iter) != 3) {
    665                 log_err("ERROR: Offset of iteration should be 3\n");
    666             }
    667         }
    668         count ++;
    669     }
    670 
    671     ucol_reset(iter);
    672     count = 0;
    673     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
    674         U_SUCCESS(status)) {
    675         switch (count) {
    676         case 0:
    677         case 1:
    678             if (ucol_getOffset(iter) != 3) {
    679                 log_err("ERROR: Offset of iteration should be 3\n");
    680             }
    681             break;
    682         case 2:
    683             if (ucol_getOffset(iter) != 1) {
    684                 log_err("ERROR: Offset of iteration should be 1\n");
    685             }
    686             break;
    687         default:
    688             if (ucol_getOffset(iter) != 0) {
    689                 log_err("ERROR: Offset of iteration should be 0\n");
    690             }
    691         }
    692         count ++;
    693     }
    694 
    695     if(U_FAILURE(status)){
    696         log_err("ERROR: in iterating collation elements %s\n",
    697             myErrorName(status));
    698     }
    699 
    700     ucol_closeElements(iter);
    701     ucol_close(en_us);
    702 }
    703 
    704 /**
    705  * Test for setText()
    706  */
    707 static void TestSetText()
    708 {
    709     int32_t c,i;
    710     UErrorCode status = U_ZERO_ERROR;
    711     UCollator *en_us=NULL;
    712     UCollationElements *iter1, *iter2;
    713     UChar test1[50];
    714     UChar test2[50];
    715 
    716     u_uastrcpy(test1, "What subset of all possible test cases?");
    717     u_uastrcpy(test2, "has the highest probability of detecting");
    718     en_us = ucol_open("en_US", &status);
    719     log_verbose("testing setText for Collation elements\n");
    720     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    721     if(U_FAILURE(status)){
    722         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
    723             myErrorName(status));
    724     ucol_close(en_us);
    725         return;
    726     }
    727     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
    728     if(U_FAILURE(status)){
    729         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
    730             myErrorName(status));
    731     ucol_close(en_us);
    732         return;
    733     }
    734 
    735     /* Run through the second iterator just to exercise it */
    736     c = ucol_next(iter2, &status);
    737     i = 0;
    738 
    739     while ( ++i < 10 && (c != UCOL_NULLORDER))
    740     {
    741         if (U_FAILURE(status))
    742         {
    743             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
    744             ucol_closeElements(iter2);
    745             ucol_closeElements(iter1);
    746     ucol_close(en_us);
    747             return;
    748         }
    749 
    750         c = ucol_next(iter2, &status);
    751     }
    752 
    753     /* Now set it to point to the same string as the first iterator */
    754     ucol_setText(iter2, test1, u_strlen(test1), &status);
    755     if (U_FAILURE(status))
    756     {
    757         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
    758     }
    759     else
    760     {
    761         assertEqual(iter1, iter2);
    762     }
    763 
    764     /* Now set it to point to a null string with fake length*/
    765     ucol_setText(iter2, NULL, 2, &status);
    766     if (U_FAILURE(status))
    767     {
    768         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
    769     }
    770     else
    771     {
    772         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
    773             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
    774         }
    775     }
    776 
    777     ucol_closeElements(iter2);
    778     ucol_closeElements(iter1);
    779     ucol_close(en_us);
    780 }
    781 
    782 /** @bug 4108762
    783  * Test for getMaxExpansion()
    784  */
    785 static void TestMaxExpansion()
    786 {
    787     UErrorCode          status = U_ZERO_ERROR;
    788     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    789     UChar               ch     = 0;
    790     UChar32             unassigned = 0xEFFFD;
    791     UChar               supplementary[2];
    792     uint32_t            stringOffset = 0;
    793     UBool               isError = FALSE;
    794     uint32_t            sorder = 0;
    795     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    796     uint32_t            temporder = 0;
    797 
    798     UChar rule[256];
    799     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    800     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    801         UCOL_DEFAULT_STRENGTH,NULL, &status);
    802     if(U_SUCCESS(status) && coll) {
    803       iter = ucol_openElements(coll, &ch, 1, &status);
    804 
    805       while (ch < 0xFFFF && U_SUCCESS(status)) {
    806           int      count = 1;
    807           uint32_t order;
    808           int32_t  size = 0;
    809 
    810           ch ++;
    811 
    812           ucol_setText(iter, &ch, 1, &status);
    813           order = ucol_previous(iter, &status);
    814 
    815           /* thai management */
    816           if (order == 0)
    817               order = ucol_previous(iter, &status);
    818 
    819           while (U_SUCCESS(status) &&
    820               ucol_previous(iter, &status) != UCOL_NULLORDER) {
    821               count ++;
    822           }
    823 
    824           size = ucol_getMaxExpansion(iter, order);
    825           if (U_FAILURE(status) || size < count) {
    826               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    827                   ch, count);
    828           }
    829       }
    830 
    831       /* testing for exact max expansion */
    832       ch = 0;
    833       while (ch < 0x61) {
    834           uint32_t order;
    835           int32_t  size;
    836           ucol_setText(iter, &ch, 1, &status);
    837           order = ucol_previous(iter, &status);
    838           size  = ucol_getMaxExpansion(iter, order);
    839           if (U_FAILURE(status) || size != 1) {
    840               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    841                   ch, 1);
    842           }
    843           ch ++;
    844       }
    845 
    846       ch = 0x63;
    847       ucol_setText(iter, &ch, 1, &status);
    848       temporder = ucol_previous(iter, &status);
    849 
    850       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
    851           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    852                   ch, 3);
    853       }
    854 
    855       ch = 0x64;
    856       ucol_setText(iter, &ch, 1, &status);
    857       temporder = ucol_previous(iter, &status);
    858 
    859       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
    860           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    861                   ch, 3);
    862       }
    863 
    864       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
    865       ucol_setText(iter, supplementary, 2, &status);
    866       sorder = ucol_previous(iter, &status);
    867 
    868       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
    869           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    870                   ch, 2);
    871       }
    872 
    873       /* testing jamo */
    874       ch = 0x1165;
    875 
    876       ucol_setText(iter, &ch, 1, &status);
    877       temporder = ucol_previous(iter, &status);
    878       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
    879           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    880                   ch, 3);
    881       }
    882 
    883       ucol_closeElements(iter);
    884       ucol_close(coll);
    885 
    886       /* testing special jamo &a<\u1160 */
    887       rule[0] = 0x26;
    888       rule[1] = 0x71;
    889       rule[2] = 0x3c;
    890       rule[3] = 0x1165;
    891       rule[4] = 0x2f;
    892       rule[5] = 0x71;
    893       rule[6] = 0x71;
    894       rule[7] = 0x71;
    895       rule[8] = 0x71;
    896       rule[9] = 0;
    897 
    898       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    899           UCOL_DEFAULT_STRENGTH,NULL, &status);
    900       iter = ucol_openElements(coll, &ch, 1, &status);
    901 
    902       temporder = ucol_previous(iter, &status);
    903       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
    904           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    905                   ch, 5);
    906       }
    907 
    908       ucol_closeElements(iter);
    909       ucol_close(coll);
    910     } else {
    911       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    912     }
    913 
    914 }
    915 
    916 
    917 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
    918 {
    919     int32_t c1, c2;
    920     int32_t count = 0;
    921     UErrorCode status = U_ZERO_ERROR;
    922 
    923     do
    924     {
    925         c1 = ucol_next(i1, &status);
    926         c2 = ucol_next(i2, &status);
    927 
    928         if (c1 != c2)
    929         {
    930             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
    931             break;
    932         }
    933 
    934         count += 1;
    935     }
    936     while (c1 != UCOL_NULLORDER);
    937 }
    938 
    939 /**
    940  * Testing iterators with extremely small buffers
    941  */
    942 static void TestSmallBuffer()
    943 {
    944     UErrorCode          status = U_ZERO_ERROR;
    945     UCollator          *coll;
    946     UCollationElements *testiter,
    947                        *iter;
    948     int32_t             count = 0;
    949     OrderAndOffset     *testorders,
    950                        *orders;
    951 
    952     UChar teststr[500];
    953     UChar str[] = {0x300, 0x31A, 0};
    954     /*
    955     creating a long string of decomposable characters,
    956     since by default the writable buffer is of size 256
    957     */
    958     while (count < 500) {
    959         if ((count & 1) == 0) {
    960             teststr[count ++] = 0x300;
    961         }
    962         else {
    963             teststr[count ++] = 0x31A;
    964         }
    965     }
    966 
    967     coll = ucol_open("th_TH", &status);
    968     if(U_SUCCESS(status) && coll) {
    969       testiter = ucol_openElements(coll, teststr, 500, &status);
    970       iter = ucol_openElements(coll, str, 2, &status);
    971 
    972       orders     = getOrders(iter, &count);
    973       if (count != 2) {
    974           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
    975       }
    976 
    977       /*
    978       this will rearrange the string data to 250 characters of 0x300 first then
    979       250 characters of 0x031A
    980       */
    981       testorders = getOrders(testiter, &count);
    982 
    983       if (count != 500) {
    984           log_err("Error decomposition does not give the right sized collation elements\n");
    985       }
    986 
    987       while (count != 0) {
    988           /* UCA collation element for 0x0F76 */
    989           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
    990               (count <= 250 && testorders[-- count].order != orders[0].order)) {
    991               log_err("Error decomposition does not give the right collation element at %d count\n", count);
    992               break;
    993           }
    994       }
    995 
    996       free(testorders);
    997       free(orders);
    998 
    999       ucol_reset(testiter);
   1000 
   1001       /* ensures closing of elements done properly to clear writable buffer */
   1002       ucol_next(testiter, &status);
   1003       ucol_next(testiter, &status);
   1004       ucol_closeElements(testiter);
   1005       ucol_closeElements(iter);
   1006       ucol_close(coll);
   1007     } else {
   1008       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   1009     }
   1010 }
   1011 
   1012 /**
   1013 * Sniplets of code from genuca
   1014 */
   1015 static int32_t hex2num(char hex) {
   1016     if(hex>='0' && hex <='9') {
   1017         return hex-'0';
   1018     } else if(hex>='a' && hex<='f') {
   1019         return hex-'a'+10;
   1020     } else if(hex>='A' && hex<='F') {
   1021         return hex-'A'+10;
   1022     } else {
   1023         return 0;
   1024     }
   1025 }
   1026 
   1027 /**
   1028 * Getting codepoints from a string
   1029 * @param str character string contain codepoints seperated by space and ended
   1030 *        by a semicolon
   1031 * @param codepoints array for storage, assuming size > 5
   1032 * @return position at the end of the codepoint section
   1033 */
   1034 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
   1035     UErrorCode errorCode = U_ZERO_ERROR;
   1036     char *semi = uprv_strchr(str, ';');
   1037     char *pipe = uprv_strchr(str, '|');
   1038     char *s;
   1039     *codepoints = 0;
   1040     *contextCPs = 0;
   1041     if(semi == NULL) {
   1042         log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
   1043         return str;
   1044     }
   1045     if(pipe != NULL) {
   1046         int32_t contextLength;
   1047         *pipe = 0;
   1048         contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
   1049         *pipe = '|';
   1050         if(U_FAILURE(errorCode)) {
   1051             log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
   1052             return str;
   1053         }
   1054         /* prepend the precontext string to the codepoints */
   1055         u_memcpy(codepoints, contextCPs, contextLength);
   1056         codepoints += contextLength;
   1057         /* start of the code point string */
   1058         s = pipe + 1;
   1059     } else {
   1060         s = str;
   1061     }
   1062     u_parseString(s, codepoints, 99, NULL, &errorCode);
   1063     if(U_FAILURE(errorCode)) {
   1064         log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
   1065         return str;
   1066     }
   1067     return semi + 1;
   1068 }
   1069 
   1070 /**
   1071 * Sniplets of code from genuca
   1072 */
   1073 static int32_t
   1074 readElement(char **from, char *to, char separator, UErrorCode *status)
   1075 {
   1076     if (U_SUCCESS(*status)) {
   1077         char    buffer[1024];
   1078         int32_t i = 0;
   1079         while (**from != separator) {
   1080             if (**from != ' ') {
   1081                 *(buffer+i++) = **from;
   1082             }
   1083             (*from)++;
   1084         }
   1085         (*from)++;
   1086         *(buffer + i) = 0;
   1087         strcpy(to, buffer);
   1088         return i/2;
   1089     }
   1090 
   1091     return 0;
   1092 }
   1093 
   1094 /**
   1095 * Sniplets of code from genuca
   1096 */
   1097 static uint32_t
   1098 getSingleCEValue(char *primary, char *secondary, char *tertiary,
   1099                           UErrorCode *status)
   1100 {
   1101     if (U_SUCCESS(*status)) {
   1102         uint32_t  value    = 0;
   1103         char      primsave = '\0';
   1104         char      secsave  = '\0';
   1105         char      tersave  = '\0';
   1106         char     *primend  = primary+4;
   1107         char     *secend   = secondary+2;
   1108         char     *terend   = tertiary+2;
   1109         uint32_t  primvalue;
   1110         uint32_t  secvalue;
   1111         uint32_t  tervalue;
   1112 
   1113         if (uprv_strlen(primary) > 4) {
   1114             primsave = *primend;
   1115             *primend = '\0';
   1116         }
   1117 
   1118         if (uprv_strlen(secondary) > 2) {
   1119             secsave = *secend;
   1120             *secend = '\0';
   1121         }
   1122 
   1123         if (uprv_strlen(tertiary) > 2) {
   1124             tersave = *terend;
   1125             *terend = '\0';
   1126         }
   1127 
   1128         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
   1129         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
   1130         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
   1131         if(primvalue <= 0xFF) {
   1132           primvalue <<= 8;
   1133         }
   1134 
   1135         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
   1136            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
   1137            | (tervalue & UCOL_TERTIARYORDERMASK);
   1138 
   1139         if(primsave!='\0') {
   1140             *primend = primsave;
   1141         }
   1142         if(secsave!='\0') {
   1143             *secend = secsave;
   1144         }
   1145         if(tersave!='\0') {
   1146             *terend = tersave;
   1147         }
   1148         return value;
   1149     }
   1150     return 0;
   1151 }
   1152 
   1153 /**
   1154 * Getting collation elements generated from a string
   1155 * @param str character string contain collation elements contained in [] and
   1156 *        seperated by space
   1157 * @param ce array for storage, assuming size > 20
   1158 * @param status error status
   1159 * @return position at the end of the codepoint section
   1160 */
   1161 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
   1162     char       *pStartCP     = uprv_strchr(str, '[');
   1163     int         count        = 0;
   1164     char       *pEndCP;
   1165     char        primary[100];
   1166     char        secondary[100];
   1167     char        tertiary[100];
   1168 
   1169     while (*pStartCP == '[') {
   1170         uint32_t primarycount   = 0;
   1171         uint32_t secondarycount = 0;
   1172         uint32_t tertiarycount  = 0;
   1173         uint32_t CEi = 1;
   1174         pEndCP = strchr(pStartCP, ']');
   1175         if(pEndCP == NULL) {
   1176             break;
   1177         }
   1178         pStartCP ++;
   1179 
   1180         primarycount   = readElement(&pStartCP, primary, ',', status);
   1181         secondarycount = readElement(&pStartCP, secondary, ',', status);
   1182         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
   1183 
   1184         /* I want to get the CEs entered right here, including continuation */
   1185         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
   1186         if (U_FAILURE(*status)) {
   1187             break;
   1188         }
   1189 
   1190         while (2 * CEi < primarycount || CEi < secondarycount ||
   1191                CEi < tertiarycount) {
   1192             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
   1193             if (2 * CEi < primarycount) {
   1194                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
   1195                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
   1196             }
   1197 
   1198             if (2 * CEi + 1 < primarycount) {
   1199                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
   1200                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
   1201             }
   1202 
   1203             if (CEi < secondarycount) {
   1204                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
   1205                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
   1206             }
   1207 
   1208             if (CEi < tertiarycount) {
   1209                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
   1210                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
   1211             }
   1212 
   1213             CEi ++;
   1214             ces[count ++] = value;
   1215         }
   1216 
   1217       pStartCP = pEndCP + 1;
   1218     }
   1219     ces[count] = 0;
   1220     return pStartCP;
   1221 }
   1222 
   1223 /**
   1224 * Getting the FractionalUCA.txt file stream
   1225 */
   1226 static FileStream * getFractionalUCA(void)
   1227 {
   1228     char        newPath[256];
   1229     char        backupPath[256];
   1230     FileStream *result = NULL;
   1231 
   1232     /* Look inside ICU_DATA first */
   1233     uprv_strcpy(newPath, ctest_dataSrcDir());
   1234     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
   1235     uprv_strcat(newPath, "FractionalUCA.txt");
   1236 
   1237     /* As a fallback, try to guess where the source data was located
   1238      *   at the time ICU was built, and look there.
   1239      */
   1240 #if defined (U_TOPSRCDIR)
   1241     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
   1242 #else
   1243     {
   1244         UErrorCode errorCode = U_ZERO_ERROR;
   1245         strcpy(backupPath, loadTestData(&errorCode));
   1246         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
   1247     }
   1248 #endif
   1249     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
   1250 
   1251     result = T_FileStream_open(newPath, "rb");
   1252 
   1253     if (result == NULL) {
   1254         result = T_FileStream_open(backupPath, "rb");
   1255         if (result == NULL) {
   1256             log_err("Failed to open either %s or %s\n", newPath, backupPath);
   1257         }
   1258     }
   1259     return result;
   1260 }
   1261 
   1262 /**
   1263 * Testing the CEs returned by the iterator
   1264 */
   1265 static void TestCEs() {
   1266     FileStream *file = NULL;
   1267     char        line[2048];
   1268     char       *str;
   1269     UChar       codepoints[10];
   1270     uint32_t    ces[20];
   1271     UErrorCode  status = U_ZERO_ERROR;
   1272     UCollator          *coll = ucol_open("", &status);
   1273     uint32_t lineNo = 0;
   1274     UChar       contextCPs[5];
   1275 
   1276     if (U_FAILURE(status)) {
   1277         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
   1278         return;
   1279     }
   1280 
   1281     file = getFractionalUCA();
   1282 
   1283     if (file == NULL) {
   1284         log_err("*** unable to open input FractionalUCA.txt file ***\n");
   1285         return;
   1286     }
   1287 
   1288 
   1289     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1290         int                 count = 0;
   1291         UCollationElements *iter;
   1292         int32_t            preContextCeLen=0;
   1293         lineNo++;
   1294         /* skip this line if it is empty or a comment or is a return value
   1295         or start of some variable section */
   1296         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1297             line[0] == 0x000D || line[0] == '[') {
   1298             continue;
   1299         }
   1300 
   1301         str = getCodePoints(line, codepoints, contextCPs);
   1302 
   1303         /* these are 'fake' codepoints in the fractional UCA, and are used just
   1304          * for positioning of indirect values. They should not go through this
   1305          * test.
   1306          */
   1307         if(*codepoints == 0xFDD0) {
   1308           continue;
   1309         }
   1310         if (*contextCPs != 0) {
   1311             iter = ucol_openElements(coll, contextCPs, -1, &status);
   1312             if (U_FAILURE(status)) {
   1313                 log_err("Error in opening collation elements\n");
   1314                 break;
   1315             }
   1316             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
   1317                 preContextCeLen++;
   1318             }
   1319             ucol_closeElements(iter);
   1320         }
   1321 
   1322         getCEs(str, ces+preContextCeLen, &status);
   1323         if (U_FAILURE(status)) {
   1324             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
   1325             break;
   1326         }
   1327         iter = ucol_openElements(coll, codepoints, -1, &status);
   1328         if (U_FAILURE(status)) {
   1329             log_err("Error in opening collation elements\n");
   1330             break;
   1331         }
   1332         for (;;) {
   1333             uint32_t ce = (uint32_t)ucol_next(iter, &status);
   1334             if (ce == 0xFFFFFFFF) {
   1335                 ce = 0;
   1336             }
   1337             /* we now unconditionally reorder Thai/Lao prevowels, so this
   1338              * test would fail if we don't skip here.
   1339              */
   1340             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
   1341               continue;
   1342             }
   1343             if (ce != ces[count] || U_FAILURE(status)) {
   1344                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
   1345                 break;
   1346             }
   1347             if (ces[count] == 0) {
   1348                 break;
   1349             }
   1350             count ++;
   1351         }
   1352         ucol_closeElements(iter);
   1353     }
   1354 
   1355     T_FileStream_close(file);
   1356     ucol_close(coll);
   1357 }
   1358 
   1359 /**
   1360 * Testing the discontigous contractions
   1361 */
   1362 static void TestDiscontiguos() {
   1363     const char               *rulestr    =
   1364                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
   1365           UChar               rule[50];
   1366           int                 rulelen = u_unescape(rulestr, rule, 50);
   1367     const char               *src[] = {
   1368      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
   1369     /* base character blocked */
   1370      "XD\\u0300", "XD\\u0300\\u0315",
   1371     /* non blocking combining character */
   1372      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
   1373      /* blocking combining character */
   1374      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
   1375      /* contraction prefix */
   1376      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
   1377      "X\\u0300\\u031A\\u0315",
   1378      /* ends not with a contraction character */
   1379      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
   1380      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
   1381     };
   1382     const char               *tgt[] = {
   1383      /* non blocking combining character */
   1384      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
   1385     /* base character blocked */
   1386      "X D \\u0300", "X D \\u0300\\u0315",
   1387     /* non blocking combining character */
   1388      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
   1389      /* blocking combining character */
   1390      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
   1391      /* contraction prefix */
   1392      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
   1393      "X\\u0300 \\u031A \\u0315",
   1394      /* ends not with a contraction character */
   1395      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
   1396      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
   1397     };
   1398           int                 size   = 20;
   1399           UCollator          *coll;
   1400           UErrorCode          status    = U_ZERO_ERROR;
   1401           int                 count     = 0;
   1402           UCollationElements *iter;
   1403           UCollationElements *resultiter;
   1404 
   1405     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
   1406     iter       = ucol_openElements(coll, rule, 1, &status);
   1407     resultiter = ucol_openElements(coll, rule, 1, &status);
   1408 
   1409     if (U_FAILURE(status)) {
   1410         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
   1411         return;
   1412     }
   1413 
   1414     while (count < size) {
   1415         UChar  str[20];
   1416         UChar  tstr[20];
   1417         int    strLen = u_unescape(src[count], str, 20);
   1418         UChar *s;
   1419 
   1420         ucol_setText(iter, str, strLen, &status);
   1421         if (U_FAILURE(status)) {
   1422             log_err("Error opening collation iterator\n");
   1423             return;
   1424         }
   1425 
   1426         u_unescape(tgt[count], tstr, 20);
   1427         s = tstr;
   1428 
   1429         log_verbose("count %d\n", count);
   1430 
   1431         for (;;) {
   1432             uint32_t  ce;
   1433             UChar    *e = u_strchr(s, 0x20);
   1434             if (e == 0) {
   1435                 e = u_strchr(s, 0);
   1436             }
   1437             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
   1438             ce = ucol_next(resultiter, &status);
   1439             if (U_FAILURE(status)) {
   1440                 log_err("Error manipulating collation iterator\n");
   1441                 return;
   1442             }
   1443             while (ce != UCOL_NULLORDER) {
   1444                 if (ce != (uint32_t)ucol_next(iter, &status) ||
   1445                     U_FAILURE(status)) {
   1446                     log_err("Discontiguos contraction test mismatch\n");
   1447                     return;
   1448                 }
   1449                 ce = ucol_next(resultiter, &status);
   1450                 if (U_FAILURE(status)) {
   1451                     log_err("Error getting next collation element\n");
   1452                     return;
   1453                 }
   1454             }
   1455             s = e + 1;
   1456             if (*e == 0) {
   1457                 break;
   1458             }
   1459         }
   1460         ucol_reset(iter);
   1461         backAndForth(iter);
   1462         count ++;
   1463     }
   1464     ucol_closeElements(resultiter);
   1465     ucol_closeElements(iter);
   1466     ucol_close(coll);
   1467 }
   1468 
   1469 static void TestCEBufferOverflow()
   1470 {
   1471     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
   1472     UErrorCode          status = U_ZERO_ERROR;
   1473     UChar               rule[10];
   1474     UCollator          *coll;
   1475     UCollationElements *iter;
   1476 
   1477     u_uastrcpy(rule, "&z < AB");
   1478     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
   1479     if (U_FAILURE(status)) {
   1480         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
   1481         return;
   1482     }
   1483 
   1484     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
   1485     test. this will cause an overflow in getPrev */
   1486     str[0] = 0x0041;    /* 'A' */
   1487     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
   1488     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
   1489     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
   1490     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
   1491                              &status);
   1492     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
   1493         status == U_BUFFER_OVERFLOW_ERROR) {
   1494         log_err("CE buffer should not overflow with long string of trail surrogates\n");
   1495     }
   1496     ucol_closeElements(iter);
   1497     ucol_close(coll);
   1498 }
   1499 
   1500 /**
   1501 * Checking collation element validity.
   1502 */
   1503 #define MAX_CODEPOINTS_TO_SHOW 10
   1504 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
   1505     int i, lengthToUse = length;
   1506     if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
   1507         lengthToUse = MAX_CODEPOINTS_TO_SHOW;
   1508     }
   1509     for (i = 0; i < lengthToUse; ++i) {
   1510         int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
   1511         if (bytesWritten <= 0) {
   1512             break;
   1513         }
   1514         codepointText += bytesWritten;
   1515     }
   1516     if (i < length) {
   1517         sprintf(codepointText, " ...");
   1518     }
   1519 }
   1520 
   1521 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
   1522                              int length)
   1523 {
   1524     UErrorCode          status = U_ZERO_ERROR;
   1525     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
   1526                                                   &status);
   1527     UBool result = FALSE;
   1528     UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
   1529     const char * collLocale;
   1530 
   1531     if (U_FAILURE(status)) {
   1532         log_err("Error creating iterator for testing validity\n");
   1533         return FALSE;
   1534     }
   1535     collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
   1536     if (U_FAILURE(status) || collLocale==NULL) {
   1537         status = U_ZERO_ERROR;
   1538         collLocale = "?";
   1539     }
   1540 
   1541     for (;;) {
   1542         uint32_t ce = ucol_next(iter, &status);
   1543         uint32_t primary, p1, p2, secondary, tertiary;
   1544         if (ce == UCOL_NULLORDER) {
   1545             result = TRUE;
   1546             break;
   1547         }
   1548         if (ce == 0) {
   1549             continue;
   1550         }
   1551         if (ce == 0x02000202) {
   1552             /* special CE for merge-sort character */
   1553             if (*codepoints == 0xFFFE /* && length == 1 */) {
   1554                 /*
   1555                  * Note: We should check for length==1 but the token parser appears
   1556                  * to give us trailing NUL characters.
   1557                  * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
   1558                  *                     rather than the internal collation rule parser
   1559                  */
   1560                 continue;
   1561             } else {
   1562                 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
   1563                         (int)*codepoints, (int)length);
   1564                 break;
   1565             }
   1566         }
   1567         primary   = UCOL_PRIMARYORDER(ce);
   1568         p1 = primary >> 8;
   1569         p2 = primary & 0xFF;
   1570         secondary = UCOL_SECONDARYORDER(ce);
   1571         tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
   1572 
   1573         if (!isContinuation(ce)) {
   1574             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1575                 log_err("Empty CE %08lX except for case bits\n", (long)ce);
   1576                 break;
   1577             }
   1578             if (p1 == 0) {
   1579                 if (p2 != 0) {
   1580                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1581                     break;
   1582                 }
   1583                 primaryDone = TRUE;
   1584             } else {
   1585                 if (p1 <= 2 || p1 >= 0xF0) {
   1586                     /* Primary first bytes F0..FF are specials. */
   1587                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1588                     break;
   1589                 }
   1590                 if (p2 == 0) {
   1591                     primaryDone = TRUE;
   1592                 } else {
   1593                     if (p2 <= 3 || p2 >= 0xFF) {
   1594                         /* Primary second bytes 03 and FF are sort key compression terminators. */
   1595                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1596                         break;
   1597                     }
   1598                     primaryDone = FALSE;
   1599                 }
   1600             }
   1601             if (secondary == 0) {
   1602                 if (primary != 0) {
   1603                     log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
   1604                     break;
   1605                 }
   1606                 secondaryDone = TRUE;
   1607             } else {
   1608                 if (secondary <= 2 ||
   1609                     (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
   1610                 ) {
   1611                     /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
   1612                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1613                     break;
   1614                 }
   1615                 secondaryDone = FALSE;
   1616             }
   1617             if (tertiary == 0) {
   1618                 /* We know that ce != 0. */
   1619                 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
   1620                 break;
   1621             }
   1622             if (tertiary <= 2) {
   1623                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1624                 break;
   1625             }
   1626             tertiaryDone = FALSE;
   1627         } else {
   1628             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1629                 log_err("Empty continuation %08lX\n", (long)ce);
   1630                 break;
   1631             }
   1632             if (primaryDone && primary != 0) {
   1633                 log_err("Primary was done but continues in %08lX\n", (long)ce);
   1634                 break;
   1635             }
   1636             if (p1 == 0) {
   1637                 if (p2 != 0) {
   1638                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1639                     break;
   1640                 }
   1641                 primaryDone = TRUE;
   1642             } else {
   1643                 if (p1 <= 2) {
   1644                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1645                     break;
   1646                 }
   1647                 if (p2 == 0) {
   1648                     primaryDone = TRUE;
   1649                 } else {
   1650                     if (p2 <= 3) {
   1651                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1652                         break;
   1653                     }
   1654                 }
   1655             }
   1656             if (secondaryDone && secondary != 0) {
   1657                 log_err("Secondary was done but continues in %08lX\n", (long)ce);
   1658                 break;
   1659             }
   1660             if (secondary == 0) {
   1661                 secondaryDone = TRUE;
   1662             } else {
   1663                 if (secondary <= 2) {
   1664                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1665                     break;
   1666                 }
   1667             }
   1668             if (tertiaryDone && tertiary != 0) {
   1669                 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
   1670                 break;
   1671             }
   1672             if (tertiary == 0) {
   1673                 tertiaryDone = TRUE;
   1674             } else if (tertiary <= 2) {
   1675                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1676                 break;
   1677             }
   1678         }
   1679     }
   1680     if (!result) {
   1681         char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
   1682         showCodepoints(codepoints, length, codepointText);
   1683         log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
   1684     }
   1685     ucol_closeElements(iter);
   1686     return result;
   1687 }
   1688 
   1689 static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 };  /* "[import" */
   1690 
   1691 static void TestCEValidity()
   1692 {
   1693     /* testing UCA collation elements */
   1694     UErrorCode  status      = U_ZERO_ERROR;
   1695     /* en_US has no tailorings */
   1696     UCollator  *coll        = ucol_open("root", &status);
   1697     /* tailored locales */
   1698     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
   1699     const char *loc;
   1700     FileStream *file = NULL;
   1701     char        line[2048];
   1702     UChar       codepoints[11];
   1703     int         count = 0;
   1704     int         maxCount = 0;
   1705     UChar       contextCPs[3];
   1706     UChar32     c;
   1707     UParseError parseError;
   1708     if (U_FAILURE(status)) {
   1709         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1710         return;
   1711     }
   1712     log_verbose("Testing UCA elements\n");
   1713     file = getFractionalUCA();
   1714     if (file == NULL) {
   1715         log_err("Fractional UCA data can not be opened\n");
   1716         return;
   1717     }
   1718 
   1719     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1720         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1721             line[0] == 0x000D || line[0] == '[') {
   1722             continue;
   1723         }
   1724 
   1725         getCodePoints(line, codepoints, contextCPs);
   1726         checkCEValidity(coll, codepoints, u_strlen(codepoints));
   1727     }
   1728 
   1729     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1730     for (c = 0; c <= 0xffff; ++c) {
   1731         if (u_isdefined(c)) {
   1732             codepoints[0] = (UChar)c;
   1733             checkCEValidity(coll, codepoints, 1);
   1734         }
   1735     }
   1736     for (; c <= 0x10ffff; ++c) {
   1737         if (u_isdefined(c)) {
   1738             int32_t i = 0;
   1739             U16_APPEND_UNSAFE(codepoints, i, c);
   1740             checkCEValidity(coll, codepoints, i);
   1741         }
   1742     }
   1743 
   1744     ucol_close(coll);
   1745 
   1746     /* testing tailored collation elements */
   1747     log_verbose("Testing tailored elements\n");
   1748     if(getTestOption(QUICK_OPTION)) {
   1749         maxCount = sizeof(locale)/sizeof(locale[0]);
   1750     } else {
   1751         maxCount = uloc_countAvailable();
   1752     }
   1753     while (count < maxCount) {
   1754         const UChar *rules = NULL,
   1755                     *current = NULL;
   1756         UChar *rulesCopy = NULL;
   1757         int32_t ruleLen = 0;
   1758 
   1759         uint32_t chOffset = 0;
   1760         uint32_t chLen = 0;
   1761         uint32_t exOffset = 0;
   1762         uint32_t exLen = 0;
   1763         uint32_t prefixOffset = 0;
   1764         uint32_t prefixLen = 0;
   1765         UBool    startOfRules = TRUE;
   1766         UColOptionSet opts;
   1767 
   1768         UColTokenParser src;
   1769         uint32_t strength = 0;
   1770         uint16_t specs = 0;
   1771         if(getTestOption(QUICK_OPTION)) {
   1772             loc = locale[count];
   1773         } else {
   1774             loc = uloc_getAvailable(count);
   1775             if(!hasCollationElements(loc)) {
   1776                 count++;
   1777                 continue;
   1778             }
   1779         }
   1780         status = U_ZERO_ERROR; // clear status from previous loop iteration
   1781 
   1782         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1783 
   1784         log_verbose("Testing CEs for %s\n", loc);
   1785 
   1786         coll      = ucol_open(loc, &status);
   1787         if (U_FAILURE(status)) {
   1788             log_err("%s collator creation failed with status %s\n", loc, u_errorName(status));
   1789             return;
   1790         }
   1791 
   1792         src.opts = &opts;
   1793         rules = ucol_getRules(coll, &ruleLen);
   1794 
   1795         /*
   1796          * We have not set up the UColTokenParser with a callback function
   1797          * to fetch [import] sub-rules,
   1798          * so skip testing tailorings that import others.
   1799          * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
   1800          *                     rather than the internal collation rule parser
   1801          */
   1802         if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
   1803             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   1804                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1805             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1806             src.current = src.source = rulesCopy;
   1807             src.end = rulesCopy + ruleLen;
   1808             src.extraCurrent = src.end;
   1809             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1810 
   1811 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1812 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1813             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) {
   1814               strength = src.parsedToken.strength;
   1815               chOffset = src.parsedToken.charsOffset;
   1816               chLen = src.parsedToken.charsLen;
   1817               exOffset = src.parsedToken.extensionOffset;
   1818               exLen = src.parsedToken.extensionLen;
   1819               prefixOffset = src.parsedToken.prefixOffset;
   1820               prefixLen = src.parsedToken.prefixLen;
   1821               specs = src.parsedToken.flags;
   1822 
   1823                 startOfRules = FALSE;
   1824                 uprv_memcpy(codepoints, src.source + chOffset,
   1825                                                        chLen * sizeof(UChar));
   1826                 codepoints[chLen] = 0;
   1827                 checkCEValidity(coll, codepoints, chLen);
   1828             }
   1829             if (U_FAILURE(status)) {
   1830                 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));
   1831             }
   1832             uprv_free(src.source);
   1833             uprv_free(src.reorderCodes);
   1834         }
   1835 
   1836         ucol_close(coll);
   1837         count ++;
   1838     }
   1839     T_FileStream_close(file);
   1840 }
   1841 
   1842 static void printSortKeyError(const UChar   *codepoints, int length,
   1843                                     uint8_t *sortkey, int sklen)
   1844 {
   1845     int count = 0;
   1846     log_err("Sortkey not valid for ");
   1847     while (length > 0) {
   1848         log_err("0x%04x ", *codepoints);
   1849         length --;
   1850         codepoints ++;
   1851     }
   1852     log_err("\nSortkey : ");
   1853     while (count < sklen) {
   1854         log_err("0x%02x ", sortkey[count]);
   1855         count ++;
   1856     }
   1857     log_err("\n");
   1858 }
   1859 
   1860 /**
   1861 * Checking sort key validity for all levels
   1862 */
   1863 static UBool checkSortKeyValidity(UCollator *coll,
   1864                                   const UChar *codepoints,
   1865                                   int length)
   1866 {
   1867     UErrorCode status  = U_ZERO_ERROR;
   1868     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
   1869                                       UCOL_TERTIARY, UCOL_QUATERNARY,
   1870                                       UCOL_IDENTICAL};
   1871     int        strengthlen = 5;
   1872     int        strengthIndex = 0;
   1873     int        caselevel   = 0;
   1874 
   1875     while (caselevel < 1) {
   1876         if (caselevel == 0) {
   1877             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
   1878         }
   1879         else {
   1880             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
   1881         }
   1882 
   1883         while (strengthIndex < strengthlen) {
   1884             int        count01 = 0;
   1885             uint32_t   count   = 0;
   1886             uint8_t    sortkey[128];
   1887             uint32_t   sklen;
   1888 
   1889             ucol_setStrength(coll, strength[strengthIndex]);
   1890             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
   1891             while (sortkey[count] != 0) {
   1892                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
   1893                     printSortKeyError(codepoints, length, sortkey, sklen);
   1894                     return FALSE;
   1895                 }
   1896                 if (sortkey[count] == 1) {
   1897                     count01 ++;
   1898                 }
   1899                 count ++;
   1900             }
   1901 
   1902             if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
   1903                 printSortKeyError(codepoints, length, sortkey, sklen);
   1904                 return FALSE;
   1905             }
   1906             strengthIndex ++;
   1907         }
   1908         caselevel ++;
   1909     }
   1910     return TRUE;
   1911 }
   1912 
   1913 static void TestSortKeyValidity(void)
   1914 {
   1915     /* testing UCA collation elements */
   1916     UErrorCode  status      = U_ZERO_ERROR;
   1917     /* en_US has no tailorings */
   1918     UCollator  *coll        = ucol_open("en_US", &status);
   1919     /* tailored locales */
   1920     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
   1921     FileStream *file = NULL;
   1922     char        line[2048];
   1923     UChar       codepoints[10];
   1924     int         count = 0;
   1925     UChar       contextCPs[5];
   1926     UParseError parseError;
   1927     if (U_FAILURE(status)) {
   1928         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1929         return;
   1930     }
   1931     log_verbose("Testing UCA elements\n");
   1932     file = getFractionalUCA();
   1933     if (file == NULL) {
   1934         log_err("Fractional UCA data can not be opened\n");
   1935         return;
   1936     }
   1937 
   1938     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1939         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1940             line[0] == 0x000D || line[0] == '[') {
   1941             continue;
   1942         }
   1943 
   1944         getCodePoints(line, codepoints, contextCPs);
   1945         if(codepoints[0] == 0xFFFE) {
   1946             /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   1947             continue;
   1948         }
   1949         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
   1950     }
   1951 
   1952     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1953     codepoints[0] = 0;
   1954 
   1955     while (codepoints[0] < 0xFFFF) {
   1956         if (u_isdefined((UChar32)codepoints[0])) {
   1957             checkSortKeyValidity(coll, codepoints, 1);
   1958         }
   1959         codepoints[0] ++;
   1960     }
   1961 
   1962     ucol_close(coll);
   1963 
   1964     /* testing tailored collation elements */
   1965     log_verbose("Testing tailored elements\n");
   1966     while (count < 5) {
   1967         const UChar *rules = NULL,
   1968                     *current = NULL;
   1969         UChar *rulesCopy = NULL;
   1970         int32_t ruleLen = 0;
   1971 
   1972         uint32_t chOffset = 0;
   1973         uint32_t chLen = 0;
   1974         uint32_t exOffset = 0;
   1975         uint32_t exLen = 0;
   1976         uint32_t prefixOffset = 0;
   1977         uint32_t prefixLen = 0;
   1978         UBool    startOfRules = TRUE;
   1979         UColOptionSet opts;
   1980 
   1981         UColTokenParser src;
   1982         uint32_t strength = 0;
   1983         uint16_t specs = 0;
   1984         status = U_ZERO_ERROR; // clear status from previous loop iteration
   1985 
   1986         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1987 
   1988         coll      = ucol_open(locale[count], &status);
   1989         if (U_FAILURE(status)) {
   1990             log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status));
   1991             return;
   1992         }
   1993 
   1994         src.opts = &opts;
   1995         rules = ucol_getRules(coll, &ruleLen);
   1996 
   1997         /*
   1998          * We have not set up the UColTokenParser with a callback function
   1999          * to fetch [import] sub-rules,
   2000          * so skip testing tailorings that import others.
   2001          * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet()
   2002          *                     rather than the internal collation rule parser
   2003          */
   2004         if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
   2005             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   2006                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   2007             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   2008             src.current = src.source = rulesCopy;
   2009             src.end = rulesCopy + ruleLen;
   2010             src.extraCurrent = src.end;
   2011             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   2012 
   2013 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   2014 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   2015             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) {
   2016                 strength = src.parsedToken.strength;
   2017                 chOffset = src.parsedToken.charsOffset;
   2018                 chLen = src.parsedToken.charsLen;
   2019                 exOffset = src.parsedToken.extensionOffset;
   2020                 exLen = src.parsedToken.extensionLen;
   2021                 prefixOffset = src.parsedToken.prefixOffset;
   2022                 prefixLen = src.parsedToken.prefixLen;
   2023                 specs = src.parsedToken.flags;
   2024 
   2025                 startOfRules = FALSE;
   2026                 uprv_memcpy(codepoints, src.source + chOffset,
   2027                                                        chLen * sizeof(UChar));
   2028                 codepoints[chLen] = 0;
   2029                 if(codepoints[0] == 0xFFFE) {
   2030                     /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   2031                     continue;
   2032                 }
   2033                 checkSortKeyValidity(coll, codepoints, chLen);
   2034             }
   2035             if (U_FAILURE(status)) {
   2036                 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));
   2037             }
   2038             uprv_free(src.source);
   2039             uprv_free(src.reorderCodes);
   2040         }
   2041 
   2042         ucol_close(coll);
   2043         count ++;
   2044     }
   2045     T_FileStream_close(file);
   2046 }
   2047 
   2048 /**
   2049 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
   2050 * normalization on AND jamo tailoring, among other things.
   2051 */
   2052 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
   2053     0x0020, 0xAC00,                 /* simple LV Hangul */
   2054     0x0020, 0xAC01,                 /* simple LVT Hangul */
   2055     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
   2056     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
   2057     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
   2058     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
   2059     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
   2060     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
   2061     0x0020, 0x00E6,                 /* small letter ae, expands */
   2062     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
   2063     0x0020
   2064 };
   2065 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
   2066 
   2067 static const int32_t rootStandardOffsets[] = {
   2068     0,  1,2,
   2069     2,  3,4,4,
   2070     4,  5,6,6,
   2071     6,  7,8,8,
   2072     8,  9,10,11,
   2073     12, 13,14,15,
   2074     16, 17,18,19,
   2075     20, 21,22,23,
   2076     24, 25,26,26,26,
   2077     26, 27,28,28,
   2078     28,
   2079     29
   2080 };
   2081 enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };
   2082 
   2083 static const int32_t rootSearchOffsets[] = {
   2084     0,  1,2,
   2085     2,  3,4,4,
   2086     4,  5,6,6,6,
   2087     6,  7,8,8,8,8,8,8,
   2088     8,  9,10,11,
   2089     12, 13,14,15,
   2090     16, 17,18,19,20,
   2091     20, 21,22,22,23,23,23,24,
   2092     24, 25,26,26,26,
   2093     26, 27,28,28,
   2094     28,
   2095     29
   2096 };
   2097 enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };
   2098 
   2099 typedef struct {
   2100     const char *    locale;
   2101     const int32_t * offsets;
   2102     int32_t         offsetsLen;
   2103 } TSCEItem;
   2104 
   2105 static const TSCEItem tsceItems[] = {
   2106     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
   2107     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
   2108     { NULL,                    NULL,                0                        }
   2109 };
   2110 
   2111 static void TestSearchCollatorElements(void)
   2112 {
   2113     const TSCEItem * tsceItemPtr;
   2114     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
   2115         UErrorCode status = U_ZERO_ERROR;
   2116         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
   2117         if ( U_SUCCESS(status) ) {
   2118             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
   2119             if ( U_SUCCESS(status) ) {
   2120                 int32_t offset, element;
   2121                 const int32_t * nextOffsetPtr;
   2122                 const int32_t * limitOffsetPtr;
   2123 
   2124                 nextOffsetPtr = tsceItemPtr->offsets;
   2125                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
   2126                 do {
   2127                     offset = ucol_getOffset(uce);
   2128                     element = ucol_next(uce, &status);
   2129                     if ( element == 0 ) {
   2130                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
   2131                     }
   2132                     if ( nextOffsetPtr < limitOffsetPtr ) {
   2133                         if (offset != *nextOffsetPtr) {
   2134                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
   2135                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
   2136                             nextOffsetPtr = limitOffsetPtr;
   2137                             break;
   2138                         }
   2139                         nextOffsetPtr++;
   2140                     } else {
   2141                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
   2142                     }
   2143                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
   2144                 if ( nextOffsetPtr < limitOffsetPtr ) {
   2145                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
   2146                 }
   2147 
   2148                 ucol_setOffset(uce, kLen_tsceText, &status);
   2149                 status = U_ZERO_ERROR;
   2150                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
   2151                 limitOffsetPtr = tsceItemPtr->offsets;
   2152                 do {
   2153                     offset = ucol_getOffset(uce);
   2154                     element = ucol_previous(uce, &status);
   2155                     if ( element == 0 ) {
   2156                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
   2157                     }
   2158                     if ( nextOffsetPtr > limitOffsetPtr ) {
   2159                         nextOffsetPtr--;
   2160                         if (offset != *nextOffsetPtr) {
   2161                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
   2162                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
   2163                             nextOffsetPtr = limitOffsetPtr;
   2164                             break;
   2165                         }
   2166                    } else {
   2167                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
   2168                     }
   2169                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
   2170                 if ( nextOffsetPtr > limitOffsetPtr ) {
   2171                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
   2172                 }
   2173 
   2174                 ucol_closeElements(uce);
   2175             } else {
   2176                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
   2177             }
   2178             ucol_close(ucol);
   2179         } else {
   2180             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
   2181         }
   2182     }
   2183 }
   2184 
   2185 #endif /* #if !UCONFIG_NO_COLLATION */
   2186