Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2013, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File CITERTST.C
      9 *
     10 * Modification History:
     11 * Date      Name               Description
     12 *           Madhu Katragadda   Ported for C API
     13 * 02/19/01  synwee             Modified test case for new collation iterator
     14 *********************************************************************************/
     15 /*
     16  * Collation Iterator tests.
     17  * (Let me reiterate my position...)
     18  */
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION
     23 
     24 #include "unicode/ucol.h"
     25 #include "unicode/ucoleitr.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/putil.h"
     30 #include "callcoll.h"
     31 #include "cmemory.h"
     32 #include "cintltst.h"
     33 #include "citertst.h"
     34 #include "ccolltst.h"
     35 #include "filestrm.h"
     36 #include "cstring.h"
     37 #include "ucol_imp.h"
     38 #include "ucol_tok.h"
     39 #include "uparse.h"
     40 #include <stdio.h>
     41 
     42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
     43 
     44 void addCollIterTest(TestNode** root)
     45 {
     46     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
     47     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
     48     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
     49     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
     50     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
     51     addTest(root, &TestNormalizedUnicodeChar,
     52                                 "tscoll/citertst/TestNormalizedUnicodeChar");
     53     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
     54     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
     55     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
     56     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
     57     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
     58     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
     59     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
     60     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
     61     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
     62     addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
     63 }
     64 
     65 /* The locales we support */
     66 
     67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
     68 
     69 static void TestBug672() {
     70     UErrorCode  status = U_ZERO_ERROR;
     71     UChar       pattern[20];
     72     UChar       text[50];
     73     int         i;
     74     int         result[3][3];
     75 
     76     u_uastrcpy(pattern, "resume");
     77     u_uastrcpy(text, "Time to resume updating my resume.");
     78 
     79     for (i = 0; i < 3; ++ i) {
     80         UCollator          *coll = ucol_open(LOCALES[i], &status);
     81         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
     82                                                      &status);
     83         UCollationElements *titer = ucol_openElements(coll, text, -1,
     84                                                      &status);
     85         if (U_FAILURE(status)) {
     86             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
     87                     myErrorName(status));
     88             return;
     89         }
     90 
     91         log_verbose("locale tested %s\n", LOCALES[i]);
     92 
     93         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
     94                U_SUCCESS(status)) {
     95         }
     96         if (U_FAILURE(status)) {
     97             log_err("ERROR: reversing collation iterator :%s\n",
     98                     myErrorName(status));
     99             return;
    100         }
    101         ucol_reset(pitr);
    102 
    103         ucol_setOffset(titer, u_strlen(pattern), &status);
    104         if (U_FAILURE(status)) {
    105             log_err("ERROR: setting offset in collator :%s\n",
    106                     myErrorName(status));
    107             return;
    108         }
    109         result[i][0] = ucol_getOffset(titer);
    110         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    111 
    112         /* Use previous() */
    113         ucol_previous(titer, &status);
    114         result[i][1] = ucol_getOffset(titer);
    115         log_verbose("Current offset %d after previous\n", result[i][1]);
    116 
    117         /* Add one to index */
    118         log_verbose("Adding one to current offset...\n");
    119         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    120         if (U_FAILURE(status)) {
    121             log_err("ERROR: setting offset in collator :%s\n",
    122                     myErrorName(status));
    123             return;
    124         }
    125         result[i][2] = ucol_getOffset(titer);
    126         log_verbose("Current offset in text = %d\n", result[i][2]);
    127         ucol_closeElements(pitr);
    128         ucol_closeElements(titer);
    129         ucol_close(coll);
    130     }
    131 
    132     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    133         uprv_memcmp(result[1], result[2], 3) != 0) {
    134         log_err("ERROR: Different locales have different offsets at the same character\n");
    135     }
    136 }
    137 
    138 
    139 
    140 /*  Running this test with normalization enabled showed up a bug in the incremental
    141     normalization code. */
    142 static void TestBug672Normalize() {
    143     UErrorCode  status = U_ZERO_ERROR;
    144     UChar       pattern[20];
    145     UChar       text[50];
    146     int         i;
    147     int         result[3][3];
    148 
    149     u_uastrcpy(pattern, "resume");
    150     u_uastrcpy(text, "Time to resume updating my resume.");
    151 
    152     for (i = 0; i < 3; ++ i) {
    153         UCollator          *coll = ucol_open(LOCALES[i], &status);
    154         UCollationElements *pitr = NULL;
    155         UCollationElements *titer = NULL;
    156 
    157         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    158 
    159         pitr = ucol_openElements(coll, pattern, -1, &status);
    160         titer = ucol_openElements(coll, text, -1, &status);
    161         if (U_FAILURE(status)) {
    162             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
    163                     myErrorName(status));
    164             return;
    165         }
    166 
    167         log_verbose("locale tested %s\n", LOCALES[i]);
    168 
    169         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
    170                U_SUCCESS(status)) {
    171         }
    172         if (U_FAILURE(status)) {
    173             log_err("ERROR: reversing collation iterator :%s\n",
    174                     myErrorName(status));
    175             return;
    176         }
    177         ucol_reset(pitr);
    178 
    179         ucol_setOffset(titer, u_strlen(pattern), &status);
    180         if (U_FAILURE(status)) {
    181             log_err("ERROR: setting offset in collator :%s\n",
    182                     myErrorName(status));
    183             return;
    184         }
    185         result[i][0] = ucol_getOffset(titer);
    186         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    187 
    188         /* Use previous() */
    189         ucol_previous(titer, &status);
    190         result[i][1] = ucol_getOffset(titer);
    191         log_verbose("Current offset %d after previous\n", result[i][1]);
    192 
    193         /* Add one to index */
    194         log_verbose("Adding one to current offset...\n");
    195         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    196         if (U_FAILURE(status)) {
    197             log_err("ERROR: setting offset in collator :%s\n",
    198                     myErrorName(status));
    199             return;
    200         }
    201         result[i][2] = ucol_getOffset(titer);
    202         log_verbose("Current offset in text = %d\n", result[i][2]);
    203         ucol_closeElements(pitr);
    204         ucol_closeElements(titer);
    205         ucol_close(coll);
    206     }
    207 
    208     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    209         uprv_memcmp(result[1], result[2], 3) != 0) {
    210         log_err("ERROR: Different locales have different offsets at the same character\n");
    211     }
    212 }
    213 
    214 
    215 
    216 
    217 /**
    218  * Test for CollationElementIterator previous and next for the whole set of
    219  * unicode characters.
    220  */
    221 static void TestUnicodeChar()
    222 {
    223     UChar source[0x100];
    224     UCollator *en_us;
    225     UCollationElements *iter;
    226     UErrorCode status = U_ZERO_ERROR;
    227     UChar codepoint;
    228 
    229     UChar *test;
    230     en_us = ucol_open("en_US", &status);
    231     if (U_FAILURE(status)){
    232        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
    233               myErrorName(status));
    234        return;
    235     }
    236 
    237     for (codepoint = 1; codepoint < 0xFFFE;)
    238     {
    239       test = source;
    240 
    241       while (codepoint % 0xFF != 0)
    242       {
    243         if (u_isdefined(codepoint))
    244           *(test ++) = codepoint;
    245         codepoint ++;
    246       }
    247 
    248       if (u_isdefined(codepoint))
    249         *(test ++) = codepoint;
    250 
    251       if (codepoint != 0xFFFF)
    252         codepoint ++;
    253 
    254       *test = 0;
    255       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
    256       if(U_FAILURE(status)){
    257           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    258               myErrorName(status));
    259           ucol_close(en_us);
    260           return;
    261       }
    262       /* A basic test to see if it's working at all */
    263       log_verbose("codepoint testing %x\n", codepoint);
    264       backAndForth(iter);
    265       ucol_closeElements(iter);
    266 
    267       /* null termination test */
    268       iter=ucol_openElements(en_us, source, -1, &status);
    269       if(U_FAILURE(status)){
    270           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    271               myErrorName(status));
    272           ucol_close(en_us);
    273           return;
    274       }
    275       /* A basic test to see if it's working at all */
    276       backAndForth(iter);
    277       ucol_closeElements(iter);
    278     }
    279 
    280     ucol_close(en_us);
    281 }
    282 
    283 /**
    284  * Test for CollationElementIterator previous and next for the whole set of
    285  * unicode characters with normalization on.
    286  */
    287 static void TestNormalizedUnicodeChar()
    288 {
    289     UChar source[0x100];
    290     UCollator *th_th;
    291     UCollationElements *iter;
    292     UErrorCode status = U_ZERO_ERROR;
    293     UChar codepoint;
    294 
    295     UChar *test;
    296     /* thai should have normalization on */
    297     th_th = ucol_open("th_TH", &status);
    298     if (U_FAILURE(status)){
    299         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
    300               myErrorName(status));
    301         return;
    302     }
    303 
    304     for (codepoint = 1; codepoint < 0xFFFE;)
    305     {
    306       test = source;
    307 
    308       while (codepoint % 0xFF != 0)
    309       {
    310         if (u_isdefined(codepoint))
    311           *(test ++) = codepoint;
    312         codepoint ++;
    313       }
    314 
    315       if (u_isdefined(codepoint))
    316         *(test ++) = codepoint;
    317 
    318       if (codepoint != 0xFFFF)
    319         codepoint ++;
    320 
    321       *test = 0;
    322       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
    323       if(U_FAILURE(status)){
    324           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    325               myErrorName(status));
    326             ucol_close(th_th);
    327           return;
    328       }
    329 
    330       backAndForth(iter);
    331       ucol_closeElements(iter);
    332 
    333       iter=ucol_openElements(th_th, source, -1, &status);
    334       if(U_FAILURE(status)){
    335           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    336               myErrorName(status));
    337             ucol_close(th_th);
    338           return;
    339       }
    340 
    341       backAndForth(iter);
    342       ucol_closeElements(iter);
    343     }
    344 
    345     ucol_close(th_th);
    346 }
    347 
    348 /**
    349 * Test the incremental normalization
    350 */
    351 static void TestNormalization()
    352 {
    353           UErrorCode          status = U_ZERO_ERROR;
    354     const char               *str    =
    355                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
    356           UCollator          *coll;
    357           UChar               rule[50];
    358           int                 rulelen = u_unescape(str, rule, 50);
    359           int                 count = 0;
    360     const char                *testdata[] =
    361                         {"\\u1ED9", "o\\u0323\\u0302",
    362                         "\\u0300\\u0315", "\\u0315\\u0300",
    363                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
    364                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
    365                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
    366                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
    367                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
    368     int32_t   srclen;
    369     UChar source[10];
    370     UCollationElements *iter;
    371 
    372     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
    373     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    374     if (U_FAILURE(status)){
    375         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
    376               myErrorName(status));
    377         return;
    378     }
    379 
    380     srclen = u_unescape(testdata[0], source, 10);
    381     iter = ucol_openElements(coll, source, srclen, &status);
    382     backAndForth(iter);
    383     ucol_closeElements(iter);
    384 
    385     srclen = u_unescape(testdata[1], source, 10);
    386     iter = ucol_openElements(coll, source, srclen, &status);
    387     backAndForth(iter);
    388     ucol_closeElements(iter);
    389 
    390     while (count < 12) {
    391         srclen = u_unescape(testdata[count], source, 10);
    392         iter = ucol_openElements(coll, source, srclen, &status);
    393 
    394         if (U_FAILURE(status)){
    395             log_err("ERROR: in creation of collator element iterator\n %s\n",
    396                   myErrorName(status));
    397             return;
    398         }
    399         backAndForth(iter);
    400         ucol_closeElements(iter);
    401 
    402         iter = ucol_openElements(coll, source, -1, &status);
    403 
    404         if (U_FAILURE(status)){
    405             log_err("ERROR: in creation of collator element iterator\n %s\n",
    406                   myErrorName(status));
    407             return;
    408         }
    409         backAndForth(iter);
    410         ucol_closeElements(iter);
    411         count ++;
    412     }
    413     ucol_close(coll);
    414 }
    415 
    416 /**
    417  * Test for CollationElementIterator.previous()
    418  *
    419  * @bug 4108758 - Make sure it works with contracting characters
    420  *
    421  */
    422 static void TestPrevious()
    423 {
    424     UCollator *coll=NULL;
    425     UChar rule[50];
    426     UChar *source;
    427     UCollator *c1, *c2, *c3;
    428     UCollationElements *iter;
    429     UErrorCode status = U_ZERO_ERROR;
    430     UChar test1[50];
    431     UChar test2[50];
    432 
    433     u_uastrcpy(test1, "What subset of all possible test cases?");
    434     u_uastrcpy(test2, "has the highest probability of detecting");
    435     coll = ucol_open("en_US", &status);
    436 
    437     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
    438     log_verbose("English locale testing back and forth\n");
    439     if(U_FAILURE(status)){
    440         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    441             myErrorName(status));
    442         ucol_close(coll);
    443         return;
    444     }
    445     /* A basic test to see if it's working at all */
    446     backAndForth(iter);
    447     ucol_closeElements(iter);
    448     ucol_close(coll);
    449 
    450     /* Test with a contracting character sequence */
    451     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    452     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    453 
    454     log_verbose("Contraction rule testing back and forth with no normalization\n");
    455 
    456     if (c1 == NULL || U_FAILURE(status))
    457     {
    458         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
    459             myErrorName(status));
    460         return;
    461     }
    462     source=(UChar*)malloc(sizeof(UChar) * 20);
    463     u_uastrcpy(source, "abchdcba");
    464     iter=ucol_openElements(c1, source, u_strlen(source), &status);
    465     if(U_FAILURE(status)){
    466         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    467             myErrorName(status));
    468         return;
    469     }
    470     backAndForth(iter);
    471     ucol_closeElements(iter);
    472     ucol_close(c1);
    473 
    474     /* Test with an expanding character sequence */
    475     u_uastrcpy(rule, "&a < b < c/abd < d");
    476     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    477     log_verbose("Expansion rule testing back and forth with no normalization\n");
    478     if (c2 == NULL || U_FAILURE(status))
    479     {
    480         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    481             myErrorName(status));
    482         return;
    483     }
    484     u_uastrcpy(source, "abcd");
    485     iter=ucol_openElements(c2, source, u_strlen(source), &status);
    486     if(U_FAILURE(status)){
    487         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    488             myErrorName(status));
    489         return;
    490     }
    491     backAndForth(iter);
    492     ucol_closeElements(iter);
    493     ucol_close(c2);
    494     /* Now try both */
    495     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
    496     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
    497     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
    498 
    499     if (c3 == NULL || U_FAILURE(status))
    500     {
    501         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    502             myErrorName(status));
    503         return;
    504     }
    505     u_uastrcpy(source, "abcdbchdc");
    506     iter=ucol_openElements(c3, source, u_strlen(source), &status);
    507     if(U_FAILURE(status)){
    508         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    509             myErrorName(status));
    510         return;
    511     }
    512     backAndForth(iter);
    513     ucol_closeElements(iter);
    514     ucol_close(c3);
    515     source[0] = 0x0e41;
    516     source[1] = 0x0e02;
    517     source[2] = 0x0e41;
    518     source[3] = 0x0e02;
    519     source[4] = 0x0e27;
    520     source[5] = 0x61;
    521     source[6] = 0x62;
    522     source[7] = 0x63;
    523     source[8] = 0;
    524 
    525     coll = ucol_open("th_TH", &status);
    526     log_verbose("Thai locale testing back and forth with normalization\n");
    527     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    528     if(U_FAILURE(status)){
    529         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    530             myErrorName(status));
    531         return;
    532     }
    533     backAndForth(iter);
    534     ucol_closeElements(iter);
    535     ucol_close(coll);
    536 
    537     /* prev test */
    538     source[0] = 0x0061;
    539     source[1] = 0x30CF;
    540     source[2] = 0x3099;
    541     source[3] = 0x30FC;
    542     source[4] = 0;
    543 
    544     coll = ucol_open("ja_JP", &status);
    545     log_verbose("Japanese locale testing back and forth with normalization\n");
    546     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    547     if(U_FAILURE(status)){
    548         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    549             myErrorName(status));
    550         return;
    551     }
    552     backAndForth(iter);
    553     ucol_closeElements(iter);
    554     ucol_close(coll);
    555 
    556     free(source);
    557 }
    558 
    559 /**
    560  * Test for getOffset() and setOffset()
    561  */
    562 static void TestOffset()
    563 {
    564     UErrorCode status= U_ZERO_ERROR;
    565     UCollator *en_us=NULL;
    566     UCollationElements *iter, *pristine;
    567     int32_t offset;
    568     OrderAndOffset *orders;
    569     int32_t orderLength=0;
    570     int     count = 0;
    571     UChar test1[50];
    572     UChar test2[50];
    573 
    574     u_uastrcpy(test1, "What subset of all possible test cases?");
    575     u_uastrcpy(test2, "has the highest probability of detecting");
    576     en_us = ucol_open("en_US", &status);
    577     log_verbose("Testing getOffset and setOffset for collations\n");
    578     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
    579     if(U_FAILURE(status)){
    580         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    581             myErrorName(status));
    582         ucol_close(en_us);
    583         return;
    584     }
    585 
    586     /* testing boundaries */
    587     ucol_setOffset(iter, 0, &status);
    588     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
    589         log_err("Error: After setting offset to 0, we should be at the end "
    590                 "of the backwards iteration");
    591     }
    592     ucol_setOffset(iter, u_strlen(test1), &status);
    593     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
    594         log_err("Error: After setting offset to end of the string, we should "
    595                 "be at the end of the backwards iteration");
    596     }
    597 
    598     /* Run all the way through the iterator, then get the offset */
    599 
    600     orders = getOrders(iter, &orderLength);
    601 
    602     offset = ucol_getOffset(iter);
    603 
    604     if (offset != u_strlen(test1))
    605     {
    606         log_err("offset at end != length %d vs %d\n", offset,
    607             u_strlen(test1) );
    608     }
    609 
    610     /* Now set the offset back to the beginning and see if it works */
    611     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    612     if(U_FAILURE(status)){
    613         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    614             myErrorName(status));
    615     ucol_close(en_us);
    616         return;
    617     }
    618     status = U_ZERO_ERROR;
    619 
    620     ucol_setOffset(iter, 0, &status);
    621     if (U_FAILURE(status))
    622     {
    623         log_err("setOffset failed. %s\n",    myErrorName(status));
    624     }
    625     else
    626     {
    627         assertEqual(iter, pristine);
    628     }
    629 
    630     ucol_closeElements(pristine);
    631     ucol_closeElements(iter);
    632     free(orders);
    633 
    634     /* testing offsets in normalization buffer */
    635     test1[0] = 0x61;
    636     test1[1] = 0x300;
    637     test1[2] = 0x316;
    638     test1[3] = 0x62;
    639     test1[4] = 0;
    640     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    641     iter = ucol_openElements(en_us, test1, 4, &status);
    642     if(U_FAILURE(status)){
    643         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    644             myErrorName(status));
    645         ucol_close(en_us);
    646         return;
    647     }
    648 
    649     count = 0;
    650     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
    651         U_SUCCESS(status)) {
    652         switch (count) {
    653         case 0:
    654             if (ucol_getOffset(iter) != 1) {
    655                 log_err("ERROR: Offset of iteration should be 1\n");
    656             }
    657             break;
    658         case 3:
    659             if (ucol_getOffset(iter) != 4) {
    660                 log_err("ERROR: Offset of iteration should be 4\n");
    661             }
    662             break;
    663         default:
    664             if (ucol_getOffset(iter) != 3) {
    665                 log_err("ERROR: Offset of iteration should be 3\n");
    666             }
    667         }
    668         count ++;
    669     }
    670 
    671     ucol_reset(iter);
    672     count = 0;
    673     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
    674         U_SUCCESS(status)) {
    675         switch (count) {
    676         case 0:
    677         case 1:
    678             if (ucol_getOffset(iter) != 3) {
    679                 log_err("ERROR: Offset of iteration should be 3\n");
    680             }
    681             break;
    682         case 2:
    683             if (ucol_getOffset(iter) != 1) {
    684                 log_err("ERROR: Offset of iteration should be 1\n");
    685             }
    686             break;
    687         default:
    688             if (ucol_getOffset(iter) != 0) {
    689                 log_err("ERROR: Offset of iteration should be 0\n");
    690             }
    691         }
    692         count ++;
    693     }
    694 
    695     if(U_FAILURE(status)){
    696         log_err("ERROR: in iterating collation elements %s\n",
    697             myErrorName(status));
    698     }
    699 
    700     ucol_closeElements(iter);
    701     ucol_close(en_us);
    702 }
    703 
    704 /**
    705  * Test for setText()
    706  */
    707 static void TestSetText()
    708 {
    709     int32_t c,i;
    710     UErrorCode status = U_ZERO_ERROR;
    711     UCollator *en_us=NULL;
    712     UCollationElements *iter1, *iter2;
    713     UChar test1[50];
    714     UChar test2[50];
    715 
    716     u_uastrcpy(test1, "What subset of all possible test cases?");
    717     u_uastrcpy(test2, "has the highest probability of detecting");
    718     en_us = ucol_open("en_US", &status);
    719     log_verbose("testing setText for Collation elements\n");
    720     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    721     if(U_FAILURE(status)){
    722         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
    723             myErrorName(status));
    724     ucol_close(en_us);
    725         return;
    726     }
    727     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
    728     if(U_FAILURE(status)){
    729         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
    730             myErrorName(status));
    731     ucol_close(en_us);
    732         return;
    733     }
    734 
    735     /* Run through the second iterator just to exercise it */
    736     c = ucol_next(iter2, &status);
    737     i = 0;
    738 
    739     while ( ++i < 10 && (c != UCOL_NULLORDER))
    740     {
    741         if (U_FAILURE(status))
    742         {
    743             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
    744             ucol_closeElements(iter2);
    745             ucol_closeElements(iter1);
    746     ucol_close(en_us);
    747             return;
    748         }
    749 
    750         c = ucol_next(iter2, &status);
    751     }
    752 
    753     /* Now set it to point to the same string as the first iterator */
    754     ucol_setText(iter2, test1, u_strlen(test1), &status);
    755     if (U_FAILURE(status))
    756     {
    757         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
    758     }
    759     else
    760     {
    761         assertEqual(iter1, iter2);
    762     }
    763 
    764     /* Now set it to point to a null string with fake length*/
    765     ucol_setText(iter2, NULL, 2, &status);
    766     if (U_FAILURE(status))
    767     {
    768         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
    769     }
    770     else
    771     {
    772         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
    773             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
    774         }
    775     }
    776 
    777     ucol_closeElements(iter2);
    778     ucol_closeElements(iter1);
    779     ucol_close(en_us);
    780 }
    781 
    782 /** @bug 4108762
    783  * Test for getMaxExpansion()
    784  */
    785 static void TestMaxExpansion()
    786 {
    787     UErrorCode          status = U_ZERO_ERROR;
    788     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    789     UChar               ch     = 0;
    790     UChar32             unassigned = 0xEFFFD;
    791     UChar               supplementary[2];
    792     uint32_t            stringOffset = 0;
    793     UBool               isError = FALSE;
    794     uint32_t            sorder = 0;
    795     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    796     uint32_t            temporder = 0;
    797 
    798     UChar rule[256];
    799     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    800     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    801         UCOL_DEFAULT_STRENGTH,NULL, &status);
    802     if(U_SUCCESS(status) && coll) {
    803       iter = ucol_openElements(coll, &ch, 1, &status);
    804 
    805       while (ch < 0xFFFF && U_SUCCESS(status)) {
    806           int      count = 1;
    807           uint32_t order;
    808           int32_t  size = 0;
    809 
    810           ch ++;
    811 
    812           ucol_setText(iter, &ch, 1, &status);
    813           order = ucol_previous(iter, &status);
    814 
    815           /* thai management */
    816           if (order == 0)
    817               order = ucol_previous(iter, &status);
    818 
    819           while (U_SUCCESS(status) &&
    820               ucol_previous(iter, &status) != UCOL_NULLORDER) {
    821               count ++;
    822           }
    823 
    824           size = ucol_getMaxExpansion(iter, order);
    825           if (U_FAILURE(status) || size < count) {
    826               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    827                   ch, count);
    828           }
    829       }
    830 
    831       /* testing for exact max expansion */
    832       ch = 0;
    833       while (ch < 0x61) {
    834           uint32_t order;
    835           int32_t  size;
    836           ucol_setText(iter, &ch, 1, &status);
    837           order = ucol_previous(iter, &status);
    838           size  = ucol_getMaxExpansion(iter, order);
    839           if (U_FAILURE(status) || size != 1) {
    840               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    841                   ch, 1);
    842           }
    843           ch ++;
    844       }
    845 
    846       ch = 0x63;
    847       ucol_setText(iter, &ch, 1, &status);
    848       temporder = ucol_previous(iter, &status);
    849 
    850       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
    851           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    852                   ch, 3);
    853       }
    854 
    855       ch = 0x64;
    856       ucol_setText(iter, &ch, 1, &status);
    857       temporder = ucol_previous(iter, &status);
    858 
    859       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
    860           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    861                   ch, 3);
    862       }
    863 
    864       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
    865       (void)isError;    /* Suppress set but not used warning. */
    866       ucol_setText(iter, supplementary, 2, &status);
    867       sorder = ucol_previous(iter, &status);
    868 
    869       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
    870           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    871                   ch, 2);
    872       }
    873 
    874       /* testing jamo */
    875       ch = 0x1165;
    876 
    877       ucol_setText(iter, &ch, 1, &status);
    878       temporder = ucol_previous(iter, &status);
    879       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
    880           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    881                   ch, 3);
    882       }
    883 
    884       ucol_closeElements(iter);
    885       ucol_close(coll);
    886 
    887       /* testing special jamo &a<\u1160 */
    888       rule[0] = 0x26;
    889       rule[1] = 0x71;
    890       rule[2] = 0x3c;
    891       rule[3] = 0x1165;
    892       rule[4] = 0x2f;
    893       rule[5] = 0x71;
    894       rule[6] = 0x71;
    895       rule[7] = 0x71;
    896       rule[8] = 0x71;
    897       rule[9] = 0;
    898 
    899       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    900           UCOL_DEFAULT_STRENGTH,NULL, &status);
    901       iter = ucol_openElements(coll, &ch, 1, &status);
    902 
    903       temporder = ucol_previous(iter, &status);
    904       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
    905           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    906                   ch, 5);
    907       }
    908 
    909       ucol_closeElements(iter);
    910       ucol_close(coll);
    911     } else {
    912       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    913     }
    914 
    915 }
    916 
    917 
    918 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
    919 {
    920     int32_t c1, c2;
    921     int32_t count = 0;
    922     UErrorCode status = U_ZERO_ERROR;
    923 
    924     do
    925     {
    926         c1 = ucol_next(i1, &status);
    927         c2 = ucol_next(i2, &status);
    928 
    929         if (c1 != c2)
    930         {
    931             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
    932             break;
    933         }
    934 
    935         count += 1;
    936     }
    937     while (c1 != UCOL_NULLORDER);
    938 }
    939 
    940 /**
    941  * Testing iterators with extremely small buffers
    942  */
    943 static void TestSmallBuffer()
    944 {
    945     UErrorCode          status = U_ZERO_ERROR;
    946     UCollator          *coll;
    947     UCollationElements *testiter,
    948                        *iter;
    949     int32_t             count = 0;
    950     OrderAndOffset     *testorders,
    951                        *orders;
    952 
    953     UChar teststr[500];
    954     UChar str[] = {0x300, 0x31A, 0};
    955     /*
    956     creating a long string of decomposable characters,
    957     since by default the writable buffer is of size 256
    958     */
    959     while (count < 500) {
    960         if ((count & 1) == 0) {
    961             teststr[count ++] = 0x300;
    962         }
    963         else {
    964             teststr[count ++] = 0x31A;
    965         }
    966     }
    967 
    968     coll = ucol_open("th_TH", &status);
    969     if(U_SUCCESS(status) && coll) {
    970       testiter = ucol_openElements(coll, teststr, 500, &status);
    971       iter = ucol_openElements(coll, str, 2, &status);
    972 
    973       orders     = getOrders(iter, &count);
    974       if (count != 2) {
    975           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
    976       }
    977 
    978       /*
    979       this will rearrange the string data to 250 characters of 0x300 first then
    980       250 characters of 0x031A
    981       */
    982       testorders = getOrders(testiter, &count);
    983 
    984       if (count != 500) {
    985           log_err("Error decomposition does not give the right sized collation elements\n");
    986       }
    987 
    988       while (count != 0) {
    989           /* UCA collation element for 0x0F76 */
    990           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
    991               (count <= 250 && testorders[-- count].order != orders[0].order)) {
    992               log_err("Error decomposition does not give the right collation element at %d count\n", count);
    993               break;
    994           }
    995       }
    996 
    997       free(testorders);
    998       free(orders);
    999 
   1000       ucol_reset(testiter);
   1001 
   1002       /* ensures closing of elements done properly to clear writable buffer */
   1003       ucol_next(testiter, &status);
   1004       ucol_next(testiter, &status);
   1005       ucol_closeElements(testiter);
   1006       ucol_closeElements(iter);
   1007       ucol_close(coll);
   1008     } else {
   1009       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   1010     }
   1011 }
   1012 
   /**
    * Snippet of code from genuca: converts one hexadecimal digit to its value.
    * @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are recognized
    * @return the digit value (0 to 15), or 0 for any other character
    */
   static int32_t hex2num(char hex) {
       int32_t value = 0;   /* result for characters that are not hex digits */

       if ('0' <= hex && hex <= '9') {
           value = hex - '0';
       } else if ('a' <= hex && hex <= 'f') {
           value = hex - 'a' + 10;
       } else if ('A' <= hex && hex <= 'F') {
           value = hex - 'A' + 10;
       }
       return value;
   }
   1027 
   1028 /**
   1029 * Getting codepoints from a string
   1030 * @param str character string contain codepoints seperated by space and ended
   1031 *        by a semicolon
   1032 * @param codepoints array for storage, assuming size > 5
   1033 * @return position at the end of the codepoint section
   1034 */
   1035 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
   1036     UErrorCode errorCode = U_ZERO_ERROR;
   1037     char *semi = uprv_strchr(str, ';');
   1038     char *pipe = uprv_strchr(str, '|');
   1039     char *s;
   1040     *codepoints = 0;
   1041     *contextCPs = 0;
   1042     if(semi == NULL) {
   1043         log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
   1044         return str;
   1045     }
   1046     if(pipe != NULL) {
   1047         int32_t contextLength;
   1048         *pipe = 0;
   1049         contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
   1050         *pipe = '|';
   1051         if(U_FAILURE(errorCode)) {
   1052             log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
   1053             return str;
   1054         }
   1055         /* prepend the precontext string to the codepoints */
   1056         u_memcpy(codepoints, contextCPs, contextLength);
   1057         codepoints += contextLength;
   1058         /* start of the code point string */
   1059         s = pipe + 1;
   1060     } else {
   1061         s = str;
   1062     }
   1063     u_parseString(s, codepoints, 99, NULL, &errorCode);
   1064     if(U_FAILURE(errorCode)) {
   1065         log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
   1066         return str;
   1067     }
   1068     return semi + 1;
   1069 }
   1070 
   1071 /**
   1072 * Sniplets of code from genuca
   1073 */
   1074 static int32_t
   1075 readElement(char **from, char *to, char separator, UErrorCode *status)
   1076 {
   1077     if (U_SUCCESS(*status)) {
   1078         char    buffer[1024];
   1079         int32_t i = 0;
   1080         while (**from != separator) {
   1081             if (**from != ' ') {
   1082                 *(buffer+i++) = **from;
   1083             }
   1084             (*from)++;
   1085         }
   1086         (*from)++;
   1087         *(buffer + i) = 0;
   1088         strcpy(to, buffer);
   1089         return i/2;
   1090     }
   1091 
   1092     return 0;
   1093 }
   1094 
   1095 /**
   1096 * Sniplets of code from genuca
   1097 */
   1098 static uint32_t
   1099 getSingleCEValue(char *primary, char *secondary, char *tertiary,
   1100                           UErrorCode *status)
   1101 {
   1102     if (U_SUCCESS(*status)) {
   1103         uint32_t  value    = 0;
   1104         char      primsave = '\0';
   1105         char      secsave  = '\0';
   1106         char      tersave  = '\0';
   1107         char     *primend  = primary+4;
   1108         char     *secend   = secondary+2;
   1109         char     *terend   = tertiary+2;
   1110         uint32_t  primvalue;
   1111         uint32_t  secvalue;
   1112         uint32_t  tervalue;
   1113 
   1114         if (uprv_strlen(primary) > 4) {
   1115             primsave = *primend;
   1116             *primend = '\0';
   1117         }
   1118 
   1119         if (uprv_strlen(secondary) > 2) {
   1120             secsave = *secend;
   1121             *secend = '\0';
   1122         }
   1123 
   1124         if (uprv_strlen(tertiary) > 2) {
   1125             tersave = *terend;
   1126             *terend = '\0';
   1127         }
   1128 
   1129         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
   1130         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
   1131         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
   1132         if(primvalue <= 0xFF) {
   1133           primvalue <<= 8;
   1134         }
   1135 
   1136         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
   1137            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
   1138            | (tervalue & UCOL_TERTIARYORDERMASK);
   1139 
   1140         if(primsave!='\0') {
   1141             *primend = primsave;
   1142         }
   1143         if(secsave!='\0') {
   1144             *secend = secsave;
   1145         }
   1146         if(tersave!='\0') {
   1147             *terend = tersave;
   1148         }
   1149         return value;
   1150     }
   1151     return 0;
   1152 }
   1153 
   1154 /**
   1155 * Getting collation elements generated from a string
   1156 * @param str character string contain collation elements contained in [] and
   1157 *        seperated by space
   1158 * @param ce array for storage, assuming size > 20
   1159 * @param status error status
   1160 * @return position at the end of the codepoint section
   1161 */
   1162 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
   1163     char       *pStartCP     = uprv_strchr(str, '[');
   1164     int         count        = 0;
   1165     char       *pEndCP;
   1166     char        primary[100];
   1167     char        secondary[100];
   1168     char        tertiary[100];
   1169 
   1170     while (*pStartCP == '[') {
   1171         uint32_t primarycount   = 0;
   1172         uint32_t secondarycount = 0;
   1173         uint32_t tertiarycount  = 0;
   1174         uint32_t CEi = 1;
   1175         pEndCP = strchr(pStartCP, ']');
   1176         if(pEndCP == NULL) {
   1177             break;
   1178         }
   1179         pStartCP ++;
   1180 
   1181         primarycount   = readElement(&pStartCP, primary, ',', status);
   1182         secondarycount = readElement(&pStartCP, secondary, ',', status);
   1183         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
   1184 
   1185         /* I want to get the CEs entered right here, including continuation */
   1186         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
   1187         if (U_FAILURE(*status)) {
   1188             break;
   1189         }
   1190 
   1191         while (2 * CEi < primarycount || CEi < secondarycount ||
   1192                CEi < tertiarycount) {
   1193             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
   1194             if (2 * CEi < primarycount) {
   1195                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
   1196                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
   1197             }
   1198 
   1199             if (2 * CEi + 1 < primarycount) {
   1200                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
   1201                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
   1202             }
   1203 
   1204             if (CEi < secondarycount) {
   1205                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
   1206                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
   1207             }
   1208 
   1209             if (CEi < tertiarycount) {
   1210                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
   1211                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
   1212             }
   1213 
   1214             CEi ++;
   1215             ces[count ++] = value;
   1216         }
   1217 
   1218       pStartCP = pEndCP + 1;
   1219     }
   1220     ces[count] = 0;
   1221     return pStartCP;
   1222 }
   1223 
   1224 /**
   1225 * Getting the FractionalUCA.txt file stream
   1226 */
   1227 static FileStream * getFractionalUCA(void)
   1228 {
   1229     char        newPath[256];
   1230     char        backupPath[256];
   1231     FileStream *result = NULL;
   1232 
   1233     /* Look inside ICU_DATA first */
   1234     uprv_strcpy(newPath, ctest_dataSrcDir());
   1235     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
   1236     uprv_strcat(newPath, "FractionalUCA.txt");
   1237 
   1238     /* As a fallback, try to guess where the source data was located
   1239      *   at the time ICU was built, and look there.
   1240      */
   1241 #if defined (U_TOPSRCDIR)
   1242     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
   1243 #else
   1244     {
   1245         UErrorCode errorCode = U_ZERO_ERROR;
   1246         strcpy(backupPath, loadTestData(&errorCode));
   1247         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
   1248     }
   1249 #endif
   1250     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
   1251 
   1252     result = T_FileStream_open(newPath, "rb");
   1253 
   1254     if (result == NULL) {
   1255         result = T_FileStream_open(backupPath, "rb");
   1256         if (result == NULL) {
   1257             log_err("Failed to open either %s or %s\n", newPath, backupPath);
   1258         }
   1259     }
   1260     return result;
   1261 }
   1262 
   1263 /**
   1264 * Testing the CEs returned by the iterator
   1265 */
   1266 static void TestCEs() {
   1267     FileStream *file = NULL;
   1268     char        line[2048];
   1269     char       *str;
   1270     UChar       codepoints[10];
   1271     uint32_t    ces[20];
   1272     UErrorCode  status = U_ZERO_ERROR;
   1273     UCollator          *coll = ucol_open("", &status);
   1274     uint32_t lineNo = 0;
   1275     UChar       contextCPs[5];
   1276 
   1277     if (U_FAILURE(status)) {
   1278         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
   1279         return;
   1280     }
   1281 
   1282     file = getFractionalUCA();
   1283 
   1284     if (file == NULL) {
   1285         log_err("*** unable to open input FractionalUCA.txt file ***\n");
   1286         return;
   1287     }
   1288 
   1289 
   1290     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1291         int                 count = 0;
   1292         UCollationElements *iter;
   1293         int32_t            preContextCeLen=0;
   1294         lineNo++;
   1295         /* skip this line if it is empty or a comment or is a return value
   1296         or start of some variable section */
   1297         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1298             line[0] == 0x000D || line[0] == '[') {
   1299             continue;
   1300         }
   1301 
   1302         str = getCodePoints(line, codepoints, contextCPs);
   1303 
   1304         /* these are 'fake' codepoints in the fractional UCA, and are used just
   1305          * for positioning of indirect values. They should not go through this
   1306          * test.
   1307          */
   1308         if(*codepoints == 0xFDD0) {
   1309           continue;
   1310         }
   1311         if (*contextCPs != 0) {
   1312             iter = ucol_openElements(coll, contextCPs, -1, &status);
   1313             if (U_FAILURE(status)) {
   1314                 log_err("Error in opening collation elements\n");
   1315                 break;
   1316             }
   1317             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
   1318                 preContextCeLen++;
   1319             }
   1320             ucol_closeElements(iter);
   1321         }
   1322 
   1323         getCEs(str, ces+preContextCeLen, &status);
   1324         if (U_FAILURE(status)) {
   1325             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
   1326             break;
   1327         }
   1328         iter = ucol_openElements(coll, codepoints, -1, &status);
   1329         if (U_FAILURE(status)) {
   1330             log_err("Error in opening collation elements\n");
   1331             break;
   1332         }
   1333         for (;;) {
   1334             uint32_t ce = (uint32_t)ucol_next(iter, &status);
   1335             if (ce == 0xFFFFFFFF) {
   1336                 ce = 0;
   1337             }
   1338             /* we now unconditionally reorder Thai/Lao prevowels, so this
   1339              * test would fail if we don't skip here.
   1340              */
   1341             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
   1342               continue;
   1343             }
   1344             if (ce != ces[count] || U_FAILURE(status)) {
   1345                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
   1346                 break;
   1347             }
   1348             if (ces[count] == 0) {
   1349                 break;
   1350             }
   1351             count ++;
   1352         }
   1353         ucol_closeElements(iter);
   1354     }
   1355 
   1356     T_FileStream_close(file);
   1357     ucol_close(coll);
   1358 }
   1359 
   1360 /**
   1361 * Testing the discontigous contractions
   1362 */
   1363 static void TestDiscontiguos() {
   1364     const char               *rulestr    =
   1365                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
   1366           UChar               rule[50];
   1367           int                 rulelen = u_unescape(rulestr, rule, 50);
   1368     const char               *src[] = {
   1369      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
   1370     /* base character blocked */
   1371      "XD\\u0300", "XD\\u0300\\u0315",
   1372     /* non blocking combining character */
   1373      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
   1374      /* blocking combining character */
   1375      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
   1376      /* contraction prefix */
   1377      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
   1378      "X\\u0300\\u031A\\u0315",
   1379      /* ends not with a contraction character */
   1380      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
   1381      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
   1382     };
   1383     const char               *tgt[] = {
   1384      /* non blocking combining character */
   1385      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
   1386     /* base character blocked */
   1387      "X D \\u0300", "X D \\u0300\\u0315",
   1388     /* non blocking combining character */
   1389      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
   1390      /* blocking combining character */
   1391      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
   1392      /* contraction prefix */
   1393      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
   1394      "X\\u0300 \\u031A \\u0315",
   1395      /* ends not with a contraction character */
   1396      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
   1397      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
   1398     };
   1399           int                 size   = 20;
   1400           UCollator          *coll;
   1401           UErrorCode          status    = U_ZERO_ERROR;
   1402           int                 count     = 0;
   1403           UCollationElements *iter;
   1404           UCollationElements *resultiter;
   1405 
   1406     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
   1407     iter       = ucol_openElements(coll, rule, 1, &status);
   1408     resultiter = ucol_openElements(coll, rule, 1, &status);
   1409 
   1410     if (U_FAILURE(status)) {
   1411         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
   1412         return;
   1413     }
   1414 
   1415     while (count < size) {
   1416         UChar  str[20];
   1417         UChar  tstr[20];
   1418         int    strLen = u_unescape(src[count], str, 20);
   1419         UChar *s;
   1420 
   1421         ucol_setText(iter, str, strLen, &status);
   1422         if (U_FAILURE(status)) {
   1423             log_err("Error opening collation iterator\n");
   1424             return;
   1425         }
   1426 
   1427         u_unescape(tgt[count], tstr, 20);
   1428         s = tstr;
   1429 
   1430         log_verbose("count %d\n", count);
   1431 
   1432         for (;;) {
   1433             uint32_t  ce;
   1434             UChar    *e = u_strchr(s, 0x20);
   1435             if (e == 0) {
   1436                 e = u_strchr(s, 0);
   1437             }
   1438             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
   1439             ce = ucol_next(resultiter, &status);
   1440             if (U_FAILURE(status)) {
   1441                 log_err("Error manipulating collation iterator\n");
   1442                 return;
   1443             }
   1444             while (ce != UCOL_NULLORDER) {
   1445                 if (ce != (uint32_t)ucol_next(iter, &status) ||
   1446                     U_FAILURE(status)) {
   1447                     log_err("Discontiguos contraction test mismatch\n");
   1448                     return;
   1449                 }
   1450                 ce = ucol_next(resultiter, &status);
   1451                 if (U_FAILURE(status)) {
   1452                     log_err("Error getting next collation element\n");
   1453                     return;
   1454                 }
   1455             }
   1456             s = e + 1;
   1457             if (*e == 0) {
   1458                 break;
   1459             }
   1460         }
   1461         ucol_reset(iter);
   1462         backAndForth(iter);
   1463         count ++;
   1464     }
   1465     ucol_closeElements(resultiter);
   1466     ucol_closeElements(iter);
   1467     ucol_close(coll);
   1468 }
   1469 
   1470 static void TestCEBufferOverflow()
   1471 {
   1472     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
   1473     UErrorCode          status = U_ZERO_ERROR;
   1474     UChar               rule[10];
   1475     UCollator          *coll;
   1476     UCollationElements *iter;
   1477 
   1478     u_uastrcpy(rule, "&z < AB");
   1479     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
   1480     if (U_FAILURE(status)) {
   1481         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
   1482         return;
   1483     }
   1484 
   1485     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
   1486     test. this will cause an overflow in getPrev */
   1487     str[0] = 0x0041;    /* 'A' */
   1488     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
   1489     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
   1490     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
   1491     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
   1492                              &status);
   1493     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
   1494         status == U_BUFFER_OVERFLOW_ERROR) {
   1495         log_err("CE buffer should not overflow with long string of trail surrogates\n");
   1496     }
   1497     ucol_closeElements(iter);
   1498     ucol_close(coll);
   1499 }
   1500 
   1501 /**
   1502 * Checking collation element validity.
   1503 */
   1504 #define MAX_CODEPOINTS_TO_SHOW 10
   1505 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
   1506     int i, lengthToUse = length;
   1507     if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
   1508         lengthToUse = MAX_CODEPOINTS_TO_SHOW;
   1509     }
   1510     for (i = 0; i < lengthToUse; ++i) {
   1511         int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
   1512         if (bytesWritten <= 0) {
   1513             break;
   1514         }
   1515         codepointText += bytesWritten;
   1516     }
   1517     if (i < length) {
   1518         sprintf(codepointText, " ...");
   1519     }
   1520 }
   1521 
   1522 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
   1523                              int length)
   1524 {
   1525     UErrorCode          status = U_ZERO_ERROR;
   1526     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
   1527                                                   &status);
   1528     UBool result = FALSE;
   1529     UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
   1530     const char * collLocale;
   1531 
   1532     if (U_FAILURE(status)) {
   1533         log_err("Error creating iterator for testing validity\n");
   1534         return FALSE;
   1535     }
   1536     collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
   1537     if (U_FAILURE(status) || collLocale==NULL) {
   1538         status = U_ZERO_ERROR;
   1539         collLocale = "?";
   1540     }
   1541 
   1542     for (;;) {
   1543         uint32_t ce = ucol_next(iter, &status);
   1544         uint32_t primary, p1, p2, secondary, tertiary;
   1545         if (ce == UCOL_NULLORDER) {
   1546             result = TRUE;
   1547             break;
   1548         }
   1549         if (ce == 0) {
   1550             continue;
   1551         }
   1552         if (ce == 0x02000202) {
   1553             /* special CE for merge-sort character */
   1554             if (*codepoints == 0xFFFE /* && length == 1 */) {
   1555                 /*
   1556                  * Note: We should check for length==1 but the token parser appears
   1557                  * to give us trailing NUL characters.
   1558                  * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
   1559                  *                     rather than the internal collation rule parser
   1560                  */
   1561                 continue;
   1562             } else {
   1563                 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
   1564                         (int)*codepoints, (int)length);
   1565                 break;
   1566             }
   1567         }
   1568         primary   = UCOL_PRIMARYORDER(ce);
   1569         p1 = primary >> 8;
   1570         p2 = primary & 0xFF;
   1571         secondary = UCOL_SECONDARYORDER(ce);
   1572         tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
   1573 
   1574         if (!isContinuation(ce)) {
   1575             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1576                 log_err("Empty CE %08lX except for case bits\n", (long)ce);
   1577                 break;
   1578             }
   1579             if (p1 == 0) {
   1580                 if (p2 != 0) {
   1581                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1582                     break;
   1583                 }
   1584                 primaryDone = TRUE;
   1585             } else {
   1586                 if (p1 <= 2 || p1 >= 0xF0) {
   1587                     /* Primary first bytes F0..FF are specials. */
   1588                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1589                     break;
   1590                 }
   1591                 if (p2 == 0) {
   1592                     primaryDone = TRUE;
   1593                 } else {
   1594                     if (p2 <= 3 || p2 >= 0xFF) {
   1595                         /* Primary second bytes 03 and FF are sort key compression terminators. */
   1596                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1597                         break;
   1598                     }
   1599                     primaryDone = FALSE;
   1600                 }
   1601             }
   1602             if (secondary == 0) {
   1603                 if (primary != 0) {
   1604                     log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
   1605                     break;
   1606                 }
   1607                 secondaryDone = TRUE;
   1608             } else {
   1609                 if (secondary <= 2 ||
   1610                     (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
   1611                 ) {
   1612                     /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
   1613                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1614                     break;
   1615                 }
   1616                 secondaryDone = FALSE;
   1617             }
   1618             if (tertiary == 0) {
   1619                 /* We know that ce != 0. */
   1620                 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
   1621                 break;
   1622             }
   1623             if (tertiary <= 2) {
   1624                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1625                 break;
   1626             }
   1627             tertiaryDone = FALSE;
   1628         } else {
   1629             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1630                 log_err("Empty continuation %08lX\n", (long)ce);
   1631                 break;
   1632             }
   1633             if (primaryDone && primary != 0) {
   1634                 log_err("Primary was done but continues in %08lX\n", (long)ce);
   1635                 break;
   1636             }
   1637             if (p1 == 0) {
   1638                 if (p2 != 0) {
   1639                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1640                     break;
   1641                 }
   1642                 primaryDone = TRUE;
   1643             } else {
   1644                 if (p1 <= 2) {
   1645                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1646                     break;
   1647                 }
   1648                 if (p2 == 0) {
   1649                     primaryDone = TRUE;
   1650                 } else {
   1651                     if (p2 <= 3) {
   1652                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1653                         break;
   1654                     }
   1655                 }
   1656             }
   1657             if (secondaryDone && secondary != 0) {
   1658                 log_err("Secondary was done but continues in %08lX\n", (long)ce);
   1659                 break;
   1660             }
   1661             if (secondary == 0) {
   1662                 secondaryDone = TRUE;
   1663             } else {
   1664                 if (secondary <= 2) {
   1665                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1666                     break;
   1667                 }
   1668             }
   1669             if (tertiaryDone && tertiary != 0) {
   1670                 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
   1671                 break;
   1672             }
   1673             if (tertiary == 0) {
   1674                 tertiaryDone = TRUE;
   1675             } else if (tertiary <= 2) {
   1676                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1677                 break;
   1678             }
   1679         }
   1680     }
   1681     if (!result) {
   1682         char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
   1683         showCodepoints(codepoints, length, codepointText);
   1684         log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
   1685     }
   1686     ucol_closeElements(iter);
   1687     return result;
   1688 }
   1689 
/*
 * NUL-terminated UTF-16 spelling of "[import". The tailoring tests in this file
 * look for it in a collator's rule string with u_strstr() and skip the rule
 * parsing when it is found.
 */
static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 };  /* "[import" */
   1691 
   1692 static void TestCEValidity()
   1693 {
   1694     /* testing UCA collation elements */
   1695     UErrorCode  status      = U_ZERO_ERROR;
   1696     /* en_US has no tailorings */
   1697     UCollator  *coll        = ucol_open("root", &status);
   1698     /* tailored locales */
   1699     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
   1700     const char *loc;
   1701     FileStream *file = NULL;
   1702     char        line[2048];
   1703     UChar       codepoints[11];
   1704     int         count = 0;
   1705     int         maxCount = 0;
   1706     UChar       contextCPs[3];
   1707     UChar32     c;
   1708     UParseError parseError;
   1709     if (U_FAILURE(status)) {
   1710         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1711         return;
   1712     }
   1713     log_verbose("Testing UCA elements\n");
   1714     file = getFractionalUCA();
   1715     if (file == NULL) {
   1716         log_err("Fractional UCA data can not be opened\n");
   1717         return;
   1718     }
   1719 
   1720     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1721         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1722             line[0] == 0x000D || line[0] == '[') {
   1723             continue;
   1724         }
   1725 
   1726         getCodePoints(line, codepoints, contextCPs);
   1727         checkCEValidity(coll, codepoints, u_strlen(codepoints));
   1728     }
   1729 
   1730     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1731     for (c = 0; c <= 0xffff; ++c) {
   1732         if (u_isdefined(c)) {
   1733             codepoints[0] = (UChar)c;
   1734             checkCEValidity(coll, codepoints, 1);
   1735         }
   1736     }
   1737     for (; c <= 0x10ffff; ++c) {
   1738         if (u_isdefined(c)) {
   1739             int32_t i = 0;
   1740             U16_APPEND_UNSAFE(codepoints, i, c);
   1741             checkCEValidity(coll, codepoints, i);
   1742         }
   1743     }
   1744 
   1745     ucol_close(coll);
   1746 
   1747     /* testing tailored collation elements */
   1748     log_verbose("Testing tailored elements\n");
   1749     if(getTestOption(QUICK_OPTION)) {
   1750         maxCount = sizeof(locale)/sizeof(locale[0]);
   1751     } else {
   1752         maxCount = uloc_countAvailable();
   1753     }
   1754     while (count < maxCount) {
   1755         const UChar *rules = NULL,
   1756                     *current = NULL;
   1757         UChar *rulesCopy = NULL;
   1758         int32_t ruleLen = 0;
   1759 
   1760         uint32_t chOffset = 0;
   1761         uint32_t chLen = 0;
   1762         uint32_t exOffset = 0;
   1763         uint32_t exLen = 0;
   1764         uint32_t prefixOffset = 0;
   1765         uint32_t prefixLen = 0;
   1766         UBool    startOfRules = TRUE;
   1767         UColOptionSet opts;
   1768 
   1769         UColTokenParser src;
   1770         uint32_t strength = 0;
   1771         uint16_t specs = 0;
   1772 
   1773         (void)specs;        /* Suppress set but not used warnings. */
   1774         (void)strength;
   1775         (void)prefixLen;
   1776         (void)prefixOffset;
   1777         (void)exLen;
   1778         (void)exOffset;
   1779 
   1780         if(getTestOption(QUICK_OPTION)) {
   1781             loc = locale[count];
   1782         } else {
   1783             loc = uloc_getAvailable(count);
   1784             if(!hasCollationElements(loc)) {
   1785                 count++;
   1786                 continue;
   1787             }
   1788         }
   1789         status = U_ZERO_ERROR; // clear status from previous loop iteration
   1790 
   1791         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1792 
   1793         log_verbose("Testing CEs for %s\n", loc);
   1794 
   1795         coll      = ucol_open(loc, &status);
   1796         if (U_FAILURE(status)) {
   1797             log_err("%s collator creation failed with status %s\n", loc, u_errorName(status));
   1798             return;
   1799         }
   1800 
   1801         src.opts = &opts;
   1802         rules = ucol_getRules(coll, &ruleLen);
   1803 
   1804         /*
   1805          * We have not set up the UColTokenParser with a callback function
   1806          * to fetch [import] sub-rules,
   1807          * so skip testing tailorings that import others.
   1808          * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
   1809          *                     rather than the internal collation rule parser
   1810          */
   1811         if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
   1812             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   1813                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1814             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1815             src.current = src.source = rulesCopy;
   1816             src.end = rulesCopy + ruleLen;
   1817             src.extraCurrent = src.end;
   1818             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1819 
   1820 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1821 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1822             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) {
   1823               strength = src.parsedToken.strength;
   1824               chOffset = src.parsedToken.charsOffset;
   1825               chLen = src.parsedToken.charsLen;
   1826               exOffset = src.parsedToken.extensionOffset;
   1827               exLen = src.parsedToken.extensionLen;
   1828               prefixOffset = src.parsedToken.prefixOffset;
   1829               prefixLen = src.parsedToken.prefixLen;
   1830               specs = src.parsedToken.flags;
   1831 
   1832                 startOfRules = FALSE;
   1833                 uprv_memcpy(codepoints, src.source + chOffset,
   1834                                                        chLen * sizeof(UChar));
   1835                 codepoints[chLen] = 0;
   1836                 checkCEValidity(coll, codepoints, chLen);
   1837             }
   1838             if (U_FAILURE(status)) {
   1839                 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));
   1840             }
   1841             uprv_free(src.source);
   1842             uprv_free(src.reorderCodes);
   1843         }
   1844 
   1845         ucol_close(coll);
   1846         count ++;
   1847     }
   1848     T_FileStream_close(file);
   1849 }
   1850 
   1851 static void printSortKeyError(const UChar   *codepoints, int length,
   1852                                     uint8_t *sortkey, int sklen)
   1853 {
   1854     int count = 0;
   1855     log_err("Sortkey not valid for ");
   1856     while (length > 0) {
   1857         log_err("0x%04x ", *codepoints);
   1858         length --;
   1859         codepoints ++;
   1860     }
   1861     log_err("\nSortkey : ");
   1862     while (count < sklen) {
   1863         log_err("0x%02x ", sortkey[count]);
   1864         count ++;
   1865     }
   1866     log_err("\n");
   1867 }
   1868 
   1869 /**
   1870 * Checking sort key validity for all levels
   1871 */
   1872 static UBool checkSortKeyValidity(UCollator *coll,
   1873                                   const UChar *codepoints,
   1874                                   int length)
   1875 {
   1876     UErrorCode status  = U_ZERO_ERROR;
   1877     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
   1878                                       UCOL_TERTIARY, UCOL_QUATERNARY,
   1879                                       UCOL_IDENTICAL};
   1880     int        strengthlen = 5;
   1881     int        strengthIndex = 0;
   1882     int        caselevel   = 0;
   1883 
   1884     while (caselevel < 1) {
   1885         if (caselevel == 0) {
   1886             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
   1887         }
   1888         else {
   1889             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
   1890         }
   1891 
   1892         while (strengthIndex < strengthlen) {
   1893             int        count01 = 0;
   1894             uint32_t   count   = 0;
   1895             uint8_t    sortkey[128];
   1896             uint32_t   sklen;
   1897 
   1898             ucol_setStrength(coll, strength[strengthIndex]);
   1899             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
   1900             while (sortkey[count] != 0) {
   1901                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
   1902                     printSortKeyError(codepoints, length, sortkey, sklen);
   1903                     return FALSE;
   1904                 }
   1905                 if (sortkey[count] == 1) {
   1906                     count01 ++;
   1907                 }
   1908                 count ++;
   1909             }
   1910 
   1911             if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
   1912                 printSortKeyError(codepoints, length, sortkey, sklen);
   1913                 return FALSE;
   1914             }
   1915             strengthIndex ++;
   1916         }
   1917         caselevel ++;
   1918     }
   1919     return TRUE;
   1920 }
   1921 
   1922 static void TestSortKeyValidity(void)
   1923 {
   1924     /* testing UCA collation elements */
   1925     UErrorCode  status      = U_ZERO_ERROR;
   1926     /* en_US has no tailorings */
   1927     UCollator  *coll        = ucol_open("en_US", &status);
   1928     /* tailored locales */
   1929     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
   1930     FileStream *file = NULL;
   1931     char        line[2048];
   1932     UChar       codepoints[10];
   1933     int         count = 0;
   1934     UChar       contextCPs[5];
   1935     UParseError parseError;
   1936     if (U_FAILURE(status)) {
   1937         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1938         return;
   1939     }
   1940     log_verbose("Testing UCA elements\n");
   1941     file = getFractionalUCA();
   1942     if (file == NULL) {
   1943         log_err("Fractional UCA data can not be opened\n");
   1944         return;
   1945     }
   1946 
   1947     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1948         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1949             line[0] == 0x000D || line[0] == '[') {
   1950             continue;
   1951         }
   1952 
   1953         getCodePoints(line, codepoints, contextCPs);
   1954         if(codepoints[0] == 0xFFFE) {
   1955             /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   1956             continue;
   1957         }
   1958         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
   1959     }
   1960 
   1961     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1962     codepoints[0] = 0;
   1963 
   1964     while (codepoints[0] < 0xFFFF) {
   1965         if (u_isdefined((UChar32)codepoints[0])) {
   1966             checkSortKeyValidity(coll, codepoints, 1);
   1967         }
   1968         codepoints[0] ++;
   1969     }
   1970 
   1971     ucol_close(coll);
   1972 
   1973     /* testing tailored collation elements */
   1974     log_verbose("Testing tailored elements\n");
   1975     while (count < 5) {
   1976         const UChar *rules = NULL,
   1977                     *current = NULL;
   1978         UChar *rulesCopy = NULL;
   1979         int32_t ruleLen = 0;
   1980 
   1981         uint32_t chOffset = 0;
   1982         uint32_t chLen = 0;
   1983         uint32_t exOffset = 0;
   1984         uint32_t exLen = 0;
   1985         uint32_t prefixOffset = 0;
   1986         uint32_t prefixLen = 0;
   1987         UBool    startOfRules = TRUE;
   1988         UColOptionSet opts;
   1989 
   1990         UColTokenParser src;
   1991         uint32_t strength = 0;
   1992         uint16_t specs = 0;
   1993         status = U_ZERO_ERROR; // clear status from previous loop iteration
   1994 
   1995         (void)specs;
   1996         (void)strength;
   1997         (void)prefixLen;
   1998         (void)prefixOffset;
   1999         (void)exLen;
   2000         (void)exOffset;
   2001 
   2002         uprv_memset(&src, 0, sizeof(UColTokenParser));
   2003 
   2004         coll      = ucol_open(locale[count], &status);
   2005         if (U_FAILURE(status)) {
   2006             log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status));
   2007             return;
   2008         }
   2009 
   2010         src.opts = &opts;
   2011         rules = ucol_getRules(coll, &ruleLen);
   2012 
   2013         /*
   2014          * We have not set up the UColTokenParser with a callback function
   2015          * to fetch [import] sub-rules,
   2016          * so skip testing tailorings that import others.
   2017          * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet()
   2018          *                     rather than the internal collation rule parser
   2019          */
   2020         if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
   2021             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   2022                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   2023             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   2024             src.current = src.source = rulesCopy;
   2025             src.end = rulesCopy + ruleLen;
   2026             src.extraCurrent = src.end;
   2027             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   2028 
   2029 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   2030 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   2031             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) {
   2032                 strength = src.parsedToken.strength;
   2033                 chOffset = src.parsedToken.charsOffset;
   2034                 chLen = src.parsedToken.charsLen;
   2035                 exOffset = src.parsedToken.extensionOffset;
   2036                 exLen = src.parsedToken.extensionLen;
   2037                 prefixOffset = src.parsedToken.prefixOffset;
   2038                 prefixLen = src.parsedToken.prefixLen;
   2039                 specs = src.parsedToken.flags;
   2040 
   2041                 startOfRules = FALSE;
   2042                 uprv_memcpy(codepoints, src.source + chOffset,
   2043                                                        chLen * sizeof(UChar));
   2044                 codepoints[chLen] = 0;
   2045                 if(codepoints[0] == 0xFFFE) {
   2046                     /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   2047                     continue;
   2048                 }
   2049                 checkSortKeyValidity(coll, codepoints, chLen);
   2050             }
   2051             if (U_FAILURE(status)) {
   2052                 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));
   2053             }
   2054             uprv_free(src.source);
   2055             uprv_free(src.reorderCodes);
   2056         }
   2057 
   2058         ucol_close(coll);
   2059         count ++;
   2060     }
   2061     T_FileStream_close(file);
   2062 }
   2063 
/**
* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
* normalization on AND jamo tailoring, among other things.
*/
/* Input text for the test: 29 UChars, each group introduced by a U+0020 separator. */
static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
    0x0020, 0xAC00,                 /* simple LV Hangul */
    0x0020, 0xAC01,                 /* simple LVT Hangul */
    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
    0x0020, 0x00E6,                 /* small letter ae, expands */
    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
    0x0020
};
enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };

/*
 * Expected ucol_getOffset() values for the "root" entry of tsceItems.
 * TestSearchCollatorElements reads the offset before every ucol_next() call
 * and compares it with the next entry; for ucol_previous() the same list is
 * consumed from its end towards its start.
 * Each row corresponds to one group of tsceText; the final 29 equals kLen_tsceText.
 */
static const int32_t rootStandardOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,
    6,  7,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,
    20, 21,22,23,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };

/*
 * Expected ucol_getOffset() values for the "root@collation=search" entry of
 * tsceItems, used the same way as rootStandardOffsets. Several rows are longer
 * than their rootStandardOffsets counterparts, i.e. more iterator steps are
 * expected for those groups.
 */
static const int32_t rootSearchOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,6,
    6,  7,8,8,8,8,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,20,
    20, 21,22,22,23,23,23,24,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };

/* One test case: the locale ID passed to ucol_open() and the expected offset list with its length. */
typedef struct {
    const char *    locale;
    const int32_t * offsets;
    int32_t         offsetsLen;
} TSCEItem;

/* Test cases for TestSearchCollatorElements; the entry with a NULL locale terminates the list. */
static const TSCEItem tsceItems[] = {
    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
    { NULL,                    NULL,                0                        }
};
   2126 
   2127 static void TestSearchCollatorElements(void)
   2128 {
   2129     const TSCEItem * tsceItemPtr;
   2130     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
   2131         UErrorCode status = U_ZERO_ERROR;
   2132         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
   2133         if ( U_SUCCESS(status) ) {
   2134             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
   2135             if ( U_SUCCESS(status) ) {
   2136                 int32_t offset, element;
   2137                 const int32_t * nextOffsetPtr;
   2138                 const int32_t * limitOffsetPtr;
   2139 
   2140                 nextOffsetPtr = tsceItemPtr->offsets;
   2141                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
   2142                 do {
   2143                     offset = ucol_getOffset(uce);
   2144                     element = ucol_next(uce, &status);
   2145                     if ( element == 0 ) {
   2146                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
   2147                     }
   2148                     if ( nextOffsetPtr < limitOffsetPtr ) {
   2149                         if (offset != *nextOffsetPtr) {
   2150                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
   2151                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
   2152                             nextOffsetPtr = limitOffsetPtr;
   2153                             break;
   2154                         }
   2155                         nextOffsetPtr++;
   2156                     } else {
   2157                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
   2158                     }
   2159                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
   2160                 if ( nextOffsetPtr < limitOffsetPtr ) {
   2161                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
   2162                 }
   2163 
   2164                 ucol_setOffset(uce, kLen_tsceText, &status);
   2165                 status = U_ZERO_ERROR;
   2166                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
   2167                 limitOffsetPtr = tsceItemPtr->offsets;
   2168                 do {
   2169                     offset = ucol_getOffset(uce);
   2170                     element = ucol_previous(uce, &status);
   2171                     if ( element == 0 ) {
   2172                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
   2173                     }
   2174                     if ( nextOffsetPtr > limitOffsetPtr ) {
   2175                         nextOffsetPtr--;
   2176                         if (offset != *nextOffsetPtr) {
   2177                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
   2178                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
   2179                             nextOffsetPtr = limitOffsetPtr;
   2180                             break;
   2181                         }
   2182                    } else {
   2183                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
   2184                     }
   2185                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
   2186                 if ( nextOffsetPtr > limitOffsetPtr ) {
   2187                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
   2188                 }
   2189 
   2190                 ucol_closeElements(uce);
   2191             } else {
   2192                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
   2193             }
   2194             ucol_close(ucol);
   2195         } else {
   2196             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
   2197         }
   2198     }
   2199 }
   2200 
   2201 #endif /* #if !UCONFIG_NO_COLLATION */
   2202