Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2011, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File CITERTST.C
      9 *
     10 * Modification History:
     11 * Date      Name               Description
     12 *           Madhu Katragadda   Ported for C API
     13 * 02/19/01  synwee             Modified test case for new collation iterator
     14 *********************************************************************************/
     15 /*
     16  * Collation Iterator tests.
     17  * (Let me reiterate my position...)
     18  */
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION
     23 
     24 #include "unicode/ucol.h"
     25 #include "unicode/ucoleitr.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/ustring.h"
     29 #include "unicode/putil.h"
     30 #include "callcoll.h"
     31 #include "cmemory.h"
     32 #include "cintltst.h"
     33 #include "citertst.h"
     34 #include "ccolltst.h"
     35 #include "filestrm.h"
     36 #include "cstring.h"
     37 #include "ucol_imp.h"
     38 #include "ucol_tok.h"
     39 #include "uparse.h"
     40 #include <stdio.h>
     41 
     42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
     43 
     44 void addCollIterTest(TestNode** root)
     45 {
     46     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
     47     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
     48     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
     49     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
     50     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
     51     addTest(root, &TestNormalizedUnicodeChar,
     52                                 "tscoll/citertst/TestNormalizedUnicodeChar");
     53     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
     54     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
     55     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
     56     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
     57     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
     58     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
     59     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
     60     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
     61     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
     62     addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
     63 }
     64 
     65 /* The locales we support */
     66 
     67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
     68 
     69 static void TestBug672() {
     70     UErrorCode  status = U_ZERO_ERROR;
     71     UChar       pattern[20];
     72     UChar       text[50];
     73     int         i;
     74     int         result[3][3];
     75 
     76     u_uastrcpy(pattern, "resume");
     77     u_uastrcpy(text, "Time to resume updating my resume.");
     78 
     79     for (i = 0; i < 3; ++ i) {
     80         UCollator          *coll = ucol_open(LOCALES[i], &status);
     81         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
     82                                                      &status);
     83         UCollationElements *titer = ucol_openElements(coll, text, -1,
     84                                                      &status);
     85         if (U_FAILURE(status)) {
     86             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
     87                     myErrorName(status));
     88             return;
     89         }
     90 
     91         log_verbose("locale tested %s\n", LOCALES[i]);
     92 
     93         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
     94                U_SUCCESS(status)) {
     95         }
     96         if (U_FAILURE(status)) {
     97             log_err("ERROR: reversing collation iterator :%s\n",
     98                     myErrorName(status));
     99             return;
    100         }
    101         ucol_reset(pitr);
    102 
    103         ucol_setOffset(titer, u_strlen(pattern), &status);
    104         if (U_FAILURE(status)) {
    105             log_err("ERROR: setting offset in collator :%s\n",
    106                     myErrorName(status));
    107             return;
    108         }
    109         result[i][0] = ucol_getOffset(titer);
    110         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    111 
    112         /* Use previous() */
    113         ucol_previous(titer, &status);
    114         result[i][1] = ucol_getOffset(titer);
    115         log_verbose("Current offset %d after previous\n", result[i][1]);
    116 
    117         /* Add one to index */
    118         log_verbose("Adding one to current offset...\n");
    119         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    120         if (U_FAILURE(status)) {
    121             log_err("ERROR: setting offset in collator :%s\n",
    122                     myErrorName(status));
    123             return;
    124         }
    125         result[i][2] = ucol_getOffset(titer);
    126         log_verbose("Current offset in text = %d\n", result[i][2]);
    127         ucol_closeElements(pitr);
    128         ucol_closeElements(titer);
    129         ucol_close(coll);
    130     }
    131 
    132     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    133         uprv_memcmp(result[1], result[2], 3) != 0) {
    134         log_err("ERROR: Different locales have different offsets at the same character\n");
    135     }
    136 }
    137 
    138 
    139 
    140 /*  Running this test with normalization enabled showed up a bug in the incremental
    141     normalization code. */
    142 static void TestBug672Normalize() {
    143     UErrorCode  status = U_ZERO_ERROR;
    144     UChar       pattern[20];
    145     UChar       text[50];
    146     int         i;
    147     int         result[3][3];
    148 
    149     u_uastrcpy(pattern, "resume");
    150     u_uastrcpy(text, "Time to resume updating my resume.");
    151 
    152     for (i = 0; i < 3; ++ i) {
    153         UCollator          *coll = ucol_open(LOCALES[i], &status);
    154         UCollationElements *pitr = NULL;
    155         UCollationElements *titer = NULL;
    156 
    157         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    158 
    159         pitr = ucol_openElements(coll, pattern, -1, &status);
    160         titer = ucol_openElements(coll, text, -1, &status);
    161         if (U_FAILURE(status)) {
    162             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
    163                     myErrorName(status));
    164             return;
    165         }
    166 
    167         log_verbose("locale tested %s\n", LOCALES[i]);
    168 
    169         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
    170                U_SUCCESS(status)) {
    171         }
    172         if (U_FAILURE(status)) {
    173             log_err("ERROR: reversing collation iterator :%s\n",
    174                     myErrorName(status));
    175             return;
    176         }
    177         ucol_reset(pitr);
    178 
    179         ucol_setOffset(titer, u_strlen(pattern), &status);
    180         if (U_FAILURE(status)) {
    181             log_err("ERROR: setting offset in collator :%s\n",
    182                     myErrorName(status));
    183             return;
    184         }
    185         result[i][0] = ucol_getOffset(titer);
    186         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    187 
    188         /* Use previous() */
    189         ucol_previous(titer, &status);
    190         result[i][1] = ucol_getOffset(titer);
    191         log_verbose("Current offset %d after previous\n", result[i][1]);
    192 
    193         /* Add one to index */
    194         log_verbose("Adding one to current offset...\n");
    195         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    196         if (U_FAILURE(status)) {
    197             log_err("ERROR: setting offset in collator :%s\n",
    198                     myErrorName(status));
    199             return;
    200         }
    201         result[i][2] = ucol_getOffset(titer);
    202         log_verbose("Current offset in text = %d\n", result[i][2]);
    203         ucol_closeElements(pitr);
    204         ucol_closeElements(titer);
    205         ucol_close(coll);
    206     }
    207 
    208     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    209         uprv_memcmp(result[1], result[2], 3) != 0) {
    210         log_err("ERROR: Different locales have different offsets at the same character\n");
    211     }
    212 }
    213 
    214 
    215 
    216 
    217 /**
    218  * Test for CollationElementIterator previous and next for the whole set of
    219  * unicode characters.
    220  */
    221 static void TestUnicodeChar()
    222 {
    223     UChar source[0x100];
    224     UCollator *en_us;
    225     UCollationElements *iter;
    226     UErrorCode status = U_ZERO_ERROR;
    227     UChar codepoint;
    228 
    229     UChar *test;
    230     en_us = ucol_open("en_US", &status);
    231     if (U_FAILURE(status)){
    232        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
    233               myErrorName(status));
    234        return;
    235     }
    236 
    237     for (codepoint = 1; codepoint < 0xFFFE;)
    238     {
    239       test = source;
    240 
    241       while (codepoint % 0xFF != 0)
    242       {
    243         if (u_isdefined(codepoint))
    244           *(test ++) = codepoint;
    245         codepoint ++;
    246       }
    247 
    248       if (u_isdefined(codepoint))
    249         *(test ++) = codepoint;
    250 
    251       if (codepoint != 0xFFFF)
    252         codepoint ++;
    253 
    254       *test = 0;
    255       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
    256       if(U_FAILURE(status)){
    257           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    258               myErrorName(status));
    259           ucol_close(en_us);
    260           return;
    261       }
    262       /* A basic test to see if it's working at all */
    263       log_verbose("codepoint testing %x\n", codepoint);
    264       backAndForth(iter);
    265       ucol_closeElements(iter);
    266 
    267       /* null termination test */
    268       iter=ucol_openElements(en_us, source, -1, &status);
    269       if(U_FAILURE(status)){
    270           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    271               myErrorName(status));
    272           ucol_close(en_us);
    273           return;
    274       }
    275       /* A basic test to see if it's working at all */
    276       backAndForth(iter);
    277       ucol_closeElements(iter);
    278     }
    279 
    280     ucol_close(en_us);
    281 }
    282 
    283 /**
    284  * Test for CollationElementIterator previous and next for the whole set of
    285  * unicode characters with normalization on.
    286  */
    287 static void TestNormalizedUnicodeChar()
    288 {
    289     UChar source[0x100];
    290     UCollator *th_th;
    291     UCollationElements *iter;
    292     UErrorCode status = U_ZERO_ERROR;
    293     UChar codepoint;
    294 
    295     UChar *test;
    296     /* thai should have normalization on */
    297     th_th = ucol_open("th_TH", &status);
    298     if (U_FAILURE(status)){
    299         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
    300               myErrorName(status));
    301         return;
    302     }
    303 
    304     for (codepoint = 1; codepoint < 0xFFFE;)
    305     {
    306       test = source;
    307 
    308       while (codepoint % 0xFF != 0)
    309       {
    310         if (u_isdefined(codepoint))
    311           *(test ++) = codepoint;
    312         codepoint ++;
    313       }
    314 
    315       if (u_isdefined(codepoint))
    316         *(test ++) = codepoint;
    317 
    318       if (codepoint != 0xFFFF)
    319         codepoint ++;
    320 
    321       *test = 0;
    322       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
    323       if(U_FAILURE(status)){
    324           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    325               myErrorName(status));
    326             ucol_close(th_th);
    327           return;
    328       }
    329 
    330       backAndForth(iter);
    331       ucol_closeElements(iter);
    332 
    333       iter=ucol_openElements(th_th, source, -1, &status);
    334       if(U_FAILURE(status)){
    335           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    336               myErrorName(status));
    337             ucol_close(th_th);
    338           return;
    339       }
    340 
    341       backAndForth(iter);
    342       ucol_closeElements(iter);
    343     }
    344 
    345     ucol_close(th_th);
    346 }
    347 
    348 /**
    349 * Test the incremental normalization
    350 */
    351 static void TestNormalization()
    352 {
    353           UErrorCode          status = U_ZERO_ERROR;
    354     const char               *str    =
    355                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
    356           UCollator          *coll;
    357           UChar               rule[50];
    358           int                 rulelen = u_unescape(str, rule, 50);
    359           int                 count = 0;
    360     const char                *testdata[] =
    361                         {"\\u1ED9", "o\\u0323\\u0302",
    362                         "\\u0300\\u0315", "\\u0315\\u0300",
    363                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
    364                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
    365                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
    366                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
    367                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
    368     int32_t   srclen;
    369     UChar source[10];
    370     UCollationElements *iter;
    371 
    372     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
    373     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    374     if (U_FAILURE(status)){
    375         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
    376               myErrorName(status));
    377         return;
    378     }
    379 
    380     srclen = u_unescape(testdata[0], source, 10);
    381     iter = ucol_openElements(coll, source, srclen, &status);
    382     backAndForth(iter);
    383     ucol_closeElements(iter);
    384 
    385     srclen = u_unescape(testdata[1], source, 10);
    386     iter = ucol_openElements(coll, source, srclen, &status);
    387     backAndForth(iter);
    388     ucol_closeElements(iter);
    389 
    390     while (count < 12) {
    391         srclen = u_unescape(testdata[count], source, 10);
    392         iter = ucol_openElements(coll, source, srclen, &status);
    393 
    394         if (U_FAILURE(status)){
    395             log_err("ERROR: in creation of collator element iterator\n %s\n",
    396                   myErrorName(status));
    397             return;
    398         }
    399         backAndForth(iter);
    400         ucol_closeElements(iter);
    401 
    402         iter = ucol_openElements(coll, source, -1, &status);
    403 
    404         if (U_FAILURE(status)){
    405             log_err("ERROR: in creation of collator element iterator\n %s\n",
    406                   myErrorName(status));
    407             return;
    408         }
    409         backAndForth(iter);
    410         ucol_closeElements(iter);
    411         count ++;
    412     }
    413     ucol_close(coll);
    414 }
    415 
    416 /**
    417  * Test for CollationElementIterator.previous()
    418  *
    419  * @bug 4108758 - Make sure it works with contracting characters
    420  *
    421  */
    422 static void TestPrevious()
    423 {
    424     UCollator *coll=NULL;
    425     UChar rule[50];
    426     UChar *source;
    427     UCollator *c1, *c2, *c3;
    428     UCollationElements *iter;
    429     UErrorCode status = U_ZERO_ERROR;
    430     UChar test1[50];
    431     UChar test2[50];
    432 
    433     u_uastrcpy(test1, "What subset of all possible test cases?");
    434     u_uastrcpy(test2, "has the highest probability of detecting");
    435     coll = ucol_open("en_US", &status);
    436 
    437     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
    438     log_verbose("English locale testing back and forth\n");
    439     if(U_FAILURE(status)){
    440         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    441             myErrorName(status));
    442         ucol_close(coll);
    443         return;
    444     }
    445     /* A basic test to see if it's working at all */
    446     backAndForth(iter);
    447     ucol_closeElements(iter);
    448     ucol_close(coll);
    449 
    450     /* Test with a contracting character sequence */
    451     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    452     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    453 
    454     log_verbose("Contraction rule testing back and forth with no normalization\n");
    455 
    456     if (c1 == NULL || U_FAILURE(status))
    457     {
    458         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
    459             myErrorName(status));
    460         return;
    461     }
    462     source=(UChar*)malloc(sizeof(UChar) * 20);
    463     u_uastrcpy(source, "abchdcba");
    464     iter=ucol_openElements(c1, source, u_strlen(source), &status);
    465     if(U_FAILURE(status)){
    466         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    467             myErrorName(status));
    468         return;
    469     }
    470     backAndForth(iter);
    471     ucol_closeElements(iter);
    472     ucol_close(c1);
    473 
    474     /* Test with an expanding character sequence */
    475     u_uastrcpy(rule, "&a < b < c/abd < d");
    476     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    477     log_verbose("Expansion rule testing back and forth with no normalization\n");
    478     if (c2 == NULL || U_FAILURE(status))
    479     {
    480         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    481             myErrorName(status));
    482         return;
    483     }
    484     u_uastrcpy(source, "abcd");
    485     iter=ucol_openElements(c2, source, u_strlen(source), &status);
    486     if(U_FAILURE(status)){
    487         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    488             myErrorName(status));
    489         return;
    490     }
    491     backAndForth(iter);
    492     ucol_closeElements(iter);
    493     ucol_close(c2);
    494     /* Now try both */
    495     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
    496     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
    497     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
    498 
    499     if (c3 == NULL || U_FAILURE(status))
    500     {
    501         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    502             myErrorName(status));
    503         return;
    504     }
    505     u_uastrcpy(source, "abcdbchdc");
    506     iter=ucol_openElements(c3, source, u_strlen(source), &status);
    507     if(U_FAILURE(status)){
    508         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    509             myErrorName(status));
    510         return;
    511     }
    512     backAndForth(iter);
    513     ucol_closeElements(iter);
    514     ucol_close(c3);
    515     source[0] = 0x0e41;
    516     source[1] = 0x0e02;
    517     source[2] = 0x0e41;
    518     source[3] = 0x0e02;
    519     source[4] = 0x0e27;
    520     source[5] = 0x61;
    521     source[6] = 0x62;
    522     source[7] = 0x63;
    523     source[8] = 0;
    524 
    525     coll = ucol_open("th_TH", &status);
    526     log_verbose("Thai locale testing back and forth with normalization\n");
    527     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    528     if(U_FAILURE(status)){
    529         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    530             myErrorName(status));
    531         return;
    532     }
    533     backAndForth(iter);
    534     ucol_closeElements(iter);
    535     ucol_close(coll);
    536 
    537     /* prev test */
    538     source[0] = 0x0061;
    539     source[1] = 0x30CF;
    540     source[2] = 0x3099;
    541     source[3] = 0x30FC;
    542     source[4] = 0;
    543 
    544     coll = ucol_open("ja_JP", &status);
    545     log_verbose("Japanese locale testing back and forth with normalization\n");
    546     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    547     if(U_FAILURE(status)){
    548         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    549             myErrorName(status));
    550         return;
    551     }
    552     backAndForth(iter);
    553     ucol_closeElements(iter);
    554     ucol_close(coll);
    555 
    556     free(source);
    557 }
    558 
    559 /**
    560  * Test for getOffset() and setOffset()
    561  */
    562 static void TestOffset()
    563 {
    564     UErrorCode status= U_ZERO_ERROR;
    565     UCollator *en_us=NULL;
    566     UCollationElements *iter, *pristine;
    567     int32_t offset;
    568     OrderAndOffset *orders;
    569     int32_t orderLength=0;
    570     int     count = 0;
    571     UChar test1[50];
    572     UChar test2[50];
    573 
    574     u_uastrcpy(test1, "What subset of all possible test cases?");
    575     u_uastrcpy(test2, "has the highest probability of detecting");
    576     en_us = ucol_open("en_US", &status);
    577     log_verbose("Testing getOffset and setOffset for collations\n");
    578     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
    579     if(U_FAILURE(status)){
    580         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    581             myErrorName(status));
    582         ucol_close(en_us);
    583         return;
    584     }
    585 
    586     /* testing boundaries */
    587     ucol_setOffset(iter, 0, &status);
    588     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
    589         log_err("Error: After setting offset to 0, we should be at the end "
    590                 "of the backwards iteration");
    591     }
    592     ucol_setOffset(iter, u_strlen(test1), &status);
    593     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
    594         log_err("Error: After setting offset to end of the string, we should "
    595                 "be at the end of the backwards iteration");
    596     }
    597 
    598     /* Run all the way through the iterator, then get the offset */
    599 
    600     orders = getOrders(iter, &orderLength);
    601 
    602     offset = ucol_getOffset(iter);
    603 
    604     if (offset != u_strlen(test1))
    605     {
    606         log_err("offset at end != length %d vs %d\n", offset,
    607             u_strlen(test1) );
    608     }
    609 
    610     /* Now set the offset back to the beginning and see if it works */
    611     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    612     if(U_FAILURE(status)){
    613         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    614             myErrorName(status));
    615     ucol_close(en_us);
    616         return;
    617     }
    618     status = U_ZERO_ERROR;
    619 
    620     ucol_setOffset(iter, 0, &status);
    621     if (U_FAILURE(status))
    622     {
    623         log_err("setOffset failed. %s\n",    myErrorName(status));
    624     }
    625     else
    626     {
    627         assertEqual(iter, pristine);
    628     }
    629 
    630     ucol_closeElements(pristine);
    631     ucol_closeElements(iter);
    632     free(orders);
    633 
    634     /* testing offsets in normalization buffer */
    635     test1[0] = 0x61;
    636     test1[1] = 0x300;
    637     test1[2] = 0x316;
    638     test1[3] = 0x62;
    639     test1[4] = 0;
    640     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    641     iter = ucol_openElements(en_us, test1, 4, &status);
    642     if(U_FAILURE(status)){
    643         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    644             myErrorName(status));
    645         ucol_close(en_us);
    646         return;
    647     }
    648 
    649     count = 0;
    650     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
    651         U_SUCCESS(status)) {
    652         switch (count) {
    653         case 0:
    654             if (ucol_getOffset(iter) != 1) {
    655                 log_err("ERROR: Offset of iteration should be 1\n");
    656             }
    657             break;
    658         case 3:
    659             if (ucol_getOffset(iter) != 4) {
    660                 log_err("ERROR: Offset of iteration should be 4\n");
    661             }
    662             break;
    663         default:
    664             if (ucol_getOffset(iter) != 3) {
    665                 log_err("ERROR: Offset of iteration should be 3\n");
    666             }
    667         }
    668         count ++;
    669     }
    670 
    671     ucol_reset(iter);
    672     count = 0;
    673     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
    674         U_SUCCESS(status)) {
    675         switch (count) {
    676         case 0:
    677         case 1:
    678             if (ucol_getOffset(iter) != 3) {
    679                 log_err("ERROR: Offset of iteration should be 3\n");
    680             }
    681             break;
    682         case 2:
    683             if (ucol_getOffset(iter) != 1) {
    684                 log_err("ERROR: Offset of iteration should be 1\n");
    685             }
    686             break;
    687         default:
    688             if (ucol_getOffset(iter) != 0) {
    689                 log_err("ERROR: Offset of iteration should be 0\n");
    690             }
    691         }
    692         count ++;
    693     }
    694 
    695     if(U_FAILURE(status)){
    696         log_err("ERROR: in iterating collation elements %s\n",
    697             myErrorName(status));
    698     }
    699 
    700     ucol_closeElements(iter);
    701     ucol_close(en_us);
    702 }
    703 
    704 /**
    705  * Test for setText()
    706  */
    707 static void TestSetText()
    708 {
    709     int32_t c,i;
    710     UErrorCode status = U_ZERO_ERROR;
    711     UCollator *en_us=NULL;
    712     UCollationElements *iter1, *iter2;
    713     UChar test1[50];
    714     UChar test2[50];
    715 
    716     u_uastrcpy(test1, "What subset of all possible test cases?");
    717     u_uastrcpy(test2, "has the highest probability of detecting");
    718     en_us = ucol_open("en_US", &status);
    719     log_verbose("testing setText for Collation elements\n");
    720     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    721     if(U_FAILURE(status)){
    722         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
    723             myErrorName(status));
    724     ucol_close(en_us);
    725         return;
    726     }
    727     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
    728     if(U_FAILURE(status)){
    729         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
    730             myErrorName(status));
    731     ucol_close(en_us);
    732         return;
    733     }
    734 
    735     /* Run through the second iterator just to exercise it */
    736     c = ucol_next(iter2, &status);
    737     i = 0;
    738 
    739     while ( ++i < 10 && (c != UCOL_NULLORDER))
    740     {
    741         if (U_FAILURE(status))
    742         {
    743             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
    744             ucol_closeElements(iter2);
    745             ucol_closeElements(iter1);
    746     ucol_close(en_us);
    747             return;
    748         }
    749 
    750         c = ucol_next(iter2, &status);
    751     }
    752 
    753     /* Now set it to point to the same string as the first iterator */
    754     ucol_setText(iter2, test1, u_strlen(test1), &status);
    755     if (U_FAILURE(status))
    756     {
    757         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
    758     }
    759     else
    760     {
    761         assertEqual(iter1, iter2);
    762     }
    763 
    764     /* Now set it to point to a null string with fake length*/
    765     ucol_setText(iter2, NULL, 2, &status);
    766     if (U_FAILURE(status))
    767     {
    768         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
    769     }
    770     else
    771     {
    772         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
    773             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
    774         }
    775     }
    776 
    777     ucol_closeElements(iter2);
    778     ucol_closeElements(iter1);
    779     ucol_close(en_us);
    780 }
    781 
    782 /** @bug 4108762
    783  * Test for getMaxExpansion()
    784  */
    785 static void TestMaxExpansion()
    786 {
    787     UErrorCode          status = U_ZERO_ERROR;
    788     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    789     UChar               ch     = 0;
    790     UChar32             unassigned = 0xEFFFD;
    791     UChar               supplementary[2];
    792     uint32_t            stringOffset = 0;
    793     UBool               isError = FALSE;
    794     uint32_t            sorder = 0;
    795     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    796     uint32_t            temporder = 0;
    797 
    798     UChar rule[256];
    799     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    800     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    801         UCOL_DEFAULT_STRENGTH,NULL, &status);
    802     if(U_SUCCESS(status) && coll) {
    803       iter = ucol_openElements(coll, &ch, 1, &status);
    804 
    805       while (ch < 0xFFFF && U_SUCCESS(status)) {
    806           int      count = 1;
    807           uint32_t order;
    808           int32_t  size = 0;
    809 
    810           ch ++;
    811 
    812           ucol_setText(iter, &ch, 1, &status);
    813           order = ucol_previous(iter, &status);
    814 
    815           /* thai management */
    816           if (order == 0)
    817               order = ucol_previous(iter, &status);
    818 
    819           while (U_SUCCESS(status) &&
    820               ucol_previous(iter, &status) != UCOL_NULLORDER) {
    821               count ++;
    822           }
    823 
    824           size = ucol_getMaxExpansion(iter, order);
    825           if (U_FAILURE(status) || size < count) {
    826               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    827                   ch, count);
    828           }
    829       }
    830 
    831       /* testing for exact max expansion */
    832       ch = 0;
    833       while (ch < 0x61) {
    834           uint32_t order;
    835           int32_t  size;
    836           ucol_setText(iter, &ch, 1, &status);
    837           order = ucol_previous(iter, &status);
    838           size  = ucol_getMaxExpansion(iter, order);
    839           if (U_FAILURE(status) || size != 1) {
    840               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    841                   ch, 1);
    842           }
    843           ch ++;
    844       }
    845 
    846       ch = 0x63;
    847       ucol_setText(iter, &ch, 1, &status);
    848       temporder = ucol_previous(iter, &status);
    849 
    850       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
    851           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    852                   ch, 3);
    853       }
    854 
    855       ch = 0x64;
    856       ucol_setText(iter, &ch, 1, &status);
    857       temporder = ucol_previous(iter, &status);
    858 
    859       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
    860           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    861                   ch, 3);
    862       }
    863 
    864       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
    865       ucol_setText(iter, supplementary, 2, &status);
    866       sorder = ucol_previous(iter, &status);
    867 
    868       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
    869           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    870                   ch, 2);
    871       }
    872 
    873       /* testing jamo */
    874       ch = 0x1165;
    875 
    876       ucol_setText(iter, &ch, 1, &status);
    877       temporder = ucol_previous(iter, &status);
    878       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
    879           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    880                   ch, 3);
    881       }
    882 
    883       ucol_closeElements(iter);
    884       ucol_close(coll);
    885 
    886       /* testing special jamo &a<\u1160 */
    887       rule[0] = 0x26;
    888       rule[1] = 0x71;
    889       rule[2] = 0x3c;
    890       rule[3] = 0x1165;
    891       rule[4] = 0x2f;
    892       rule[5] = 0x71;
    893       rule[6] = 0x71;
    894       rule[7] = 0x71;
    895       rule[8] = 0x71;
    896       rule[9] = 0;
    897 
    898       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    899           UCOL_DEFAULT_STRENGTH,NULL, &status);
    900       iter = ucol_openElements(coll, &ch, 1, &status);
    901 
    902       temporder = ucol_previous(iter, &status);
    903       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
    904           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    905                   ch, 5);
    906       }
    907 
    908       ucol_closeElements(iter);
    909       ucol_close(coll);
    910     } else {
    911       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    912     }
    913 
    914 }
    915 
    916 
    917 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
    918 {
    919     int32_t c1, c2;
    920     int32_t count = 0;
    921     UErrorCode status = U_ZERO_ERROR;
    922 
    923     do
    924     {
    925         c1 = ucol_next(i1, &status);
    926         c2 = ucol_next(i2, &status);
    927 
    928         if (c1 != c2)
    929         {
    930             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
    931             break;
    932         }
    933 
    934         count += 1;
    935     }
    936     while (c1 != UCOL_NULLORDER);
    937 }
    938 
    939 /**
    940  * Testing iterators with extremely small buffers
    941  */
    942 static void TestSmallBuffer()
    943 {
    944     UErrorCode          status = U_ZERO_ERROR;
    945     UCollator          *coll;
    946     UCollationElements *testiter,
    947                        *iter;
    948     int32_t             count = 0;
    949     OrderAndOffset     *testorders,
    950                        *orders;
    951 
    952     UChar teststr[500];
    953     UChar str[] = {0x300, 0x31A, 0};
    954     /*
    955     creating a long string of decomposable characters,
    956     since by default the writable buffer is of size 256
    957     */
    958     while (count < 500) {
    959         if ((count & 1) == 0) {
    960             teststr[count ++] = 0x300;
    961         }
    962         else {
    963             teststr[count ++] = 0x31A;
    964         }
    965     }
    966 
    967     coll = ucol_open("th_TH", &status);
    968     if(U_SUCCESS(status) && coll) {
    969       testiter = ucol_openElements(coll, teststr, 500, &status);
    970       iter = ucol_openElements(coll, str, 2, &status);
    971 
    972       orders     = getOrders(iter, &count);
    973       if (count != 2) {
    974           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
    975       }
    976 
    977       /*
    978       this will rearrange the string data to 250 characters of 0x300 first then
    979       250 characters of 0x031A
    980       */
    981       testorders = getOrders(testiter, &count);
    982 
    983       if (count != 500) {
    984           log_err("Error decomposition does not give the right sized collation elements\n");
    985       }
    986 
    987       while (count != 0) {
    988           /* UCA collation element for 0x0F76 */
    989           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
    990               (count <= 250 && testorders[-- count].order != orders[0].order)) {
    991               log_err("Error decomposition does not give the right collation element at %d count\n", count);
    992               break;
    993           }
    994       }
    995 
    996       free(testorders);
    997       free(orders);
    998 
    999       ucol_reset(testiter);
   1000 
   1001       /* ensures closing of elements done properly to clear writable buffer */
   1002       ucol_next(testiter, &status);
   1003       ucol_next(testiter, &status);
   1004       ucol_closeElements(testiter);
   1005       ucol_closeElements(iter);
   1006       ucol_close(coll);
   1007     } else {
   1008       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   1009     }
   1010 }
   1011 
   /**
    * Snippet of code from genuca.
    *
    * Converts a single hexadecimal digit character to its numeric value.
    *
    * @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are digits
    * @return the digit value 0..15, or 0 for any other character
    */
   static int32_t hex2num(char hex) {
       if ('0' <= hex && hex <= '9') {
           return hex - '0';
       }
       if ('a' <= hex && hex <= 'f') {
           return 10 + (hex - 'a');
       }
       if ('A' <= hex && hex <= 'F') {
           return 10 + (hex - 'A');
       }
       return 0;
   }
   1026 
   1027 /**
   1028 * Getting codepoints from a string
   1029 * @param str character string contain codepoints seperated by space and ended
   1030 *        by a semicolon
   1031 * @param codepoints array for storage, assuming size > 5
   1032 * @return position at the end of the codepoint section
   1033 */
   1034 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
   1035     UErrorCode errorCode = U_ZERO_ERROR;
   1036     char *semi = uprv_strchr(str, ';');
   1037     char *pipe = uprv_strchr(str, '|');
   1038     char *s;
   1039     *codepoints = 0;
   1040     *contextCPs = 0;
   1041     if(semi == NULL) {
   1042         log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
   1043         return str;
   1044     }
   1045     if(pipe != NULL) {
   1046         int32_t contextLength;
   1047         *pipe = 0;
   1048         contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
   1049         *pipe = '|';
   1050         if(U_FAILURE(errorCode)) {
   1051             log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
   1052             return str;
   1053         }
   1054         /* prepend the precontext string to the codepoints */
   1055         u_memcpy(codepoints, contextCPs, contextLength);
   1056         codepoints += contextLength;
   1057         /* start of the code point string */
   1058         s = pipe + 1;
   1059     } else {
   1060         s = str;
   1061     }
   1062     u_parseString(s, codepoints, 99, NULL, &errorCode);
   1063     if(U_FAILURE(errorCode)) {
   1064         log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
   1065         return str;
   1066     }
   1067     return semi + 1;
   1068 }
   1069 
   1070 /**
   1071 * Sniplets of code from genuca
   1072 */
   1073 static int32_t
   1074 readElement(char **from, char *to, char separator, UErrorCode *status)
   1075 {
   1076     if (U_SUCCESS(*status)) {
   1077         char    buffer[1024];
   1078         int32_t i = 0;
   1079         while (**from != separator) {
   1080             if (**from != ' ') {
   1081                 *(buffer+i++) = **from;
   1082             }
   1083             (*from)++;
   1084         }
   1085         (*from)++;
   1086         *(buffer + i) = 0;
   1087         strcpy(to, buffer);
   1088         return i/2;
   1089     }
   1090 
   1091     return 0;
   1092 }
   1093 
   1094 /**
   1095 * Sniplets of code from genuca
   1096 */
   1097 static uint32_t
   1098 getSingleCEValue(char *primary, char *secondary, char *tertiary,
   1099                           UErrorCode *status)
   1100 {
   1101     if (U_SUCCESS(*status)) {
   1102         uint32_t  value    = 0;
   1103         char      primsave = '\0';
   1104         char      secsave  = '\0';
   1105         char      tersave  = '\0';
   1106         char     *primend  = primary+4;
   1107         char     *secend   = secondary+2;
   1108         char     *terend   = tertiary+2;
   1109         uint32_t  primvalue;
   1110         uint32_t  secvalue;
   1111         uint32_t  tervalue;
   1112 
   1113         if (uprv_strlen(primary) > 4) {
   1114             primsave = *primend;
   1115             *primend = '\0';
   1116         }
   1117 
   1118         if (uprv_strlen(secondary) > 2) {
   1119             secsave = *secend;
   1120             *secend = '\0';
   1121         }
   1122 
   1123         if (uprv_strlen(tertiary) > 2) {
   1124             tersave = *terend;
   1125             *terend = '\0';
   1126         }
   1127 
   1128         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
   1129         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
   1130         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
   1131         if(primvalue <= 0xFF) {
   1132           primvalue <<= 8;
   1133         }
   1134 
   1135         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
   1136            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
   1137            | (tervalue & UCOL_TERTIARYORDERMASK);
   1138 
   1139         if(primsave!='\0') {
   1140             *primend = primsave;
   1141         }
   1142         if(secsave!='\0') {
   1143             *secend = secsave;
   1144         }
   1145         if(tersave!='\0') {
   1146             *terend = tersave;
   1147         }
   1148         return value;
   1149     }
   1150     return 0;
   1151 }
   1152 
   1153 /**
   1154 * Getting collation elements generated from a string
   1155 * @param str character string contain collation elements contained in [] and
   1156 *        seperated by space
   1157 * @param ce array for storage, assuming size > 20
   1158 * @param status error status
   1159 * @return position at the end of the codepoint section
   1160 */
   1161 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
   1162     char       *pStartCP     = uprv_strchr(str, '[');
   1163     int         count        = 0;
   1164     char       *pEndCP;
   1165     char        primary[100];
   1166     char        secondary[100];
   1167     char        tertiary[100];
   1168 
   1169     while (*pStartCP == '[') {
   1170         uint32_t primarycount   = 0;
   1171         uint32_t secondarycount = 0;
   1172         uint32_t tertiarycount  = 0;
   1173         uint32_t CEi = 1;
   1174         pEndCP = strchr(pStartCP, ']');
   1175         if(pEndCP == NULL) {
   1176             break;
   1177         }
   1178         pStartCP ++;
   1179 
   1180         primarycount   = readElement(&pStartCP, primary, ',', status);
   1181         secondarycount = readElement(&pStartCP, secondary, ',', status);
   1182         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
   1183 
   1184         /* I want to get the CEs entered right here, including continuation */
   1185         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
   1186         if (U_FAILURE(*status)) {
   1187             break;
   1188         }
   1189 
   1190         while (2 * CEi < primarycount || CEi < secondarycount ||
   1191                CEi < tertiarycount) {
   1192             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
   1193             if (2 * CEi < primarycount) {
   1194                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
   1195                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
   1196             }
   1197 
   1198             if (2 * CEi + 1 < primarycount) {
   1199                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
   1200                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
   1201             }
   1202 
   1203             if (CEi < secondarycount) {
   1204                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
   1205                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
   1206             }
   1207 
   1208             if (CEi < tertiarycount) {
   1209                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
   1210                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
   1211             }
   1212 
   1213             CEi ++;
   1214             ces[count ++] = value;
   1215         }
   1216 
   1217       pStartCP = pEndCP + 1;
   1218     }
   1219     ces[count] = 0;
   1220     return pStartCP;
   1221 }
   1222 
   1223 /**
   1224 * Getting the FractionalUCA.txt file stream
   1225 */
   1226 static FileStream * getFractionalUCA(void)
   1227 {
   1228     char        newPath[256];
   1229     char        backupPath[256];
   1230     FileStream *result = NULL;
   1231 
   1232     /* Look inside ICU_DATA first */
   1233     uprv_strcpy(newPath, ctest_dataSrcDir());
   1234     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
   1235     uprv_strcat(newPath, "FractionalUCA.txt");
   1236 
   1237     /* As a fallback, try to guess where the source data was located
   1238      *   at the time ICU was built, and look there.
   1239      */
   1240 #if defined (U_TOPSRCDIR)
   1241     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
   1242 #else
   1243     {
   1244         UErrorCode errorCode = U_ZERO_ERROR;
   1245         strcpy(backupPath, loadTestData(&errorCode));
   1246         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
   1247     }
   1248 #endif
   1249     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
   1250 
   1251     result = T_FileStream_open(newPath, "rb");
   1252 
   1253     if (result == NULL) {
   1254         result = T_FileStream_open(backupPath, "rb");
   1255         if (result == NULL) {
   1256             log_err("Failed to open either %s or %s\n", newPath, backupPath);
   1257         }
   1258     }
   1259     return result;
   1260 }
   1261 
   1262 /**
   1263 * Testing the CEs returned by the iterator
   1264 */
   1265 static void TestCEs() {
   1266     FileStream *file = NULL;
   1267     char        line[2048];
   1268     char       *str;
   1269     UChar       codepoints[10];
   1270     uint32_t    ces[20];
   1271     UErrorCode  status = U_ZERO_ERROR;
   1272     UCollator          *coll = ucol_open("", &status);
   1273     uint32_t lineNo = 0;
   1274     UChar       contextCPs[5];
   1275 
   1276     if (U_FAILURE(status)) {
   1277         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
   1278         return;
   1279     }
   1280 
   1281     file = getFractionalUCA();
   1282 
   1283     if (file == NULL) {
   1284         log_err("*** unable to open input FractionalUCA.txt file ***\n");
   1285         return;
   1286     }
   1287 
   1288 
   1289     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1290         int                 count = 0;
   1291         UCollationElements *iter;
   1292         int32_t            preContextCeLen=0;
   1293         lineNo++;
   1294         /* skip this line if it is empty or a comment or is a return value
   1295         or start of some variable section */
   1296         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1297             line[0] == 0x000D || line[0] == '[') {
   1298             continue;
   1299         }
   1300 
   1301         str = getCodePoints(line, codepoints, contextCPs);
   1302 
   1303         /* these are 'fake' codepoints in the fractional UCA, and are used just
   1304          * for positioning of indirect values. They should not go through this
   1305          * test.
   1306          */
   1307         if(*codepoints == 0xFDD0) {
   1308           continue;
   1309         }
   1310         if (*contextCPs != 0) {
   1311             iter = ucol_openElements(coll, contextCPs, -1, &status);
   1312             if (U_FAILURE(status)) {
   1313                 log_err("Error in opening collation elements\n");
   1314                 break;
   1315             }
   1316             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
   1317                 preContextCeLen++;
   1318             }
   1319             ucol_closeElements(iter);
   1320         }
   1321 
   1322         getCEs(str, ces+preContextCeLen, &status);
   1323         if (U_FAILURE(status)) {
   1324             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
   1325             break;
   1326         }
   1327         iter = ucol_openElements(coll, codepoints, -1, &status);
   1328         if (U_FAILURE(status)) {
   1329             log_err("Error in opening collation elements\n");
   1330             break;
   1331         }
   1332         for (;;) {
   1333             uint32_t ce = (uint32_t)ucol_next(iter, &status);
   1334             if (ce == 0xFFFFFFFF) {
   1335                 ce = 0;
   1336             }
   1337             /* we now unconditionally reorder Thai/Lao prevowels, so this
   1338              * test would fail if we don't skip here.
   1339              */
   1340             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
   1341               continue;
   1342             }
   1343             if (ce != ces[count] || U_FAILURE(status)) {
   1344                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
   1345                 break;
   1346             }
   1347             if (ces[count] == 0) {
   1348                 break;
   1349             }
   1350             count ++;
   1351         }
   1352         ucol_closeElements(iter);
   1353     }
   1354 
   1355     T_FileStream_close(file);
   1356     ucol_close(coll);
   1357 }
   1358 
   1359 /**
   1360 * Testing the discontigous contractions
   1361 */
   1362 static void TestDiscontiguos() {
   1363     const char               *rulestr    =
   1364                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
   1365           UChar               rule[50];
   1366           int                 rulelen = u_unescape(rulestr, rule, 50);
   1367     const char               *src[] = {
   1368      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
   1369     /* base character blocked */
   1370      "XD\\u0300", "XD\\u0300\\u0315",
   1371     /* non blocking combining character */
   1372      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
   1373      /* blocking combining character */
   1374      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
   1375      /* contraction prefix */
   1376      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
   1377      "X\\u0300\\u031A\\u0315",
   1378      /* ends not with a contraction character */
   1379      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
   1380      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
   1381     };
   1382     const char               *tgt[] = {
   1383      /* non blocking combining character */
   1384      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
   1385     /* base character blocked */
   1386      "X D \\u0300", "X D \\u0300\\u0315",
   1387     /* non blocking combining character */
   1388      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
   1389      /* blocking combining character */
   1390      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
   1391      /* contraction prefix */
   1392      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
   1393      "X\\u0300 \\u031A \\u0315",
   1394      /* ends not with a contraction character */
   1395      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
   1396      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
   1397     };
   1398           int                 size   = 20;
   1399           UCollator          *coll;
   1400           UErrorCode          status    = U_ZERO_ERROR;
   1401           int                 count     = 0;
   1402           UCollationElements *iter;
   1403           UCollationElements *resultiter;
   1404 
   1405     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
   1406     iter       = ucol_openElements(coll, rule, 1, &status);
   1407     resultiter = ucol_openElements(coll, rule, 1, &status);
   1408 
   1409     if (U_FAILURE(status)) {
   1410         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
   1411         return;
   1412     }
   1413 
   1414     while (count < size) {
   1415         UChar  str[20];
   1416         UChar  tstr[20];
   1417         int    strLen = u_unescape(src[count], str, 20);
   1418         UChar *s;
   1419 
   1420         ucol_setText(iter, str, strLen, &status);
   1421         if (U_FAILURE(status)) {
   1422             log_err("Error opening collation iterator\n");
   1423             return;
   1424         }
   1425 
   1426         u_unescape(tgt[count], tstr, 20);
   1427         s = tstr;
   1428 
   1429         log_verbose("count %d\n", count);
   1430 
   1431         for (;;) {
   1432             uint32_t  ce;
   1433             UChar    *e = u_strchr(s, 0x20);
   1434             if (e == 0) {
   1435                 e = u_strchr(s, 0);
   1436             }
   1437             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
   1438             ce = ucol_next(resultiter, &status);
   1439             if (U_FAILURE(status)) {
   1440                 log_err("Error manipulating collation iterator\n");
   1441                 return;
   1442             }
   1443             while (ce != UCOL_NULLORDER) {
   1444                 if (ce != (uint32_t)ucol_next(iter, &status) ||
   1445                     U_FAILURE(status)) {
   1446                     log_err("Discontiguos contraction test mismatch\n");
   1447                     return;
   1448                 }
   1449                 ce = ucol_next(resultiter, &status);
   1450                 if (U_FAILURE(status)) {
   1451                     log_err("Error getting next collation element\n");
   1452                     return;
   1453                 }
   1454             }
   1455             s = e + 1;
   1456             if (*e == 0) {
   1457                 break;
   1458             }
   1459         }
   1460         ucol_reset(iter);
   1461         backAndForth(iter);
   1462         count ++;
   1463     }
   1464     ucol_closeElements(resultiter);
   1465     ucol_closeElements(iter);
   1466     ucol_close(coll);
   1467 }
   1468 
   1469 static void TestCEBufferOverflow()
   1470 {
   1471     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
   1472     UErrorCode          status = U_ZERO_ERROR;
   1473     UChar               rule[10];
   1474     UCollator          *coll;
   1475     UCollationElements *iter;
   1476 
   1477     u_uastrcpy(rule, "&z < AB");
   1478     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
   1479     if (U_FAILURE(status)) {
   1480         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
   1481         return;
   1482     }
   1483 
   1484     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
   1485     test. this will cause an overflow in getPrev */
   1486     str[0] = 0x0041;    /* 'A' */
   1487     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
   1488     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
   1489     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
   1490     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
   1491                              &status);
   1492     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
   1493         status == U_BUFFER_OVERFLOW_ERROR) {
   1494         log_err("CE buffer should not overflow with long string of trail surrogates\n");
   1495     }
   1496     ucol_closeElements(iter);
   1497     ucol_close(coll);
   1498 }
   1499 
   1500 /**
   1501 * Checking collation element validity.
   1502 */
   1503 #define MAX_CODEPOINTS_TO_SHOW 10
   1504 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
   1505     int i, lengthToUse = length;
   1506     if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
   1507         lengthToUse = MAX_CODEPOINTS_TO_SHOW;
   1508     }
   1509     for (i = 0; i < lengthToUse; ++i) {
   1510         int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
   1511         if (bytesWritten <= 0) {
   1512             break;
   1513         }
   1514         codepointText += bytesWritten;
   1515     }
   1516     if (i < length) {
   1517         sprintf(codepointText, " ...");
   1518     }
   1519 }
   1520 
   1521 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
   1522                              int length)
   1523 {
   1524     UErrorCode          status = U_ZERO_ERROR;
   1525     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
   1526                                                   &status);
   1527     UBool result = FALSE;
   1528     UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
   1529     const char * collLocale;
   1530 
   1531     if (U_FAILURE(status)) {
   1532         log_err("Error creating iterator for testing validity\n");
   1533         return FALSE;
   1534     }
   1535     collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
   1536     if (U_FAILURE(status) || collLocale==NULL) {
   1537         status = U_ZERO_ERROR;
   1538         collLocale = "?";
   1539     }
   1540 
   1541     for (;;) {
   1542         uint32_t ce = ucol_next(iter, &status);
   1543         uint32_t primary, p1, p2, secondary, tertiary;
   1544         if (ce == UCOL_NULLORDER) {
   1545             result = TRUE;
   1546             break;
   1547         }
   1548         if (ce == 0) {
   1549             continue;
   1550         }
   1551         if (ce == 0x02000202) {
   1552             /* special CE for merge-sort character */
   1553             if (*codepoints == 0xFFFE /* && length == 1 */) {
   1554                 /*
   1555                  * Note: We should check for length==1 but the token parser appears
   1556                  * to give us trailing NUL characters.
   1557                  * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
   1558                  *                     rather than the internal collation rule parser
   1559                  */
   1560                 continue;
   1561             } else {
   1562                 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
   1563                         (int)*codepoints, (int)length);
   1564                 break;
   1565             }
   1566         }
   1567         primary   = UCOL_PRIMARYORDER(ce);
   1568         p1 = primary >> 8;
   1569         p2 = primary & 0xFF;
   1570         secondary = UCOL_SECONDARYORDER(ce);
   1571         tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
   1572 
   1573         if (!isContinuation(ce)) {
   1574             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1575                 log_err("Empty CE %08lX except for case bits\n", (long)ce);
   1576                 break;
   1577             }
   1578             if (p1 == 0) {
   1579                 if (p2 != 0) {
   1580                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1581                     break;
   1582                 }
   1583                 primaryDone = TRUE;
   1584             } else {
   1585                 if (p1 <= 2 || p1 >= 0xF0) {
   1586                     /* Primary first bytes F0..FF are specials. */
   1587                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1588                     break;
   1589                 }
   1590                 if (p2 == 0) {
   1591                     primaryDone = TRUE;
   1592                 } else {
   1593                     if (p2 <= 3 || p2 >= 0xFF) {
   1594                         /* Primary second bytes 03 and FF are sort key compression terminators. */
   1595                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1596                         break;
   1597                     }
   1598                     primaryDone = FALSE;
   1599                 }
   1600             }
   1601             if (secondary == 0) {
   1602                 if (primary != 0) {
   1603                     log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
   1604                     break;
   1605                 }
   1606                 secondaryDone = TRUE;
   1607             } else {
   1608                 if (secondary <= 2 ||
   1609                     (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
   1610                 ) {
   1611                     /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
   1612                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1613                     break;
   1614                 }
   1615                 secondaryDone = FALSE;
   1616             }
   1617             if (tertiary == 0) {
   1618                 /* We know that ce != 0. */
   1619                 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
   1620                 break;
   1621             }
   1622             if (tertiary <= 2) {
   1623                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1624                 break;
   1625             }
   1626             tertiaryDone = FALSE;
   1627         } else {
   1628             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
   1629                 log_err("Empty continuation %08lX\n", (long)ce);
   1630                 break;
   1631             }
   1632             if (primaryDone && primary != 0) {
   1633                 log_err("Primary was done but continues in %08lX\n", (long)ce);
   1634                 break;
   1635             }
   1636             if (p1 == 0) {
   1637                 if (p2 != 0) {
   1638                     log_err("Primary 00 xx in %08lX\n", (long)ce);
   1639                     break;
   1640                 }
   1641                 primaryDone = TRUE;
   1642             } else {
   1643                 if (p1 <= 2) {
   1644                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
   1645                     break;
   1646                 }
   1647                 if (p2 == 0) {
   1648                     primaryDone = TRUE;
   1649                 } else {
   1650                     if (p2 <= 3) {
   1651                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
   1652                         break;
   1653                     }
   1654                 }
   1655             }
   1656             if (secondaryDone && secondary != 0) {
   1657                 log_err("Secondary was done but continues in %08lX\n", (long)ce);
   1658                 break;
   1659             }
   1660             if (secondary == 0) {
   1661                 secondaryDone = TRUE;
   1662             } else {
   1663                 if (secondary <= 2) {
   1664                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
   1665                     break;
   1666                 }
   1667             }
   1668             if (tertiaryDone && tertiary != 0) {
   1669                 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
   1670                 break;
   1671             }
   1672             if (tertiary == 0) {
   1673                 tertiaryDone = TRUE;
   1674             } else if (tertiary <= 2) {
   1675                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
   1676                 break;
   1677             }
   1678         }
   1679     }
   1680     if (!result) {
   1681         char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
   1682         showCodepoints(codepoints, length, codepointText);
   1683         log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
   1684     }
   1685     ucol_closeElements(iter);
   1686     return result;
   1687 }
   1688 
   1689 static void TestCEValidity()
   1690 {
   1691     /* testing UCA collation elements */
   1692     UErrorCode  status      = U_ZERO_ERROR;
   1693     /* en_US has no tailorings */
   1694     UCollator  *coll        = ucol_open("root", &status);
   1695     /* tailored locales */
   1696     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
   1697     const char *loc;
   1698     FileStream *file = NULL;
   1699     char        line[2048];
   1700     UChar       codepoints[11];
   1701     int         count = 0;
   1702     int         maxCount = 0;
   1703     UChar       contextCPs[3];
   1704     UChar32     c;
   1705     UParseError parseError;
   1706     if (U_FAILURE(status)) {
   1707         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1708         return;
   1709     }
   1710     log_verbose("Testing UCA elements\n");
   1711     file = getFractionalUCA();
   1712     if (file == NULL) {
   1713         log_err("Fractional UCA data can not be opened\n");
   1714         return;
   1715     }
   1716 
   1717     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1718         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1719             line[0] == 0x000D || line[0] == '[') {
   1720             continue;
   1721         }
   1722 
   1723         getCodePoints(line, codepoints, contextCPs);
   1724         checkCEValidity(coll, codepoints, u_strlen(codepoints));
   1725     }
   1726 
   1727     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1728     for (c = 0; c <= 0xffff; ++c) {
   1729         if (u_isdefined(c)) {
   1730             codepoints[0] = (UChar)c;
   1731             checkCEValidity(coll, codepoints, 1);
   1732         }
   1733     }
   1734     for (; c <= 0x10ffff; ++c) {
   1735         if (u_isdefined(c)) {
   1736             int32_t i = 0;
   1737             U16_APPEND_UNSAFE(codepoints, i, c);
   1738             checkCEValidity(coll, codepoints, i);
   1739         }
   1740     }
   1741 
   1742     ucol_close(coll);
   1743 
   1744     /* testing tailored collation elements */
   1745     log_verbose("Testing tailored elements\n");
   1746     if(getTestOption(QUICK_OPTION)) {
   1747         maxCount = sizeof(locale)/sizeof(locale[0]);
   1748     } else {
   1749         maxCount = uloc_countAvailable();
   1750     }
   1751     while (count < maxCount) {
   1752         const UChar *rules = NULL,
   1753                     *current = NULL;
   1754         UChar *rulesCopy = NULL;
   1755         int32_t ruleLen = 0;
   1756 
   1757         uint32_t chOffset = 0;
   1758         uint32_t chLen = 0;
   1759         uint32_t exOffset = 0;
   1760         uint32_t exLen = 0;
   1761         uint32_t prefixOffset = 0;
   1762         uint32_t prefixLen = 0;
   1763         UBool    startOfRules = TRUE;
   1764         UColOptionSet opts;
   1765 
   1766         UColTokenParser src;
   1767         uint32_t strength = 0;
   1768         uint16_t specs = 0;
   1769         if(getTestOption(QUICK_OPTION)) {
   1770             loc = locale[count];
   1771         } else {
   1772             loc = uloc_getAvailable(count);
   1773             if(!hasCollationElements(loc)) {
   1774                 count++;
   1775                 continue;
   1776             }
   1777         }
   1778 
   1779         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1780 
   1781         log_verbose("Testing CEs for %s\n", loc);
   1782 
   1783         coll      = ucol_open(loc, &status);
   1784         if (U_FAILURE(status)) {
   1785             log_err("%s collator creation failed\n", loc);
   1786             return;
   1787         }
   1788 
   1789         src.opts = &opts;
   1790         rules = ucol_getRules(coll, &ruleLen);
   1791 
   1792         if (ruleLen > 0) {
   1793             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   1794                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1795             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1796             src.current = src.source = rulesCopy;
   1797             src.end = rulesCopy + ruleLen;
   1798             src.extraCurrent = src.end;
   1799             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1800 
   1801 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1802 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1803             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   1804               strength = src.parsedToken.strength;
   1805               chOffset = src.parsedToken.charsOffset;
   1806               chLen = src.parsedToken.charsLen;
   1807               exOffset = src.parsedToken.extensionOffset;
   1808               exLen = src.parsedToken.extensionLen;
   1809               prefixOffset = src.parsedToken.prefixOffset;
   1810               prefixLen = src.parsedToken.prefixLen;
   1811               specs = src.parsedToken.flags;
   1812 
   1813                 startOfRules = FALSE;
   1814                 uprv_memcpy(codepoints, src.source + chOffset,
   1815                                                        chLen * sizeof(UChar));
   1816                 codepoints[chLen] = 0;
   1817                 checkCEValidity(coll, codepoints, chLen);
   1818             }
   1819             uprv_free(src.source);
   1820         }
   1821 
   1822         ucol_close(coll);
   1823         count ++;
   1824     }
   1825     T_FileStream_close(file);
   1826 }
   1827 
   1828 static void printSortKeyError(const UChar   *codepoints, int length,
   1829                                     uint8_t *sortkey, int sklen)
   1830 {
   1831     int count = 0;
   1832     log_err("Sortkey not valid for ");
   1833     while (length > 0) {
   1834         log_err("0x%04x ", *codepoints);
   1835         length --;
   1836         codepoints ++;
   1837     }
   1838     log_err("\nSortkey : ");
   1839     while (count < sklen) {
   1840         log_err("0x%02x ", sortkey[count]);
   1841         count ++;
   1842     }
   1843     log_err("\n");
   1844 }
   1845 
   1846 /**
   1847 * Checking sort key validity for all levels
   1848 */
   1849 static UBool checkSortKeyValidity(UCollator *coll,
   1850                                   const UChar *codepoints,
   1851                                   int length)
   1852 {
   1853     UErrorCode status  = U_ZERO_ERROR;
   1854     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
   1855                                       UCOL_TERTIARY, UCOL_QUATERNARY,
   1856                                       UCOL_IDENTICAL};
   1857     int        strengthlen = 5;
   1858     int        strengthIndex = 0;
   1859     int        caselevel   = 0;
   1860 
   1861     while (caselevel < 1) {
   1862         if (caselevel == 0) {
   1863             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
   1864         }
   1865         else {
   1866             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
   1867         }
   1868 
   1869         while (strengthIndex < strengthlen) {
   1870             int        count01 = 0;
   1871             uint32_t   count   = 0;
   1872             uint8_t    sortkey[128];
   1873             uint32_t   sklen;
   1874 
   1875             ucol_setStrength(coll, strength[strengthIndex]);
   1876             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
   1877             while (sortkey[count] != 0) {
   1878                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
   1879                     printSortKeyError(codepoints, length, sortkey, sklen);
   1880                     return FALSE;
   1881                 }
   1882                 if (sortkey[count] == 1) {
   1883                     count01 ++;
   1884                 }
   1885                 count ++;
   1886             }
   1887 
   1888             if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
   1889                 printSortKeyError(codepoints, length, sortkey, sklen);
   1890                 return FALSE;
   1891             }
   1892             strengthIndex ++;
   1893         }
   1894         caselevel ++;
   1895     }
   1896     return TRUE;
   1897 }
   1898 
   1899 static void TestSortKeyValidity(void)
   1900 {
   1901     /* testing UCA collation elements */
   1902     UErrorCode  status      = U_ZERO_ERROR;
   1903     /* en_US has no tailorings */
   1904     UCollator  *coll        = ucol_open("en_US", &status);
   1905     /* tailored locales */
   1906     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
   1907     FileStream *file = NULL;
   1908     char        line[2048];
   1909     UChar       codepoints[10];
   1910     int         count = 0;
   1911     UChar       contextCPs[5];
   1912     UParseError parseError;
   1913     if (U_FAILURE(status)) {
   1914         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1915         return;
   1916     }
   1917     log_verbose("Testing UCA elements\n");
   1918     file = getFractionalUCA();
   1919     if (file == NULL) {
   1920         log_err("Fractional UCA data can not be opened\n");
   1921         return;
   1922     }
   1923 
   1924     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1925         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1926             line[0] == 0x000D || line[0] == '[') {
   1927             continue;
   1928         }
   1929 
   1930         getCodePoints(line, codepoints, contextCPs);
   1931         if(codepoints[0] == 0xFFFE) {
   1932             /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   1933             continue;
   1934         }
   1935         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
   1936     }
   1937 
   1938     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1939     codepoints[0] = 0;
   1940 
   1941     while (codepoints[0] < 0xFFFF) {
   1942         if (u_isdefined((UChar32)codepoints[0])) {
   1943             checkSortKeyValidity(coll, codepoints, 1);
   1944         }
   1945         codepoints[0] ++;
   1946     }
   1947 
   1948     ucol_close(coll);
   1949 
   1950     /* testing tailored collation elements */
   1951     log_verbose("Testing tailored elements\n");
   1952     while (count < 5) {
   1953         const UChar *rules = NULL,
   1954                     *current = NULL;
   1955         UChar *rulesCopy = NULL;
   1956         int32_t ruleLen = 0;
   1957 
   1958         uint32_t chOffset = 0;
   1959         uint32_t chLen = 0;
   1960         uint32_t exOffset = 0;
   1961         uint32_t exLen = 0;
   1962         uint32_t prefixOffset = 0;
   1963         uint32_t prefixLen = 0;
   1964         UBool    startOfRules = TRUE;
   1965         UColOptionSet opts;
   1966 
   1967         UColTokenParser src;
   1968         uint32_t strength = 0;
   1969         uint16_t specs = 0;
   1970 
   1971         uprv_memset(&src, 0, sizeof(UColTokenParser));
   1972 
   1973         coll      = ucol_open(locale[count], &status);
   1974         if (U_FAILURE(status)) {
   1975             log_err("%s collator creation failed\n", locale[count]);
   1976             return;
   1977         }
   1978 
   1979         src.opts = &opts;
   1980         rules = ucol_getRules(coll, &ruleLen);
   1981 
   1982         if (ruleLen > 0) {
   1983             rulesCopy = (UChar *)uprv_malloc((ruleLen +
   1984                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1985             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1986             src.current = src.source = rulesCopy;
   1987             src.end = rulesCopy + ruleLen;
   1988             src.extraCurrent = src.end;
   1989             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1990 
   1991 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1992 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1993             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
   1994                 strength = src.parsedToken.strength;
   1995                 chOffset = src.parsedToken.charsOffset;
   1996                 chLen = src.parsedToken.charsLen;
   1997                 exOffset = src.parsedToken.extensionOffset;
   1998                 exLen = src.parsedToken.extensionLen;
   1999                 prefixOffset = src.parsedToken.prefixOffset;
   2000                 prefixLen = src.parsedToken.prefixLen;
   2001                 specs = src.parsedToken.flags;
   2002 
   2003                 startOfRules = FALSE;
   2004                 uprv_memcpy(codepoints, src.source + chOffset,
   2005                                                        chLen * sizeof(UChar));
   2006                 codepoints[chLen] = 0;
   2007                 if(codepoints[0] == 0xFFFE) {
   2008                     /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
   2009                     continue;
   2010                 }
   2011                 checkSortKeyValidity(coll, codepoints, chLen);
   2012             }
   2013             uprv_free(src.source);
   2014         }
   2015 
   2016         ucol_close(coll);
   2017         count ++;
   2018     }
   2019     T_FileStream_close(file);
   2020 }
   2021 
   2022 /**
   2023 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
   2024 * normalization on AND jamo tailoring, among other things.
   2025 */
   2026 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
   2027     0x0020, 0xAC00,                 /* simple LV Hangul */
   2028     0x0020, 0xAC01,                 /* simple LVT Hangul */
   2029     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
   2030     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
   2031     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
   2032     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
   2033     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
   2034     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
   2035     0x0020, 0x00E6,                 /* small letter ae, expands */
   2036     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
   2037     0x0020
   2038 };
   2039 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
   2040 
   /*
    * Expected ucol_getOffset() values for the "root" entry of tsceItems, in the
    * order TestSearchCollatorElements reads them while calling ucol_next().
    * One row per group of tsceText; the first value of a row is the index of
    * that group's leading U+0020. The last row is the end of the text.
    */
   static const int32_t rootStandardOffsets[] = {
       /* group at  0 */   0,   1,  2,
       /* group at  2 */   2,   3,  4,  4,
       /* group at  4 */   4,   5,  6,  6,
       /* group at  6 */   6,   7,  8,  8,
       /* group at  8 */   8,   9, 10, 11,
       /* group at 12 */  12,  13, 14, 15,
       /* group at 16 */  16,  17, 18, 19,
       /* group at 20 */  20,  21, 22, 23,
       /* group at 24 */  24,  25, 26, 26, 26,
       /* group at 26 */  26,  27, 28, 28,
       /* final space */  28,
       /* end of text */  29
   };
   /* number of entries in rootStandardOffsets */
   enum { kLen_rootStandardOffsets = sizeof rootStandardOffsets / sizeof *rootStandardOffsets };
   2056 
   /*
    * Expected ucol_getOffset() values for the "root@collation=search" entry of
    * tsceItems, in the order TestSearchCollatorElements reads them while
    * calling ucol_next(). One row per group of tsceText; the first value of a
    * row is the index of that group's leading U+0020. The last row is the end
    * of the text.
    */
   static const int32_t rootSearchOffsets[] = {
       /* group at  0 */   0,   1,  2,
       /* group at  2 */   2,   3,  4,  4,
       /* group at  4 */   4,   5,  6,  6,  6,
       /* group at  6 */   6,   7,  8,  8,  8,  8,  8,  8,
       /* group at  8 */   8,   9, 10, 11,
       /* group at 12 */  12,  13, 14, 15,
       /* group at 16 */  16,  17, 18, 19, 20,
       /* group at 20 */  20,  21, 22, 22, 23, 23, 23, 24,
       /* group at 24 */  24,  25, 26, 26, 26,
       /* group at 26 */  26,  27, 28, 28,
       /* final space */  28,
       /* end of text */  29
   };
   /* number of entries in rootSearchOffsets */
   enum { kLen_rootSearchOffsets = sizeof rootSearchOffsets / sizeof *rootSearchOffsets };
   2072 
   /* One TestSearchCollatorElements case: a collator locale and its expected offsets. */
   typedef struct {
       /* locale ID passed to ucol_open(); NULL marks the end of the table */
       const char *    locale;
       /* expected ucol_getOffset() values, in forward iteration order */
       const int32_t * offsets;
       /* number of entries in offsets */
       int32_t         offsetsLen;
   } TSCEItem;
   2078 
   2079 static const TSCEItem tsceItems[] = {
   2080     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
   2081     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
   2082     { NULL,                    NULL,                0                        }
   2083 };
   2084 
   2085 static void TestSearchCollatorElements(void)
   2086 {
   2087     const TSCEItem * tsceItemPtr;
   2088     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
   2089         UErrorCode status = U_ZERO_ERROR;
   2090         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
   2091         if ( U_SUCCESS(status) ) {
   2092             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
   2093             if ( U_SUCCESS(status) ) {
   2094                 int32_t offset, element;
   2095                 const int32_t * nextOffsetPtr;
   2096                 const int32_t * limitOffsetPtr;
   2097 
   2098                 nextOffsetPtr = tsceItemPtr->offsets;
   2099                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
   2100                 do {
   2101                     offset = ucol_getOffset(uce);
   2102                     element = ucol_next(uce, &status);
   2103                     if ( element == 0 ) {
   2104                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
   2105                     }
   2106                     if ( nextOffsetPtr < limitOffsetPtr ) {
   2107                         if (offset != *nextOffsetPtr) {
   2108                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
   2109                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
   2110                             nextOffsetPtr = limitOffsetPtr;
   2111                             break;
   2112                         }
   2113                         nextOffsetPtr++;
   2114                     } else {
   2115                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
   2116                     }
   2117                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
   2118                 if ( nextOffsetPtr < limitOffsetPtr ) {
   2119                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
   2120                 }
   2121 
   2122                 ucol_setOffset(uce, kLen_tsceText, &status);
   2123                 status = U_ZERO_ERROR;
   2124                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
   2125                 limitOffsetPtr = tsceItemPtr->offsets;
   2126                 do {
   2127                     offset = ucol_getOffset(uce);
   2128                     element = ucol_previous(uce, &status);
   2129                     if ( element == 0 ) {
   2130                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
   2131                     }
   2132                     if ( nextOffsetPtr > limitOffsetPtr ) {
   2133                         nextOffsetPtr--;
   2134                         if (offset != *nextOffsetPtr) {
   2135                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
   2136                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
   2137                             nextOffsetPtr = limitOffsetPtr;
   2138                             break;
   2139                         }
   2140                    } else {
   2141                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
   2142                     }
   2143                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
   2144                 if ( nextOffsetPtr > limitOffsetPtr ) {
   2145                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
   2146                 }
   2147 
   2148                 ucol_closeElements(uce);
   2149             } else {
   2150                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
   2151             }
   2152             ucol_close(ucol);
   2153         } else {
   2154             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
   2155         }
   2156     }
   2157 }
   2158 
   2159 #endif /* #if !UCONFIG_NO_COLLATION */
   2160