Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /********************************************************************************
      7 *
      8 * File CITERTST.C
      9 *
     10 * Modification History:
     11 * Date      Name               Description
     12 *           Madhu Katragadda   Ported for C API
     13 * 02/19/01  synwee             Modified test case for new collation iterator
     14 *********************************************************************************/
     15 /*
     16  * Collation Iterator tests.
     17  * (Let me reiterate my position...)
     18  */
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION
     23 
     24 #include "unicode/ucol.h"
     25 #include "unicode/uloc.h"
     26 #include "unicode/uchar.h"
     27 #include "unicode/ustring.h"
     28 #include "unicode/putil.h"
     29 #include "callcoll.h"
     30 #include "cmemory.h"
     31 #include "cintltst.h"
     32 #include "citertst.h"
     33 #include "ccolltst.h"
     34 #include "filestrm.h"
     35 #include "cstring.h"
     36 #include "ucol_imp.h"
     37 #include "ucol_tok.h"
     38 #include <stdio.h>
     39 
     40 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
     41 
     42 void addCollIterTest(TestNode** root)
     43 {
     44     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
     45     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
     46     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
     47     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
     48     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
     49     addTest(root, &TestNormalizedUnicodeChar,
     50                                 "tscoll/citertst/TestNormalizedUnicodeChar");
     51     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
     52     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
     53     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
     54     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
     55     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
     56     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
     57     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
     58     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
     59     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
     60 }
     61 
     62 /* The locales we support */
     63 
     64 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
     65 
     66 static void TestBug672() {
     67     UErrorCode  status = U_ZERO_ERROR;
     68     UChar       pattern[20];
     69     UChar       text[50];
     70     int         i;
     71     int         result[3][3];
     72 
     73     u_uastrcpy(pattern, "resume");
     74     u_uastrcpy(text, "Time to resume updating my resume.");
     75 
     76     for (i = 0; i < 3; ++ i) {
     77         UCollator          *coll = ucol_open(LOCALES[i], &status);
     78         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
     79                                                      &status);
     80         UCollationElements *titer = ucol_openElements(coll, text, -1,
     81                                                      &status);
     82         if (U_FAILURE(status)) {
     83             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
     84                     myErrorName(status));
     85             return;
     86         }
     87 
     88         log_verbose("locale tested %s\n", LOCALES[i]);
     89 
     90         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
     91                U_SUCCESS(status)) {
     92         }
     93         if (U_FAILURE(status)) {
     94             log_err("ERROR: reversing collation iterator :%s\n",
     95                     myErrorName(status));
     96             return;
     97         }
     98         ucol_reset(pitr);
     99 
    100         ucol_setOffset(titer, u_strlen(pattern), &status);
    101         if (U_FAILURE(status)) {
    102             log_err("ERROR: setting offset in collator :%s\n",
    103                     myErrorName(status));
    104             return;
    105         }
    106         result[i][0] = ucol_getOffset(titer);
    107         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    108 
    109         /* Use previous() */
    110         ucol_previous(titer, &status);
    111         result[i][1] = ucol_getOffset(titer);
    112         log_verbose("Current offset %d after previous\n", result[i][1]);
    113 
    114         /* Add one to index */
    115         log_verbose("Adding one to current offset...\n");
    116         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    117         if (U_FAILURE(status)) {
    118             log_err("ERROR: setting offset in collator :%s\n",
    119                     myErrorName(status));
    120             return;
    121         }
    122         result[i][2] = ucol_getOffset(titer);
    123         log_verbose("Current offset in text = %d\n", result[i][2]);
    124         ucol_closeElements(pitr);
    125         ucol_closeElements(titer);
    126         ucol_close(coll);
    127     }
    128 
    129     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    130         uprv_memcmp(result[1], result[2], 3) != 0) {
    131         log_err("ERROR: Different locales have different offsets at the same character\n");
    132     }
    133 }
    134 
    135 
    136 
    137 /*  Running this test with normalization enabled showed up a bug in the incremental
    138     normalization code. */
    139 static void TestBug672Normalize() {
    140     UErrorCode  status = U_ZERO_ERROR;
    141     UChar       pattern[20];
    142     UChar       text[50];
    143     int         i;
    144     int         result[3][3];
    145 
    146     u_uastrcpy(pattern, "resume");
    147     u_uastrcpy(text, "Time to resume updating my resume.");
    148 
    149     for (i = 0; i < 3; ++ i) {
    150         UCollator          *coll = ucol_open(LOCALES[i], &status);
    151         UCollationElements *pitr = NULL;
    152         UCollationElements *titer = NULL;
    153 
    154         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    155 
    156         pitr = ucol_openElements(coll, pattern, -1, &status);
    157         titer = ucol_openElements(coll, text, -1, &status);
    158         if (U_FAILURE(status)) {
    159             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
    160                     myErrorName(status));
    161             return;
    162         }
    163 
    164         log_verbose("locale tested %s\n", LOCALES[i]);
    165 
    166         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
    167                U_SUCCESS(status)) {
    168         }
    169         if (U_FAILURE(status)) {
    170             log_err("ERROR: reversing collation iterator :%s\n",
    171                     myErrorName(status));
    172             return;
    173         }
    174         ucol_reset(pitr);
    175 
    176         ucol_setOffset(titer, u_strlen(pattern), &status);
    177         if (U_FAILURE(status)) {
    178             log_err("ERROR: setting offset in collator :%s\n",
    179                     myErrorName(status));
    180             return;
    181         }
    182         result[i][0] = ucol_getOffset(titer);
    183         log_verbose("Text iterator set to offset %d\n", result[i][0]);
    184 
    185         /* Use previous() */
    186         ucol_previous(titer, &status);
    187         result[i][1] = ucol_getOffset(titer);
    188         log_verbose("Current offset %d after previous\n", result[i][1]);
    189 
    190         /* Add one to index */
    191         log_verbose("Adding one to current offset...\n");
    192         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
    193         if (U_FAILURE(status)) {
    194             log_err("ERROR: setting offset in collator :%s\n",
    195                     myErrorName(status));
    196             return;
    197         }
    198         result[i][2] = ucol_getOffset(titer);
    199         log_verbose("Current offset in text = %d\n", result[i][2]);
    200         ucol_closeElements(pitr);
    201         ucol_closeElements(titer);
    202         ucol_close(coll);
    203     }
    204 
    205     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
    206         uprv_memcmp(result[1], result[2], 3) != 0) {
    207         log_err("ERROR: Different locales have different offsets at the same character\n");
    208     }
    209 }
    210 
    211 
    212 
    213 
    214 /**
    215  * Test for CollationElementIterator previous and next for the whole set of
    216  * unicode characters.
    217  */
    218 static void TestUnicodeChar()
    219 {
    220     UChar source[0x100];
    221     UCollator *en_us;
    222     UCollationElements *iter;
    223     UErrorCode status = U_ZERO_ERROR;
    224     UChar codepoint;
    225 
    226     UChar *test;
    227     en_us = ucol_open("en_US", &status);
    228     if (U_FAILURE(status)){
    229        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
    230               myErrorName(status));
    231        return;
    232     }
    233 
    234     for (codepoint = 1; codepoint < 0xFFFE;)
    235     {
    236       test = source;
    237 
    238       while (codepoint % 0xFF != 0)
    239       {
    240         if (u_isdefined(codepoint))
    241           *(test ++) = codepoint;
    242         codepoint ++;
    243       }
    244 
    245       if (u_isdefined(codepoint))
    246         *(test ++) = codepoint;
    247 
    248       if (codepoint != 0xFFFF)
    249         codepoint ++;
    250 
    251       *test = 0;
    252       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
    253       if(U_FAILURE(status)){
    254           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    255               myErrorName(status));
    256           ucol_close(en_us);
    257           return;
    258       }
    259       /* A basic test to see if it's working at all */
    260       log_verbose("codepoint testing %x\n", codepoint);
    261       backAndForth(iter);
    262       ucol_closeElements(iter);
    263 
    264       /* null termination test */
    265       iter=ucol_openElements(en_us, source, -1, &status);
    266       if(U_FAILURE(status)){
    267           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    268               myErrorName(status));
    269           ucol_close(en_us);
    270           return;
    271       }
    272       /* A basic test to see if it's working at all */
    273       backAndForth(iter);
    274       ucol_closeElements(iter);
    275     }
    276 
    277     ucol_close(en_us);
    278 }
    279 
    280 /**
    281  * Test for CollationElementIterator previous and next for the whole set of
    282  * unicode characters with normalization on.
    283  */
    284 static void TestNormalizedUnicodeChar()
    285 {
    286     UChar source[0x100];
    287     UCollator *th_th;
    288     UCollationElements *iter;
    289     UErrorCode status = U_ZERO_ERROR;
    290     UChar codepoint;
    291 
    292     UChar *test;
    293     /* thai should have normalization on */
    294     th_th = ucol_open("th_TH", &status);
    295     if (U_FAILURE(status)){
    296         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
    297               myErrorName(status));
    298         return;
    299     }
    300 
    301     for (codepoint = 1; codepoint < 0xFFFE;)
    302     {
    303       test = source;
    304 
    305       while (codepoint % 0xFF != 0)
    306       {
    307         if (u_isdefined(codepoint))
    308           *(test ++) = codepoint;
    309         codepoint ++;
    310       }
    311 
    312       if (u_isdefined(codepoint))
    313         *(test ++) = codepoint;
    314 
    315       if (codepoint != 0xFFFF)
    316         codepoint ++;
    317 
    318       *test = 0;
    319       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
    320       if(U_FAILURE(status)){
    321           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    322               myErrorName(status));
    323             ucol_close(th_th);
    324           return;
    325       }
    326 
    327       backAndForth(iter);
    328       ucol_closeElements(iter);
    329 
    330       iter=ucol_openElements(th_th, source, -1, &status);
    331       if(U_FAILURE(status)){
    332           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    333               myErrorName(status));
    334             ucol_close(th_th);
    335           return;
    336       }
    337 
    338       backAndForth(iter);
    339       ucol_closeElements(iter);
    340     }
    341 
    342     ucol_close(th_th);
    343 }
    344 
    345 /**
    346 * Test the incremental normalization
    347 */
    348 static void TestNormalization()
    349 {
    350           UErrorCode          status = U_ZERO_ERROR;
    351     const char               *str    =
    352                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
    353           UCollator          *coll;
    354           UChar               rule[50];
    355           int                 rulelen = u_unescape(str, rule, 50);
    356           int                 count = 0;
    357     const char                *testdata[] =
    358                         {"\\u1ED9", "o\\u0323\\u0302",
    359                         "\\u0300\\u0315", "\\u0315\\u0300",
    360                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
    361                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
    362                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
    363                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
    364                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
    365     int32_t   srclen;
    366     UChar source[10];
    367     UCollationElements *iter;
    368 
    369     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
    370     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    371     if (U_FAILURE(status)){
    372         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
    373               myErrorName(status));
    374         return;
    375     }
    376 
    377     srclen = u_unescape(testdata[0], source, 10);
    378     iter = ucol_openElements(coll, source, srclen, &status);
    379     backAndForth(iter);
    380     ucol_closeElements(iter);
    381 
    382     srclen = u_unescape(testdata[1], source, 10);
    383     iter = ucol_openElements(coll, source, srclen, &status);
    384     backAndForth(iter);
    385     ucol_closeElements(iter);
    386 
    387     while (count < 12) {
    388         srclen = u_unescape(testdata[count], source, 10);
    389         iter = ucol_openElements(coll, source, srclen, &status);
    390 
    391         if (U_FAILURE(status)){
    392             log_err("ERROR: in creation of collator element iterator\n %s\n",
    393                   myErrorName(status));
    394             return;
    395         }
    396         backAndForth(iter);
    397         ucol_closeElements(iter);
    398 
    399         iter = ucol_openElements(coll, source, -1, &status);
    400 
    401         if (U_FAILURE(status)){
    402             log_err("ERROR: in creation of collator element iterator\n %s\n",
    403                   myErrorName(status));
    404             return;
    405         }
    406         backAndForth(iter);
    407         ucol_closeElements(iter);
    408         count ++;
    409     }
    410     ucol_close(coll);
    411 }
    412 
    413 /**
    414  * Test for CollationElementIterator.previous()
    415  *
    416  * @bug 4108758 - Make sure it works with contracting characters
    417  *
    418  */
    419 static void TestPrevious()
    420 {
    421     UCollator *coll=NULL;
    422     UChar rule[50];
    423     UChar *source;
    424     UCollator *c1, *c2, *c3;
    425     UCollationElements *iter;
    426     UErrorCode status = U_ZERO_ERROR;
    427     UChar test1[50];
    428     UChar test2[50];
    429 
    430     u_uastrcpy(test1, "What subset of all possible test cases?");
    431     u_uastrcpy(test2, "has the highest probability of detecting");
    432     coll = ucol_open("en_US", &status);
    433 
    434     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
    435     log_verbose("English locale testing back and forth\n");
    436     if(U_FAILURE(status)){
    437         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    438             myErrorName(status));
    439         ucol_close(coll);
    440         return;
    441     }
    442     /* A basic test to see if it's working at all */
    443     backAndForth(iter);
    444     ucol_closeElements(iter);
    445     ucol_close(coll);
    446 
    447     /* Test with a contracting character sequence */
    448     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    449     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    450 
    451     log_verbose("Contraction rule testing back and forth with no normalization\n");
    452 
    453     if (c1 == NULL || U_FAILURE(status))
    454     {
    455         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
    456             myErrorName(status));
    457         return;
    458     }
    459     source=(UChar*)malloc(sizeof(UChar) * 20);
    460     u_uastrcpy(source, "abchdcba");
    461     iter=ucol_openElements(c1, source, u_strlen(source), &status);
    462     if(U_FAILURE(status)){
    463         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    464             myErrorName(status));
    465         return;
    466     }
    467     backAndForth(iter);
    468     ucol_closeElements(iter);
    469     ucol_close(c1);
    470 
    471     /* Test with an expanding character sequence */
    472     u_uastrcpy(rule, "&a < b < c/abd < d");
    473     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    474     log_verbose("Expansion rule testing back and forth with no normalization\n");
    475     if (c2 == NULL || U_FAILURE(status))
    476     {
    477         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    478             myErrorName(status));
    479         return;
    480     }
    481     u_uastrcpy(source, "abcd");
    482     iter=ucol_openElements(c2, source, u_strlen(source), &status);
    483     if(U_FAILURE(status)){
    484         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    485             myErrorName(status));
    486         return;
    487     }
    488     backAndForth(iter);
    489     ucol_closeElements(iter);
    490     ucol_close(c2);
    491     /* Now try both */
    492     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
    493     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
    494     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
    495 
    496     if (c3 == NULL || U_FAILURE(status))
    497     {
    498         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
    499             myErrorName(status));
    500         return;
    501     }
    502     u_uastrcpy(source, "abcdbchdc");
    503     iter=ucol_openElements(c3, source, u_strlen(source), &status);
    504     if(U_FAILURE(status)){
    505         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    506             myErrorName(status));
    507         return;
    508     }
    509     backAndForth(iter);
    510     ucol_closeElements(iter);
    511     ucol_close(c3);
    512     source[0] = 0x0e41;
    513     source[1] = 0x0e02;
    514     source[2] = 0x0e41;
    515     source[3] = 0x0e02;
    516     source[4] = 0x0e27;
    517     source[5] = 0x61;
    518     source[6] = 0x62;
    519     source[7] = 0x63;
    520     source[8] = 0;
    521 
    522     coll = ucol_open("th_TH", &status);
    523     log_verbose("Thai locale testing back and forth with normalization\n");
    524     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    525     if(U_FAILURE(status)){
    526         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    527             myErrorName(status));
    528         return;
    529     }
    530     backAndForth(iter);
    531     ucol_closeElements(iter);
    532     ucol_close(coll);
    533 
    534     /* prev test */
    535     source[0] = 0x0061;
    536     source[1] = 0x30CF;
    537     source[2] = 0x3099;
    538     source[3] = 0x30FC;
    539     source[4] = 0;
    540 
    541     coll = ucol_open("ja_JP", &status);
    542     log_verbose("Japanese locale testing back and forth with normalization\n");
    543     iter=ucol_openElements(coll, source, u_strlen(source), &status);
    544     if(U_FAILURE(status)){
    545         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    546             myErrorName(status));
    547         return;
    548     }
    549     backAndForth(iter);
    550     ucol_closeElements(iter);
    551     ucol_close(coll);
    552 
    553     free(source);
    554 }
    555 
    556 /**
    557  * Test for getOffset() and setOffset()
    558  */
    559 static void TestOffset()
    560 {
    561     UErrorCode status= U_ZERO_ERROR;
    562     UCollator *en_us=NULL;
    563     UCollationElements *iter, *pristine;
    564     int32_t offset;
    565     OrderAndOffset *orders;
    566     int32_t orderLength=0;
    567     int     count = 0;
    568     UChar test1[50];
    569     UChar test2[50];
    570 
    571     u_uastrcpy(test1, "What subset of all possible test cases?");
    572     u_uastrcpy(test2, "has the highest probability of detecting");
    573     en_us = ucol_open("en_US", &status);
    574     log_verbose("Testing getOffset and setOffset for collations\n");
    575     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
    576     if(U_FAILURE(status)){
    577         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    578             myErrorName(status));
    579         ucol_close(en_us);
    580         return;
    581     }
    582 
    583     /* testing boundaries */
    584     ucol_setOffset(iter, 0, &status);
    585     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
    586         log_err("Error: After setting offset to 0, we should be at the end "
    587                 "of the backwards iteration");
    588     }
    589     ucol_setOffset(iter, u_strlen(test1), &status);
    590     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
    591         log_err("Error: After setting offset to end of the string, we should "
    592                 "be at the end of the backwards iteration");
    593     }
    594 
    595     /* Run all the way through the iterator, then get the offset */
    596 
    597     orders = getOrders(iter, &orderLength);
    598 
    599     offset = ucol_getOffset(iter);
    600 
    601     if (offset != u_strlen(test1))
    602     {
    603         log_err("offset at end != length %d vs %d\n", offset,
    604             u_strlen(test1) );
    605     }
    606 
    607     /* Now set the offset back to the beginning and see if it works */
    608     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    609     if(U_FAILURE(status)){
    610         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    611             myErrorName(status));
    612     ucol_close(en_us);
    613         return;
    614     }
    615     status = U_ZERO_ERROR;
    616 
    617     ucol_setOffset(iter, 0, &status);
    618     if (U_FAILURE(status))
    619     {
    620         log_err("setOffset failed. %s\n",    myErrorName(status));
    621     }
    622     else
    623     {
    624         assertEqual(iter, pristine);
    625     }
    626 
    627     ucol_closeElements(pristine);
    628     ucol_closeElements(iter);
    629     free(orders);
    630 
    631     /* testing offsets in normalization buffer */
    632     test1[0] = 0x61;
    633     test1[1] = 0x300;
    634     test1[2] = 0x316;
    635     test1[3] = 0x62;
    636     test1[4] = 0;
    637     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    638     iter = ucol_openElements(en_us, test1, 4, &status);
    639     if(U_FAILURE(status)){
    640         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
    641             myErrorName(status));
    642         ucol_close(en_us);
    643         return;
    644     }
    645 
    646     count = 0;
    647     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
    648         U_SUCCESS(status)) {
    649         switch (count) {
    650         case 0:
    651             if (ucol_getOffset(iter) != 1) {
    652                 log_err("ERROR: Offset of iteration should be 1\n");
    653             }
    654             break;
    655         case 3:
    656             if (ucol_getOffset(iter) != 4) {
    657                 log_err("ERROR: Offset of iteration should be 4\n");
    658             }
    659             break;
    660         default:
    661             if (ucol_getOffset(iter) != 3) {
    662                 log_err("ERROR: Offset of iteration should be 3\n");
    663             }
    664         }
    665         count ++;
    666     }
    667 
    668     ucol_reset(iter);
    669     count = 0;
    670     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
    671         U_SUCCESS(status)) {
    672         switch (count) {
    673         case 0:
    674         case 1:
    675             if (ucol_getOffset(iter) != 3) {
    676                 log_err("ERROR: Offset of iteration should be 3\n");
    677             }
    678             break;
    679         case 2:
    680             if (ucol_getOffset(iter) != 1) {
    681                 log_err("ERROR: Offset of iteration should be 1\n");
    682             }
    683             break;
    684         default:
    685             if (ucol_getOffset(iter) != 0) {
    686                 log_err("ERROR: Offset of iteration should be 0\n");
    687             }
    688         }
    689         count ++;
    690     }
    691 
    692     if(U_FAILURE(status)){
    693         log_err("ERROR: in iterating collation elements %s\n",
    694             myErrorName(status));
    695     }
    696 
    697     ucol_closeElements(iter);
    698     ucol_close(en_us);
    699 }
    700 
    701 /**
    702  * Test for setText()
    703  */
    704 static void TestSetText()
    705 {
    706     int32_t c,i;
    707     UErrorCode status = U_ZERO_ERROR;
    708     UCollator *en_us=NULL;
    709     UCollationElements *iter1, *iter2;
    710     UChar test1[50];
    711     UChar test2[50];
    712 
    713     u_uastrcpy(test1, "What subset of all possible test cases?");
    714     u_uastrcpy(test2, "has the highest probability of detecting");
    715     en_us = ucol_open("en_US", &status);
    716     log_verbose("testing setText for Collation elements\n");
    717     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
    718     if(U_FAILURE(status)){
    719         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
    720             myErrorName(status));
    721     ucol_close(en_us);
    722         return;
    723     }
    724     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
    725     if(U_FAILURE(status)){
    726         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
    727             myErrorName(status));
    728     ucol_close(en_us);
    729         return;
    730     }
    731 
    732     /* Run through the second iterator just to exercise it */
    733     c = ucol_next(iter2, &status);
    734     i = 0;
    735 
    736     while ( ++i < 10 && (c != UCOL_NULLORDER))
    737     {
    738         if (U_FAILURE(status))
    739         {
    740             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
    741             ucol_closeElements(iter2);
    742             ucol_closeElements(iter1);
    743     ucol_close(en_us);
    744             return;
    745         }
    746 
    747         c = ucol_next(iter2, &status);
    748     }
    749 
    750     /* Now set it to point to the same string as the first iterator */
    751     ucol_setText(iter2, test1, u_strlen(test1), &status);
    752     if (U_FAILURE(status))
    753     {
    754         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
    755     }
    756     else
    757     {
    758         assertEqual(iter1, iter2);
    759     }
    760 
    761     /* Now set it to point to a null string with fake length*/
    762     ucol_setText(iter2, NULL, 2, &status);
    763     if (U_FAILURE(status))
    764     {
    765         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
    766     }
    767     else
    768     {
    769         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
    770             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
    771         }
    772     }
    773 
    774     ucol_closeElements(iter2);
    775     ucol_closeElements(iter1);
    776     ucol_close(en_us);
    777 }
    778 
    779 /** @bug 4108762
    780  * Test for getMaxExpansion()
    781  */
    782 static void TestMaxExpansion()
    783 {
    784     UErrorCode          status = U_ZERO_ERROR;
    785     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    786     UChar               ch     = 0;
    787     UChar32             unassigned = 0xEFFFD;
    788     UChar               supplementary[2];
    789     uint32_t            index = 0;
    790     UBool               isError = FALSE;
    791     uint32_t            sorder = 0;
    792     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    793     uint32_t            temporder = 0;
    794 
    795     UChar rule[256];
    796     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    797     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    798         UCOL_DEFAULT_STRENGTH,NULL, &status);
    799     if(U_SUCCESS(status) && coll) {
    800       iter = ucol_openElements(coll, &ch, 1, &status);
    801 
    802       while (ch < 0xFFFF && U_SUCCESS(status)) {
    803           int      count = 1;
    804           uint32_t order;
    805           int32_t  size = 0;
    806 
    807           ch ++;
    808 
    809           ucol_setText(iter, &ch, 1, &status);
    810           order = ucol_previous(iter, &status);
    811 
    812           /* thai management */
    813           if (order == 0)
    814               order = ucol_previous(iter, &status);
    815 
    816           while (U_SUCCESS(status) &&
    817               ucol_previous(iter, &status) != UCOL_NULLORDER) {
    818               count ++;
    819           }
    820 
    821           size = ucol_getMaxExpansion(iter, order);
    822           if (U_FAILURE(status) || size < count) {
    823               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    824                   ch, count);
    825           }
    826       }
    827 
    828       /* testing for exact max expansion */
    829       ch = 0;
    830       while (ch < 0x61) {
    831           uint32_t order;
    832           int32_t  size;
    833           ucol_setText(iter, &ch, 1, &status);
    834           order = ucol_previous(iter, &status);
    835           size  = ucol_getMaxExpansion(iter, order);
    836           if (U_FAILURE(status) || size != 1) {
    837               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    838                   ch, 1);
    839           }
    840           ch ++;
    841       }
    842 
    843       ch = 0x63;
    844       ucol_setText(iter, &ch, 1, &status);
    845       temporder = ucol_previous(iter, &status);
    846 
    847       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
    848           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    849                   ch, 3);
    850       }
    851 
    852       ch = 0x64;
    853       ucol_setText(iter, &ch, 1, &status);
    854       temporder = ucol_previous(iter, &status);
    855 
    856       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
    857           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
    858                   ch, 3);
    859       }
    860 
    861       U16_APPEND(supplementary, index, 2, unassigned, isError);
    862       ucol_setText(iter, supplementary, 2, &status);
    863       sorder = ucol_previous(iter, &status);
    864 
    865       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
    866           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
    867                   ch, 2);
    868       }
    869 
    870       /* testing jamo */
    871       ch = 0x1165;
    872 
    873       ucol_setText(iter, &ch, 1, &status);
    874       temporder = ucol_previous(iter, &status);
    875       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
    876           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    877                   ch, 3);
    878       }
    879 
    880       ucol_closeElements(iter);
    881       ucol_close(coll);
    882 
    883       /* testing special jamo &a<\u1160 */
    884       rule[0] = 0x26;
    885       rule[1] = 0x71;
    886       rule[2] = 0x3c;
    887       rule[3] = 0x1165;
    888       rule[4] = 0x2f;
    889       rule[5] = 0x71;
    890       rule[6] = 0x71;
    891       rule[7] = 0x71;
    892       rule[8] = 0x71;
    893       rule[9] = 0;
    894 
    895       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
    896           UCOL_DEFAULT_STRENGTH,NULL, &status);
    897       iter = ucol_openElements(coll, &ch, 1, &status);
    898 
    899       temporder = ucol_previous(iter, &status);
    900       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
    901           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
    902                   ch, 5);
    903       }
    904 
    905       ucol_closeElements(iter);
    906       ucol_close(coll);
    907     } else {
    908       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    909     }
    910 
    911 }
    912 
    913 
    914 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
    915 {
    916     int32_t c1, c2;
    917     int32_t count = 0;
    918     UErrorCode status = U_ZERO_ERROR;
    919 
    920     do
    921     {
    922         c1 = ucol_next(i1, &status);
    923         c2 = ucol_next(i2, &status);
    924 
    925         if (c1 != c2)
    926         {
    927             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
    928             break;
    929         }
    930 
    931         count += 1;
    932     }
    933     while (c1 != UCOL_NULLORDER);
    934 }
    935 
    936 /**
    937  * Testing iterators with extremely small buffers
    938  */
    939 static void TestSmallBuffer()
    940 {
    941     UErrorCode          status = U_ZERO_ERROR;
    942     UCollator          *coll;
    943     UCollationElements *testiter,
    944                        *iter;
    945     int32_t             count = 0;
    946     OrderAndOffset     *testorders,
    947                        *orders;
    948 
    949     UChar teststr[500];
    950     UChar str[] = {0x300, 0x31A, 0};
    951     /*
    952     creating a long string of decomposable characters,
    953     since by default the writable buffer is of size 256
    954     */
    955     while (count < 500) {
    956         if ((count & 1) == 0) {
    957             teststr[count ++] = 0x300;
    958         }
    959         else {
    960             teststr[count ++] = 0x31A;
    961         }
    962     }
    963 
    964     coll = ucol_open("th_TH", &status);
    965     if(U_SUCCESS(status) && coll) {
    966       testiter = ucol_openElements(coll, teststr, 500, &status);
    967       iter = ucol_openElements(coll, str, 2, &status);
    968 
    969       orders     = getOrders(iter, &count);
    970       if (count != 2) {
    971           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
    972       }
    973 
    974       /*
    975       this will rearrange the string data to 250 characters of 0x300 first then
    976       250 characters of 0x031A
    977       */
    978       testorders = getOrders(testiter, &count);
    979 
    980       if (count != 500) {
    981           log_err("Error decomposition does not give the right sized collation elements\n");
    982       }
    983 
    984       while (count != 0) {
    985           /* UCA collation element for 0x0F76 */
    986           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
    987               (count <= 250 && testorders[-- count].order != orders[0].order)) {
    988               log_err("Error decomposition does not give the right collation element at %d count\n", count);
    989               break;
    990           }
    991       }
    992 
    993       free(testorders);
    994       free(orders);
    995 
    996       ucol_reset(testiter);
    997 
    998       /* ensures closing of elements done properly to clear writable buffer */
    999       ucol_next(testiter, &status);
   1000       ucol_next(testiter, &status);
   1001       ucol_closeElements(testiter);
   1002       ucol_closeElements(iter);
   1003       ucol_close(coll);
   1004     } else {
   1005       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   1006     }
   1007 }
   1008 
   1009 /**
   1010 * Sniplets of code from genuca
   1011 */
   1012 static int32_t hex2num(char hex) {
   1013     if(hex>='0' && hex <='9') {
   1014         return hex-'0';
   1015     } else if(hex>='a' && hex<='f') {
   1016         return hex-'a'+10;
   1017     } else if(hex>='A' && hex<='F') {
   1018         return hex-'A'+10;
   1019     } else {
   1020         return 0;
   1021     }
   1022 }
   1023 
   1024 /**
   1025 * Getting codepoints from a string
   1026 * @param str character string contain codepoints seperated by space and ended
   1027 *        by a semicolon
   1028 * @param codepoints array for storage, assuming size > 5
   1029 * @return position at the end of the codepoint section
   1030 */
   1031 static char * getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
   1032     char *pStartCP = str;
   1033     char *pEndCP   = str + 4;
   1034 
   1035     *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
   1036                           (hex2num(*(pStartCP + 1)) << 8) |
   1037                           (hex2num(*(pStartCP + 2)) << 4) |
   1038                           (hex2num(*(pStartCP + 3))));
   1039     if (*pEndCP == '|' || *(pEndCP+1) == '|') {
   1040         /* pre-context rule */
   1041         pStartCP = pEndCP;
   1042         while (*pStartCP==' ' || *pStartCP== '|' ) {
   1043             pStartCP++;
   1044         }
   1045         pEndCP = pStartCP+4;
   1046         *contextCPs = *codepoints;
   1047         *(++codepoints) = (UChar)((hex2num(*pStartCP) << 12) |
   1048                                   (hex2num(*(pStartCP + 1)) << 8) |
   1049                                   (hex2num(*(pStartCP + 2)) << 4) |
   1050                                   (hex2num(*(pStartCP + 3))));
   1051         contextCPs++;
   1052     }
   1053     *contextCPs = 0;
   1054     codepoints ++;
   1055     while (*pEndCP != ';') {
   1056         pStartCP = pEndCP + 1;
   1057         *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
   1058                           (hex2num(*(pStartCP + 1)) << 8) |
   1059                           (hex2num(*(pStartCP + 2)) << 4) |
   1060                           (hex2num(*(pStartCP + 3))));
   1061         codepoints ++;
   1062         pEndCP = pStartCP + 4;
   1063     }
   1064     *codepoints = 0;
   1065     return pEndCP + 1;
   1066 }
   1067 
   1068 /**
   1069 * Sniplets of code from genuca
   1070 */
   1071 static int32_t
   1072 readElement(char **from, char *to, char separator, UErrorCode *status)
   1073 {
   1074     if (U_SUCCESS(*status)) {
   1075         char    buffer[1024];
   1076         int32_t i = 0;
   1077         while (**from != separator) {
   1078             if (**from != ' ') {
   1079                 *(buffer+i++) = **from;
   1080             }
   1081             (*from)++;
   1082         }
   1083         (*from)++;
   1084         *(buffer + i) = 0;
   1085         strcpy(to, buffer);
   1086         return i/2;
   1087     }
   1088 
   1089     return 0;
   1090 }
   1091 
   1092 /**
   1093 * Sniplets of code from genuca
   1094 */
   1095 static uint32_t
   1096 getSingleCEValue(char *primary, char *secondary, char *tertiary,
   1097                           UErrorCode *status)
   1098 {
   1099     if (U_SUCCESS(*status)) {
   1100         uint32_t  value    = 0;
   1101         char      primsave = '\0';
   1102         char      secsave  = '\0';
   1103         char      tersave  = '\0';
   1104         char     *primend  = primary+4;
   1105         char     *secend   = secondary+2;
   1106         char     *terend   = tertiary+2;
   1107         uint32_t  primvalue;
   1108         uint32_t  secvalue;
   1109         uint32_t  tervalue;
   1110 
   1111         if (uprv_strlen(primary) > 4) {
   1112             primsave = *primend;
   1113             *primend = '\0';
   1114         }
   1115 
   1116         if (uprv_strlen(secondary) > 2) {
   1117             secsave = *secend;
   1118             *secend = '\0';
   1119         }
   1120 
   1121         if (uprv_strlen(tertiary) > 2) {
   1122             tersave = *terend;
   1123             *terend = '\0';
   1124         }
   1125 
   1126         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
   1127         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
   1128         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
   1129         if(primvalue <= 0xFF) {
   1130           primvalue <<= 8;
   1131         }
   1132 
   1133         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
   1134            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
   1135            | (tervalue & UCOL_TERTIARYORDERMASK);
   1136 
   1137         if(primsave!='\0') {
   1138             *primend = primsave;
   1139         }
   1140         if(secsave!='\0') {
   1141             *secend = secsave;
   1142         }
   1143         if(tersave!='\0') {
   1144             *terend = tersave;
   1145         }
   1146         return value;
   1147     }
   1148     return 0;
   1149 }
   1150 
   1151 /**
   1152 * Getting collation elements generated from a string
   1153 * @param str character string contain collation elements contained in [] and
   1154 *        seperated by space
   1155 * @param ce array for storage, assuming size > 20
   1156 * @param status error status
   1157 * @return position at the end of the codepoint section
   1158 */
   1159 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
   1160     char       *pStartCP     = uprv_strchr(str, '[');
   1161     int         count        = 0;
   1162     char       *pEndCP;
   1163     char        primary[100];
   1164     char        secondary[100];
   1165     char        tertiary[100];
   1166 
   1167     while (*pStartCP == '[') {
   1168         uint32_t primarycount   = 0;
   1169         uint32_t secondarycount = 0;
   1170         uint32_t tertiarycount  = 0;
   1171         uint32_t CEi = 1;
   1172         pEndCP = strchr(pStartCP, ']');
   1173         if(pEndCP == NULL) {
   1174             break;
   1175         }
   1176         pStartCP ++;
   1177 
   1178         primarycount   = readElement(&pStartCP, primary, ',', status);
   1179         secondarycount = readElement(&pStartCP, secondary, ',', status);
   1180         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
   1181 
   1182         /* I want to get the CEs entered right here, including continuation */
   1183         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
   1184         if (U_FAILURE(*status)) {
   1185             break;
   1186         }
   1187 
   1188         while (2 * CEi < primarycount || CEi < secondarycount ||
   1189                CEi < tertiarycount) {
   1190             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
   1191             if (2 * CEi < primarycount) {
   1192                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
   1193                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
   1194             }
   1195 
   1196             if (2 * CEi + 1 < primarycount) {
   1197                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
   1198                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
   1199             }
   1200 
   1201             if (CEi < secondarycount) {
   1202                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
   1203                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
   1204             }
   1205 
   1206             if (CEi < tertiarycount) {
   1207                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
   1208                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
   1209             }
   1210 
   1211             CEi ++;
   1212             ces[count ++] = value;
   1213         }
   1214 
   1215       pStartCP = pEndCP + 1;
   1216     }
   1217     ces[count] = 0;
   1218     return pStartCP;
   1219 }
   1220 
   1221 /**
   1222 * Getting the FractionalUCA.txt file stream
   1223 */
   1224 static FileStream * getFractionalUCA(void)
   1225 {
   1226     char        newPath[256];
   1227     char        backupPath[256];
   1228     FileStream *result = NULL;
   1229 
   1230     /* Look inside ICU_DATA first */
   1231     uprv_strcpy(newPath, ctest_dataSrcDir());
   1232     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
   1233     uprv_strcat(newPath, "FractionalUCA.txt");
   1234 
   1235     /* As a fallback, try to guess where the source data was located
   1236      *   at the time ICU was built, and look there.
   1237      */
   1238 #if defined (U_TOPSRCDIR)
   1239     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
   1240 #else
   1241     {
   1242         UErrorCode errorCode = U_ZERO_ERROR;
   1243         strcpy(backupPath, loadTestData(&errorCode));
   1244         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
   1245     }
   1246 #endif
   1247     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
   1248 
   1249     result = T_FileStream_open(newPath, "rb");
   1250 
   1251     if (result == NULL) {
   1252         result = T_FileStream_open(backupPath, "rb");
   1253         if (result == NULL) {
   1254             log_err("Failed to open either %s or %s\n", newPath, backupPath);
   1255         }
   1256     }
   1257     return result;
   1258 }
   1259 
   1260 /**
   1261 * Testing the CEs returned by the iterator
   1262 */
   1263 static void TestCEs() {
   1264     FileStream *file = NULL;
   1265     char        line[1024];
   1266     char       *str;
   1267     UChar       codepoints[10];
   1268     uint32_t    ces[20];
   1269     UErrorCode  status = U_ZERO_ERROR;
   1270     UCollator          *coll = ucol_open("", &status);
   1271     uint32_t lineNo = 0;
   1272     UChar       contextCPs[5];
   1273 
   1274     if (U_FAILURE(status)) {
   1275         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
   1276         return;
   1277     }
   1278 
   1279     file = getFractionalUCA();
   1280 
   1281     if (file == NULL) {
   1282         log_err("*** unable to open input FractionalUCA.txt file ***\n");
   1283         return;
   1284     }
   1285 
   1286 
   1287     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1288         int                 count = 0;
   1289         UCollationElements *iter;
   1290         int32_t            preContextCeLen=0;
   1291         lineNo++;
   1292         /* skip this line if it is empty or a comment or is a return value
   1293         or start of some variable section */
   1294         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1295             line[0] == 0x000D || line[0] == '[') {
   1296             continue;
   1297         }
   1298 
   1299         str = getCodePoints(line, codepoints, contextCPs);
   1300 
   1301         /* these are 'fake' codepoints in the fractional UCA, and are used just
   1302          * for positioning of indirect values. They should not go through this
   1303          * test.
   1304          */
   1305         if(*codepoints == 0xFDD0) {
   1306           continue;
   1307         }
   1308         if (*contextCPs != 0) {
   1309             iter = ucol_openElements(coll, contextCPs, -1, &status);
   1310             if (U_FAILURE(status)) {
   1311                 log_err("Error in opening collation elements\n");
   1312                 break;
   1313             }
   1314             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
   1315                 preContextCeLen++;
   1316             }
   1317             ucol_closeElements(iter);
   1318         }
   1319 
   1320         getCEs(str, ces+preContextCeLen, &status);
   1321         if (U_FAILURE(status)) {
   1322             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
   1323             break;
   1324         }
   1325         iter = ucol_openElements(coll, codepoints, -1, &status);
   1326         if (U_FAILURE(status)) {
   1327             log_err("Error in opening collation elements\n");
   1328             break;
   1329         }
   1330         for (;;) {
   1331             uint32_t ce = (uint32_t)ucol_next(iter, &status);
   1332             if (ce == 0xFFFFFFFF) {
   1333                 ce = 0;
   1334             }
   1335             /* we now unconditionally reorder Thai/Lao prevowels, so this
   1336              * test would fail if we don't skip here.
   1337              */
   1338             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
   1339               continue;
   1340             }
   1341             if (ce != ces[count] || U_FAILURE(status)) {
   1342                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
   1343                 break;
   1344             }
   1345             if (ces[count] == 0) {
   1346                 break;
   1347             }
   1348             count ++;
   1349         }
   1350         ucol_closeElements(iter);
   1351     }
   1352 
   1353     T_FileStream_close(file);
   1354     ucol_close(coll);
   1355 }
   1356 
   1357 /**
   1358 * Testing the discontigous contractions
   1359 */
   1360 static void TestDiscontiguos() {
   1361     const char               *rulestr    =
   1362                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
   1363           UChar               rule[50];
   1364           int                 rulelen = u_unescape(rulestr, rule, 50);
   1365     const char               *src[] = {
   1366      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
   1367     /* base character blocked */
   1368      "XD\\u0300", "XD\\u0300\\u0315",
   1369     /* non blocking combining character */
   1370      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
   1371      /* blocking combining character */
   1372      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
   1373      /* contraction prefix */
   1374      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
   1375      "X\\u0300\\u031A\\u0315",
   1376      /* ends not with a contraction character */
   1377      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
   1378      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
   1379     };
   1380     const char               *tgt[] = {
   1381      /* non blocking combining character */
   1382      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
   1383     /* base character blocked */
   1384      "X D \\u0300", "X D \\u0300\\u0315",
   1385     /* non blocking combining character */
   1386      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
   1387      /* blocking combining character */
   1388      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
   1389      /* contraction prefix */
   1390      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
   1391      "X\\u0300 \\u031A \\u0315",
   1392      /* ends not with a contraction character */
   1393      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
   1394      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
   1395     };
   1396           int                 size   = 20;
   1397           UCollator          *coll;
   1398           UErrorCode          status    = U_ZERO_ERROR;
   1399           int                 count     = 0;
   1400           UCollationElements *iter;
   1401           UCollationElements *resultiter;
   1402 
   1403     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
   1404     iter       = ucol_openElements(coll, rule, 1, &status);
   1405     resultiter = ucol_openElements(coll, rule, 1, &status);
   1406 
   1407     if (U_FAILURE(status)) {
   1408         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
   1409         return;
   1410     }
   1411 
   1412     while (count < size) {
   1413         UChar  str[20];
   1414         UChar  tstr[20];
   1415         int    strLen = u_unescape(src[count], str, 20);
   1416         UChar *s;
   1417 
   1418         ucol_setText(iter, str, strLen, &status);
   1419         if (U_FAILURE(status)) {
   1420             log_err("Error opening collation iterator\n");
   1421             return;
   1422         }
   1423 
   1424         u_unescape(tgt[count], tstr, 20);
   1425         s = tstr;
   1426 
   1427         log_verbose("count %d\n", count);
   1428 
   1429         for (;;) {
   1430             uint32_t  ce;
   1431             UChar    *e = u_strchr(s, 0x20);
   1432             if (e == 0) {
   1433                 e = u_strchr(s, 0);
   1434             }
   1435             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
   1436             ce = ucol_next(resultiter, &status);
   1437             if (U_FAILURE(status)) {
   1438                 log_err("Error manipulating collation iterator\n");
   1439                 return;
   1440             }
   1441             while (ce != UCOL_NULLORDER) {
   1442                 if (ce != (uint32_t)ucol_next(iter, &status) ||
   1443                     U_FAILURE(status)) {
   1444                     log_err("Discontiguos contraction test mismatch\n");
   1445                     return;
   1446                 }
   1447                 ce = ucol_next(resultiter, &status);
   1448                 if (U_FAILURE(status)) {
   1449                     log_err("Error getting next collation element\n");
   1450                     return;
   1451                 }
   1452             }
   1453             s = e + 1;
   1454             if (*e == 0) {
   1455                 break;
   1456             }
   1457         }
   1458         ucol_reset(iter);
   1459         backAndForth(iter);
   1460         count ++;
   1461     }
   1462     ucol_closeElements(resultiter);
   1463     ucol_closeElements(iter);
   1464     ucol_close(coll);
   1465 }
   1466 
   1467 static void TestCEBufferOverflow()
   1468 {
   1469     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
   1470     UErrorCode          status = U_ZERO_ERROR;
   1471     UChar               rule[10];
   1472     UCollator          *coll;
   1473     UCollationElements *iter;
   1474 
   1475     u_uastrcpy(rule, "&z < AB");
   1476     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
   1477     if (U_FAILURE(status)) {
   1478         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
   1479         return;
   1480     }
   1481 
   1482     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
   1483     test. this will cause an overflow in getPrev */
   1484     str[0] = 0x0041;    /* 'A' */
   1485     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
   1486     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
   1487     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
   1488     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
   1489                              &status);
   1490     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
   1491         status == U_BUFFER_OVERFLOW_ERROR) {
   1492         log_err("CE buffer should not overflow with long string of trail surrogates\n");
   1493     }
   1494     ucol_closeElements(iter);
   1495     ucol_close(coll);
   1496 }
   1497 
   1498 /**
   1499 * Byte bounds checks. Checks if each byte in data is between upper and lower
   1500 * inclusive.
   1501 */
   1502 static UBool checkByteBounds(uint32_t data, char upper, char lower)
   1503 {
   1504     int count = 4;
   1505     while (count > 0) {
   1506         char b = (char)(data & 0xFF);
   1507         if (b > upper || b < lower) {
   1508             return FALSE;
   1509         }
   1510         data = data >> 8;
   1511         count --;
   1512     }
   1513     return TRUE;
   1514 }
   1515 
   1516 /**
   1517 * Determines case of the string of codepoints.
   1518 * If it is a multiple codepoints it has to treated as a contraction.
   1519 */
   1520 #if 0
   1521 static uint8_t getCase(const UChar *s, uint32_t len) {
   1522     UBool       lower = FALSE;
   1523     UBool       upper = FALSE;
   1524     UBool       title = FALSE;
   1525     UErrorCode  status = U_ZERO_ERROR;
   1526     UChar       str[256];
   1527     const UChar      *ps = s;
   1528 
   1529     if (len == 0) {
   1530         return UCOL_LOWER_CASE;
   1531     }
   1532 
   1533     while (len > 0) {
   1534         UChar c = *ps ++;
   1535 
   1536         if (u_islower(c)) {
   1537             lower = TRUE;
   1538         }
   1539         if (u_isupper(c)) {
   1540             upper = TRUE;
   1541         }
   1542         if (u_istitle(c)) {
   1543             title = TRUE;
   1544         }
   1545 
   1546         len --;
   1547     }
   1548     if ((lower && !upper && !title) || (!lower && !upper && !title)){
   1549         return UCOL_LOWER_CASE;
   1550     }
   1551     if (upper && !lower && !title) {
   1552         return UCOL_UPPER_CASE;
   1553     }
   1554     /* mix of cases here */
   1555     /* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
   1556     if (U_FAILURE(status)) {
   1557         log_err("Error normalizing data string\n");
   1558         return UCOL_LOWER_CASE;
   1559     }*/
   1560 
   1561     if ((title && len >= 2) || (lower && upper)) {
   1562         return UCOL_MIXED_CASE;
   1563     }
   1564     if (u_isupper(s[0])) {
   1565         return UCOL_UPPER_CASE;
   1566     }
   1567     return UCOL_LOWER_CASE;
   1568 }
   1569 #endif
   1570 
   1571 /**
   1572 * Checking collation element validity given the boundary arguments.
   1573 */
   1574 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
   1575                              int length, uint32_t primarymax,
   1576                              uint32_t secondarymax)
   1577 {
   1578     UErrorCode          status = U_ZERO_ERROR;
   1579     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
   1580                                                   &status);
   1581     uint32_t            ce;
   1582     UBool               first  = TRUE;
   1583 /*
   1584     UBool               upper  = FALSE;
   1585     UBool               lower  = FALSE;
   1586 */
   1587 
   1588     if (U_FAILURE(status)) {
   1589         log_err("Error creating iterator for testing validity\n");
   1590     }
   1591 
   1592     ce = ucol_next(iter, &status);
   1593 
   1594     while (ce != UCOL_NULLORDER) {
   1595        if (ce != 0) {
   1596            uint32_t primary   = UCOL_PRIMARYORDER(ce);
   1597            uint32_t secondary = UCOL_SECONDARYORDER(ce);
   1598            uint32_t tertiary  = UCOL_TERTIARYORDER(ce);
   1599 /*           uint32_t scasebits = tertiary & 0xC0;*/
   1600 
   1601            if ((tertiary == 0 && secondary != 0) ||
   1602                (tertiary < 0xC0 && secondary == 0 && primary != 0)) {
   1603                /* n-1th level is not zero when the nth level is
   1604                   except for continuations, this is wrong */
   1605                log_err("Lower level weight not 0 when high level weight is 0\n");
   1606                goto fail;
   1607            }
   1608            else {
   1609                /* checks if any byte is illegal ie = 01 02 03. */
   1610                if (checkByteBounds(ce, 0x3, 0x1)) {
   1611                    log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
   1612                    goto fail;
   1613                }
   1614            }
   1615            if ((primary != 0 && primary < primarymax)
   1616                || ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF)
   1617                || ((primary & 0xFF) && ((primary & 0xFF) <= 2))
   1618                || (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 2)
   1619                || (primary >= 0xFE00 && !isContinuation(ce))) {
   1620                log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
   1621                    primary, codepoints[0]);
   1622                goto fail;
   1623            }
   1624            /* case matching not done since data generated by ken */
   1625            if (first) {
   1626                if (secondary >= 6 && secondary <= secondarymax) {
   1627                    log_err("Secondary weight out of range\n");
   1628                    goto fail;
   1629                }
   1630                first = FALSE;
   1631            }
   1632        }
   1633        ce   = ucol_next(iter, &status);
   1634    }
   1635    ucol_closeElements(iter);
   1636    return TRUE;
   1637 fail :
   1638    ucol_closeElements(iter);
   1639    return FALSE;
   1640 }
   1641 
   1642 static void TestCEValidity()
   1643 {
   1644     /* testing UCA collation elements */
   1645     UErrorCode  status      = U_ZERO_ERROR;
   1646     /* en_US has no tailorings */
   1647     UCollator  *coll        = ucol_open("root", &status);
   1648     /* tailored locales */
   1649     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
   1650     const char *loc;
   1651     FileStream *file = NULL;
   1652     char        line[1024];
   1653     UChar       codepoints[10];
   1654     int         count = 0;
   1655     int         maxCount = 0;
   1656     UChar       contextCPs[3];
   1657     UChar32     c;
   1658     UParseError parseError;
   1659     if (U_FAILURE(status)) {
   1660         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1661         return;
   1662     }
   1663     log_verbose("Testing UCA elements\n");
   1664     file = getFractionalUCA();
   1665     if (file == NULL) {
   1666         log_err("Fractional UCA data can not be opened\n");
   1667         return;
   1668     }
   1669 
   1670     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1671         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1672             line[0] == 0x000D || line[0] == '[') {
   1673             continue;
   1674         }
   1675 
   1676         getCodePoints(line, codepoints, contextCPs);
   1677         checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
   1678     }
   1679 
   1680     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1681     for (c = 0; c <= 0xffff; ++c) {
   1682         if (u_isdefined(c)) {
   1683             codepoints[0] = (UChar)c;
   1684             checkCEValidity(coll, codepoints, 1, 5, 86);
   1685         }
   1686     }
   1687     for (; c <= 0x10ffff; ++c) {
   1688         if (u_isdefined(c)) {
   1689             int32_t i = 0;
   1690             U16_APPEND_UNSAFE(codepoints, i, c);
   1691             checkCEValidity(coll, codepoints, i, 5, 86);
   1692         }
   1693     }
   1694 
   1695     ucol_close(coll);
   1696 
   1697     /* testing tailored collation elements */
   1698     log_verbose("Testing tailored elements\n");
   1699     if(QUICK) {
   1700         maxCount = sizeof(locale)/sizeof(locale[0]);
   1701     } else {
   1702         maxCount = uloc_countAvailable();
   1703     }
   1704     while (count < maxCount) {
   1705         const UChar *rules = NULL,
   1706                     *current = NULL;
   1707         UChar *rulesCopy = NULL;
   1708         int32_t ruleLen = 0;
   1709 
   1710         uint32_t chOffset = 0;
   1711         uint32_t chLen = 0;
   1712         uint32_t exOffset = 0;
   1713         uint32_t exLen = 0;
   1714         uint32_t prefixOffset = 0;
   1715         uint32_t prefixLen = 0;
   1716         UBool    startOfRules = TRUE;
   1717         UColOptionSet opts;
   1718 
   1719         UColTokenParser src;
   1720         uint32_t strength = 0;
   1721         uint16_t specs = 0;
   1722         if(QUICK) {
   1723             loc = locale[count];
   1724         } else {
   1725             loc = uloc_getAvailable(count);
   1726             if(!hasCollationElements(loc)) {
   1727                 count++;
   1728                 continue;
   1729             }
   1730         }
   1731 
   1732         log_verbose("Testing CEs for %s\n", loc);
   1733 
   1734         coll      = ucol_open(loc, &status);
   1735         if (U_FAILURE(status)) {
   1736             log_err("%s collator creation failed\n", loc);
   1737             return;
   1738         }
   1739 
   1740         src.opts = &opts;
   1741         rules = ucol_getRules(coll, &ruleLen);
   1742 
   1743         if (ruleLen > 0) {
   1744             rulesCopy = (UChar *)malloc((ruleLen +
   1745                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1746             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1747             src.current = src.source = rulesCopy;
   1748             src.end = rulesCopy + ruleLen;
   1749             src.extraCurrent = src.end;
   1750             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1751 
   1752             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   1753               strength = src.parsedToken.strength;
   1754               chOffset = src.parsedToken.charsOffset;
   1755               chLen = src.parsedToken.charsLen;
   1756               exOffset = src.parsedToken.extensionOffset;
   1757               exLen = src.parsedToken.extensionLen;
   1758               prefixOffset = src.parsedToken.prefixOffset;
   1759               prefixLen = src.parsedToken.prefixLen;
   1760               specs = src.parsedToken.flags;
   1761 
   1762                 startOfRules = FALSE;
   1763                 uprv_memcpy(codepoints, src.source + chOffset,
   1764                                                        chLen * sizeof(UChar));
   1765                 codepoints[chLen] = 0;
   1766                 checkCEValidity(coll, codepoints, chLen, 4, 85);
   1767             }
   1768             free(rulesCopy);
   1769         }
   1770 
   1771         ucol_close(coll);
   1772         count ++;
   1773     }
   1774     T_FileStream_close(file);
   1775 }
   1776 
   1777 static void printSortKeyError(const UChar   *codepoints, int length,
   1778                                     uint8_t *sortkey, int sklen)
   1779 {
   1780     int count = 0;
   1781     log_err("Sortkey not valid for ");
   1782     while (length > 0) {
   1783         log_err("0x%04x ", *codepoints);
   1784         length --;
   1785         codepoints ++;
   1786     }
   1787     log_err("\nSortkey : ");
   1788     while (count < sklen) {
   1789         log_err("0x%02x ", sortkey[count]);
   1790         count ++;
   1791     }
   1792     log_err("\n");
   1793 }
   1794 
   1795 /**
   1796 * Checking sort key validity for all levels
   1797 */
   1798 static UBool checkSortKeyValidity(UCollator *coll,
   1799                                   const UChar *codepoints,
   1800                                   int length)
   1801 {
   1802     UErrorCode status  = U_ZERO_ERROR;
   1803     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
   1804                                       UCOL_TERTIARY, UCOL_QUATERNARY,
   1805                                       UCOL_IDENTICAL};
   1806     int        strengthlen = 5;
   1807     int        index       = 0;
   1808     int        caselevel   = 0;
   1809 
   1810     while (caselevel < 1) {
   1811         if (caselevel == 0) {
   1812             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
   1813         }
   1814         else {
   1815             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
   1816         }
   1817 
   1818         while (index < strengthlen) {
   1819             int        count01 = 0;
   1820             uint32_t   count   = 0;
   1821             uint8_t    sortkey[128];
   1822             uint32_t   sklen;
   1823 
   1824             ucol_setStrength(coll, strength[index]);
   1825             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
   1826             while (sortkey[count] != 0) {
   1827                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && index != 4)) {
   1828                     printSortKeyError(codepoints, length, sortkey, sklen);
   1829                     return FALSE;
   1830                 }
   1831                 if (sortkey[count] == 1) {
   1832                     count01 ++;
   1833                 }
   1834                 count ++;
   1835             }
   1836 
   1837             if (count + 1 != sklen || (count01 != index + caselevel)) {
   1838                 printSortKeyError(codepoints, length, sortkey, sklen);
   1839                 return FALSE;
   1840             }
   1841             index ++;
   1842         }
   1843         caselevel ++;
   1844     }
   1845     return TRUE;
   1846 }
   1847 
   1848 static void TestSortKeyValidity(void)
   1849 {
   1850     /* testing UCA collation elements */
   1851     UErrorCode  status      = U_ZERO_ERROR;
   1852     /* en_US has no tailorings */
   1853     UCollator  *coll        = ucol_open("en_US", &status);
   1854     /* tailored locales */
   1855     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
   1856     FileStream *file = NULL;
   1857     char        line[1024];
   1858     UChar       codepoints[10];
   1859     int         count = 0;
   1860     UChar       contextCPs[5];
   1861     UParseError parseError;
   1862     if (U_FAILURE(status)) {
   1863         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
   1864         return;
   1865     }
   1866     log_verbose("Testing UCA elements\n");
   1867     file = getFractionalUCA();
   1868     if (file == NULL) {
   1869         log_err("Fractional UCA data can not be opened\n");
   1870         return;
   1871     }
   1872 
   1873     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
   1874         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
   1875             line[0] == 0x000D || line[0] == '[') {
   1876             continue;
   1877         }
   1878 
   1879         getCodePoints(line, codepoints, contextCPs);
   1880         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
   1881     }
   1882 
   1883     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
   1884     codepoints[0] = 0;
   1885 
   1886     while (codepoints[0] < 0xFFFF) {
   1887         if (u_isdefined((UChar32)codepoints[0])) {
   1888             checkSortKeyValidity(coll, codepoints, 1);
   1889         }
   1890         codepoints[0] ++;
   1891     }
   1892 
   1893     ucol_close(coll);
   1894 
   1895     /* testing tailored collation elements */
   1896     log_verbose("Testing tailored elements\n");
   1897     while (count < 5) {
   1898         const UChar *rules = NULL,
   1899                     *current = NULL;
   1900         UChar *rulesCopy = NULL;
   1901         int32_t ruleLen = 0;
   1902 
   1903         uint32_t chOffset = 0;
   1904         uint32_t chLen = 0;
   1905         uint32_t exOffset = 0;
   1906         uint32_t exLen = 0;
   1907         uint32_t prefixOffset = 0;
   1908         uint32_t prefixLen = 0;
   1909         UBool    startOfRules = TRUE;
   1910         UColOptionSet opts;
   1911 
   1912         UColTokenParser src;
   1913         uint32_t strength = 0;
   1914         uint16_t specs = 0;
   1915 
   1916         coll      = ucol_open(locale[count], &status);
   1917         if (U_FAILURE(status)) {
   1918             log_err("%s collator creation failed\n", locale[count]);
   1919             return;
   1920         }
   1921 
   1922         src.opts = &opts;
   1923         rules = ucol_getRules(coll, &ruleLen);
   1924 
   1925         if (ruleLen > 0) {
   1926             rulesCopy = (UChar *)malloc((ruleLen +
   1927                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
   1928             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
   1929             src.current = src.source = rulesCopy;
   1930             src.end = rulesCopy + ruleLen;
   1931             src.extraCurrent = src.end;
   1932             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1933 
   1934             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
   1935                 strength = src.parsedToken.strength;
   1936                 chOffset = src.parsedToken.charsOffset;
   1937                 chLen = src.parsedToken.charsLen;
   1938                 exOffset = src.parsedToken.extensionOffset;
   1939                 exLen = src.parsedToken.extensionLen;
   1940                 prefixOffset = src.parsedToken.prefixOffset;
   1941                 prefixLen = src.parsedToken.prefixLen;
   1942                 specs = src.parsedToken.flags;
   1943 
   1944                 startOfRules = FALSE;
   1945                 uprv_memcpy(codepoints, src.source + chOffset,
   1946                                                        chLen * sizeof(UChar));
   1947                 codepoints[chLen] = 0;
   1948                 checkSortKeyValidity(coll, codepoints, chLen);
   1949             }
   1950             free(rulesCopy);
   1951         }
   1952 
   1953         ucol_close(coll);
   1954         count ++;
   1955     }
   1956     T_FileStream_close(file);
   1957 }
   1958 
   1959 #endif /* #if !UCONFIG_NO_COLLATION */
   1960