Home | History | Annotate | Download | only in intltest
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2011, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 
      9 #include "unicode/utypes.h"
     10 
     11 #if !UCONFIG_NO_COLLATION
     12 
     13 #include "unicode/unistr.h"
     14 #include "unicode/putil.h"
     15 #include "unicode/usearch.h"
     16 
     17 #include "cmemory.h"
     18 #include "unicode/coll.h"
     19 #include "unicode/tblcoll.h"
     20 #include "unicode/coleitr.h"
     21 #include "unicode/ucoleitr.h"
     22 
     23 #include "unicode/regex.h"        // TODO: make conditional on regexp being built.
     24 
     25 #include "unicode/uniset.h"
     26 #include "unicode/uset.h"
     27 #include "unicode/ustring.h"
     28 #include "hash.h"
     29 #include "uhash.h"
     30 #include "ucol_imp.h"
     31 
     32 #include "intltest.h"
     33 #include "ssearch.h"
     34 
     35 #include "unicode/colldata.h"
     36 #include "unicode/bmsearch.h"
     37 #include "unicode/bms.h"
     38 
     39 #include "xmlparser.h"
     40 #include "ucbuf.h"
     41 
     42 #include <stdlib.h>
     43 #include <string.h>
     44 #include <stdio.h>
     45 
     46 char testId[100];
     47 
     48 #define TEST_ASSERT(x) {if (!(x)) { \
     49     errln("Failure in file %s, line %d, test ID = \"%s\"", __FILE__, __LINE__, testId);}}
     50 
     51 #define TEST_ASSERT_M(x, m) {if (!(x)) { \
     52     errln("Failure in file %s, line %d.   \"%s\"", __FILE__, __LINE__, m);return;}}
     53 
     54 #define TEST_ASSERT_SUCCESS(errcode) {if (U_FAILURE(errcode)) { \
     55     dataerrln("Failure in file %s, line %d, test ID \"%s\", status = \"%s\"", \
     56           __FILE__, __LINE__, testId, u_errorName(errcode));}}
     57 
     58 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
     59 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
     60 #define DELETE_ARRAY(array) uprv_free((void *) (array))
     61 
     62 //---------------------------------------------------------------------------
     63 //
     64 //  Test class boilerplate
     65 //
     66 //---------------------------------------------------------------------------
     67 SSearchTest::SSearchTest()
     68 {
     69 }
     70 
     71 SSearchTest::~SSearchTest()
     72 {
     73 }
     74 
     75 void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *params )
     76 {
     77     if (exec) logln("TestSuite SSearchTest: ");
     78     switch (index) {
     79 #if !UCONFIG_NO_BREAK_ITERATION
     80        case 0: name = "searchTest";
     81             if (exec) searchTest();
     82             break;
     83 
     84         case 1: name = "offsetTest";
     85             if (exec) offsetTest();
     86             break;
     87 
     88         case 2: name = "monkeyTest";
     89             if (exec) monkeyTest(params);
     90             break;
     91 
     92         case 3: name = "bmMonkeyTest";
     93             if (exec) bmMonkeyTest(params);
     94             break;
     95 
     96         case 4: name = "boyerMooreTest";
     97             if (exec) boyerMooreTest();
     98             break;
     99 
    100         case 5: name = "goodSuffixTest";
    101             if (exec) goodSuffixTest();
    102             break;
    103 
    104         case 6: name = "searchTime";
    105             if (exec) searchTime();
    106             break;
    107 
    108         case 7: name = "bmsTest";
    109             if (exec) bmsTest();
    110             break;
    111 
    112         case 8: name = "bmSearchTest";
    113             if (exec) bmSearchTest();
    114             break;
    115 
    116         case 9: name = "udhrTest";
    117             if (exec) udhrTest();
    118             break;
    119         case 10: name = "stringListTest";
    120             if (exec) stringListTest();
    121             break;
    122 #endif
    123         default: name = "";
    124             break; //needed to end loop
    125     }
    126 }
    127 
    128 
    129 #if !UCONFIG_NO_BREAK_ITERATION
    130 
    131 #define PATH_BUFFER_SIZE 2048
    132 const char *SSearchTest::getPath(char buffer[2048], const char *filename) {
    133     UErrorCode status = U_ZERO_ERROR;
    134     const char *testDataDirectory = IntlTest::getSourceTestData(status);
    135 
    136     if (U_FAILURE(status) || strlen(testDataDirectory) + strlen(filename) + 1 >= PATH_BUFFER_SIZE) {
    137         errln("ERROR: getPath() failed - %s", u_errorName(status));
    138         return NULL;
    139     }
    140 
    141     strcpy(buffer, testDataDirectory);
    142     strcat(buffer, filename);
    143     return buffer;
    144 }
    145 
    146 
    147 void SSearchTest::searchTest()
    148 {
    149 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO
    150     UErrorCode status = U_ZERO_ERROR;
    151     char path[PATH_BUFFER_SIZE];
    152     const char *testFilePath = getPath(path, "ssearch.xml");
    153 
    154     if (testFilePath == NULL) {
    155         return; /* Couldn't get path: error message already output. */
    156     }
    157 
    158     LocalPointer<UXMLParser> parser(UXMLParser::createParser(status));
    159     TEST_ASSERT_SUCCESS(status);
    160     LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status));
    161     TEST_ASSERT_SUCCESS(status);
    162     if (U_FAILURE(status)) {
    163         return;
    164     }
    165 
    166     const UnicodeString *debugTestCase = root->getAttribute("debug");
    167     if (debugTestCase != NULL) {
    168 //       setenv("USEARCH_DEBUG", "1", 1);
    169     }
    170 
    171 
    172     const UXMLElement *testCase;
    173     int32_t tc = 0;
    174 
    175     while((testCase = root->nextChildElement(tc)) != NULL) {
    176 
    177         if (testCase->getTagName().compare("test-case") != 0) {
    178             errln("ssearch, unrecognized XML Element in test file");
    179             continue;
    180         }
    181         const UnicodeString *id       = testCase->getAttribute("id");
    182         *testId = 0;
    183         if (id != NULL) {
    184             id->extract(0, id->length(), testId,  sizeof(testId), US_INV);
    185         }
    186 
    187         // If debugging test case has been specified and this is not it, skip to next.
    188         if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
    189             continue;
    190         }
    191         //
    192         //  Get the requested collation strength.
    193         //    Default is tertiary if the XML attribute is missing from the test case.
    194         //
    195         const UnicodeString *strength = testCase->getAttribute("strength");
    196         UColAttributeValue collatorStrength = UCOL_PRIMARY;
    197         if      (strength==NULL)          { collatorStrength = UCOL_TERTIARY;}
    198         else if (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
    199         else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
    200         else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
    201         else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
    202         else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
    203         else {
    204             // Bogus value supplied for strength.  Shouldn't happen, even from
    205             //  typos, if the  XML source has been validated.
    206             //  This assert is a little deceiving in that strength can be
    207             //   any of the allowed values, not just TERTIARY, but it will
    208             //   do the job of getting the error output.
    209             TEST_ASSERT(*strength=="TERTIARY")
    210         }
    211 
    212         //
    213         // Get the collator normalization flag.  Default is UCOL_OFF.
    214         //
    215         UColAttributeValue normalize = UCOL_OFF;
    216         const UnicodeString *norm = testCase->getAttribute("norm");
    217         TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
    218         if (norm!=NULL && *norm=="ON") {
    219             normalize = UCOL_ON;
    220         }
    221 
    222         //
    223         // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
    224         //
    225         UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
    226         const UnicodeString *alt = testCase->getAttribute("alternate_handling");
    227         TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
    228         if (alt != NULL && *alt == "SHIFTED") {
    229             alternateHandling = UCOL_SHIFTED;
    230         }
    231 
    232         const UnicodeString defLocale("en");
    233         char  clocale[100];
    234         const UnicodeString *locale   = testCase->getAttribute("locale");
    235         if (locale == NULL || locale->length()==0) {
    236             locale = &defLocale;
    237         };
    238         locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
    239 
    240 
    241         UnicodeString  text;
    242         UnicodeString  target;
    243         UnicodeString  pattern;
    244         int32_t        expectedMatchStart = -1;
    245         int32_t        expectedMatchLimit = -1;
    246         const UXMLElement  *n;
    247         int32_t                nodeCount = 0;
    248 
    249         n = testCase->getChildElement("pattern");
    250         TEST_ASSERT(n != NULL);
    251         if (n==NULL) {
    252             continue;
    253         }
    254         text = n->getText(FALSE);
    255         text = text.unescape();
    256         pattern.append(text);
    257         nodeCount++;
    258 
    259         n = testCase->getChildElement("pre");
    260         if (n!=NULL) {
    261             text = n->getText(FALSE);
    262             text = text.unescape();
    263             target.append(text);
    264             nodeCount++;
    265         }
    266 
    267         n = testCase->getChildElement("m");
    268         if (n!=NULL) {
    269             expectedMatchStart = target.length();
    270             text = n->getText(FALSE);
    271             text = text.unescape();
    272             target.append(text);
    273             expectedMatchLimit = target.length();
    274             nodeCount++;
    275         }
    276 
    277         n = testCase->getChildElement("post");
    278         if (n!=NULL) {
    279             text = n->getText(FALSE);
    280             text = text.unescape();
    281             target.append(text);
    282             nodeCount++;
    283         }
    284 
    285         //  Check that there weren't extra things in the XML
    286         TEST_ASSERT(nodeCount == testCase->countChildren());
    287 
    288         // Open a collator and StringSearch based on the parameters
    289         //   obtained from the XML.
    290         //
    291         status = U_ZERO_ERROR;
    292         LocalUCollatorPointer collator(ucol_open(clocale, &status));
    293         ucol_setStrength(collator.getAlias(), collatorStrength);
    294         ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);
    295         ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
    296         LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
    297                                                                target.getBuffer(), target.length(),
    298                                                                collator.getAlias(),
    299                                                                NULL,     // the break iterator
    300                                                                &status));
    301 
    302         TEST_ASSERT_SUCCESS(status);
    303         if (U_FAILURE(status)) {
    304             continue;
    305         }
    306 
    307         int32_t foundStart = 0;
    308         int32_t foundLimit = 0;
    309         UBool   foundMatch;
    310 
    311         //
    312         // Do the search, check the match result against the expected results.
    313         //
    314         foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status);
    315         TEST_ASSERT_SUCCESS(status);
    316         if ((foundMatch && expectedMatchStart<0) ||
    317             (foundStart != expectedMatchStart)   ||
    318             (foundLimit != expectedMatchLimit)) {
    319                 TEST_ASSERT(FALSE);   //  ouput generic error position
    320                 infoln("Found, expected match start = %d, %d \n"
    321                        "Found, expected match limit = %d, %d",
    322                 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
    323         }
    324 
    325         // In case there are other matches...
    326         // (should we only do this if the test case passed?)
    327         while (foundMatch) {
    328             expectedMatchStart = foundStart;
    329             expectedMatchLimit = foundLimit;
    330 
    331             foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status);
    332         }
    333 
    334         uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
    335             target.getBuffer(), target.length(),
    336             collator.getAlias(),
    337             NULL,
    338             &status));
    339 
    340         //
    341         // Do the backwards search, check the match result against the expected results.
    342         //
    343         foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status);
    344         TEST_ASSERT_SUCCESS(status);
    345         if ((foundMatch && expectedMatchStart<0) ||
    346             (foundStart != expectedMatchStart)   ||
    347             (foundLimit != expectedMatchLimit)) {
    348                 TEST_ASSERT(FALSE);   //  ouput generic error position
    349                 infoln("Found, expected backwards match start = %d, %d \n"
    350                        "Found, expected backwards match limit = %d, %d",
    351                 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
    352         }
    353     }
    354 #endif
    355 }
    356 
    357 struct UdhrTestCase
    358 {
    359     const char *locale;
    360     const char *file;
    361 };
    362 
    363 void SSearchTest::udhrTest()
    364 {
    365     UErrorCode status = U_ZERO_ERROR;
    366     char path[PATH_BUFFER_SIZE];
    367     const char *udhrPath = getPath(path, "udhr");
    368 
    369     if (udhrPath == NULL) {
    370         // couldn't get path: error message already output...
    371         return;
    372     }
    373 
    374     UdhrTestCase testCases[] = {
    375         {"en", "udhr_eng.txt"},
    376         {"de", "udhr_deu_1996.txt"},
    377         {"fr", "udhr_fra.txt"},
    378         {"ru", "udhr_rus.txt"},
    379         {"th", "udhr_tha.txt"},
    380         {"ja", "udhr_jpn.txt"},
    381         {"ko", "udhr_kor.txt"},
    382         {"zh", "udhr_cmn_hans.txt"},
    383         {"zh_Hant", "udhr_cmn_hant.txt"}
    384     };
    385 
    386     int32_t testCount = ARRAY_SIZE(testCases);
    387 
    388     for (int32_t t = 0; t < testCount; t += 1) {
    389         int32_t len = 0;
    390         char *resolvedFileName = NULL;
    391         const char *encoding = NULL;
    392         UCHARBUF *ucharBuf = NULL;
    393 
    394         ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status);
    395         resolvedFileName = NEW_ARRAY(char, len);
    396 
    397         if(resolvedFileName == NULL){
    398             continue;
    399         }
    400 
    401         if(status == U_BUFFER_OVERFLOW_ERROR){
    402             status = U_ZERO_ERROR;
    403         }
    404 
    405         ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status);
    406         ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status);
    407 
    408         DELETE_ARRAY(resolvedFileName);
    409 
    410         if(U_FAILURE(status)){
    411             infoln("Could not open the input file %s. Test skipped\n", testCases[t].file);
    412             continue;
    413         }
    414 
    415         int32_t targetLen = 0;
    416         const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status);
    417 
    418         /* The first line of the file contains the pattern */
    419         int32_t start = 0, end = 0, plen = 0;
    420 
    421         for(end = start; ; end += 1) {
    422             UChar ch = target[end];
    423 
    424             if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
    425                 break;
    426             }
    427         }
    428 
    429         plen = end - start;
    430 
    431         UChar *pattern = NEW_ARRAY(UChar, plen);
    432         for (int32_t i = 0; i < plen; i += 1) {
    433             pattern[i] =  target[start++];
    434         }
    435 
    436         int32_t offset = 0;
    437         UCollator *coll = ucol_open(testCases[t].locale, &status);
    438         UCD *ucd = NULL;
    439         BMS *bms = NULL;
    440 
    441         if (U_FAILURE(status)) {
    442             errln("Could not open collator for %s", testCases[t].locale);
    443             goto delete_collator;
    444         }
    445 
    446         ucd = ucd_open(coll, &status);
    447 
    448         if (U_FAILURE(status)) {
    449             errln("Could not open CollData object for %s", testCases[t].locale);
    450             goto delete_ucd;
    451         }
    452 
    453         bms = bms_open(ucd, pattern, plen, target, targetLen, &status);
    454 
    455         if (U_FAILURE(status)) {
    456             errln("Could not open search object for %s", testCases[t].locale);
    457             goto delete_bms;
    458         }
    459 
    460         start = end = -1;
    461         while (bms_search(bms, offset, &start, &end)) {
    462             offset = end;
    463         }
    464 
    465         if (offset == 0) {
    466             errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file);
    467         }
    468 
    469 delete_bms:
    470         bms_close(bms);
    471 
    472 delete_ucd:
    473         ucd_close(ucd);
    474 
    475 delete_collator:
    476         ucol_close(coll);
    477 
    478         DELETE_ARRAY(pattern);
    479         ucbuf_close(ucharBuf);
    480     }
    481 
    482     ucd_flushCache();
    483 }
    484 
    485 void SSearchTest::bmSearchTest()
    486 {
    487 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
    488     UErrorCode status = U_ZERO_ERROR;
    489     char path[PATH_BUFFER_SIZE];
    490     const char *testFilePath = getPath(path, "ssearch.xml");
    491 
    492     if (testFilePath == NULL) {
    493         return; /* Couldn't get path: error message already output. */
    494     }
    495 
    496     UXMLParser  *parser = UXMLParser::createParser(status);
    497     TEST_ASSERT_SUCCESS(status);
    498     UXMLElement *root   = parser->parseFile(testFilePath, status);
    499     TEST_ASSERT_SUCCESS(status);
    500     if (U_FAILURE(status)) {
    501         return;
    502     }
    503 
    504     const UnicodeString *debugTestCase = root->getAttribute("debug");
    505     if (debugTestCase != NULL) {
    506 //       setenv("USEARCH_DEBUG", "1", 1);
    507     }
    508 
    509 
    510     const UXMLElement *testCase;
    511     int32_t tc = 0;
    512 
    513     while((testCase = root->nextChildElement(tc)) != NULL) {
    514 
    515         if (testCase->getTagName().compare("test-case") != 0) {
    516             errln("ssearch, unrecognized XML Element in test file");
    517             continue;
    518         }
    519         const UnicodeString *id       = testCase->getAttribute("id");
    520         *testId = 0;
    521         if (id != NULL) {
    522             id->extract(0, id->length(), testId,  sizeof(testId), US_INV);
    523         }
    524 
    525         // If debugging test case has been specified and this is not it, skip to next.
    526         if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
    527             continue;
    528         }
    529         //
    530         //  Get the requested collation strength.
    531         //    Default is tertiary if the XML attribute is missing from the test case.
    532         //
    533         const UnicodeString *strength = testCase->getAttribute("strength");
    534         UColAttributeValue collatorStrength = UCOL_PRIMARY;
    535         if      (strength==NULL)          { collatorStrength = UCOL_TERTIARY;}
    536         else if (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
    537         else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
    538         else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
    539         else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
    540         else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
    541         else {
    542             // Bogus value supplied for strength.  Shouldn't happen, even from
    543             //  typos, if the  XML source has been validated.
    544             //  This assert is a little deceiving in that strength can be
    545             //   any of the allowed values, not just TERTIARY, but it will
    546             //   do the job of getting the error output.
    547             TEST_ASSERT(*strength=="TERTIARY")
    548         }
    549 
    550         //
    551         // Get the collator normalization flag.  Default is UCOL_OFF.
    552         //
    553         UColAttributeValue normalize = UCOL_OFF;
    554         const UnicodeString *norm = testCase->getAttribute("norm");
    555         TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
    556         if (norm!=NULL && *norm=="ON") {
    557             normalize = UCOL_ON;
    558         }
    559 
    560         //
    561         // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
    562         //
    563         UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
    564         const UnicodeString *alt = testCase->getAttribute("alternate_handling");
    565         TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
    566         if (alt != NULL && *alt == "SHIFTED") {
    567             alternateHandling = UCOL_SHIFTED;
    568         }
    569 
    570         const UnicodeString defLocale("en");
    571         char  clocale[100];
    572         const UnicodeString *locale   = testCase->getAttribute("locale");
    573         if (locale == NULL || locale->length()==0) {
    574             locale = &defLocale;
    575         };
    576         locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
    577 
    578 
    579         UnicodeString  text;
    580         UnicodeString  target;
    581         UnicodeString  pattern;
    582         int32_t        expectedMatchStart = -1;
    583         int32_t        expectedMatchLimit = -1;
    584         const UXMLElement  *n;
    585         int32_t                nodeCount = 0;
    586 
    587         n = testCase->getChildElement("pattern");
    588         TEST_ASSERT(n != NULL);
    589         if (n==NULL) {
    590             continue;
    591         }
    592         text = n->getText(FALSE);
    593         text = text.unescape();
    594         pattern.append(text);
    595         nodeCount++;
    596 
    597         n = testCase->getChildElement("pre");
    598         if (n!=NULL) {
    599             text = n->getText(FALSE);
    600             text = text.unescape();
    601             target.append(text);
    602             nodeCount++;
    603         }
    604 
    605         n = testCase->getChildElement("m");
    606         if (n!=NULL) {
    607             expectedMatchStart = target.length();
    608             text = n->getText(FALSE);
    609             text = text.unescape();
    610             target.append(text);
    611             expectedMatchLimit = target.length();
    612             nodeCount++;
    613         }
    614 
    615         n = testCase->getChildElement("post");
    616         if (n!=NULL) {
    617             text = n->getText(FALSE);
    618             text = text.unescape();
    619             target.append(text);
    620             nodeCount++;
    621         }
    622 
    623         //  Check that there weren't extra things in the XML
    624         TEST_ASSERT(nodeCount == testCase->countChildren());
    625 
    626         // Open a collator and StringSearch based on the parameters
    627         //   obtained from the XML.
    628         //
    629         status = U_ZERO_ERROR;
    630         UCollator *collator = ucol_open(clocale, &status);
    631         ucol_setStrength(collator, collatorStrength);
    632         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
    633         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
    634         UCD *ucd = ucd_open(collator, &status);
    635         BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status);
    636 
    637         TEST_ASSERT_SUCCESS(status);
    638         if (U_FAILURE(status)) {
    639             bms_close(bms);
    640             ucd_close(ucd);
    641             ucol_close(collator);
    642             continue;
    643         }
    644 
    645         int32_t foundStart = 0;
    646         int32_t foundLimit = 0;
    647         UBool   foundMatch;
    648 
    649         //
    650         // Do the search, check the match result against the expected results.
    651         //
    652         foundMatch = bms_search(bms, 0, &foundStart, &foundLimit);
    653       //TEST_ASSERT_SUCCESS(status);
    654         if ((foundMatch && expectedMatchStart < 0) ||
    655             (foundStart != expectedMatchStart)     ||
    656             (foundLimit != expectedMatchLimit)) {
    657                 TEST_ASSERT(FALSE);   //  ouput generic error position
    658                 infoln("Found, expected match start = %d, %d \n"
    659                        "Found, expected match limit = %d, %d",
    660                 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
    661         }
    662 
    663         bms_close(bms);
    664         ucd_close(ucd);
    665         ucol_close(collator);
    666     }
    667 
    668     ucd_flushCache();
    669     delete root;
    670     delete parser;
    671 #endif
    672 }
    673 
    674 struct Order
    675 {
    676     int32_t order;
    677     int32_t lowOffset;
    678     int32_t highOffset;
    679 };
    680 
    681 class OrderList
    682 {
    683 public:
    684     OrderList();
    685     OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset = 0);
    686     ~OrderList();
    687 
    688     int32_t size(void) const;
    689     void add(int32_t order, int32_t low, int32_t high);
    690     const Order *get(int32_t index) const;
    691     int32_t getLowOffset(int32_t index) const;
    692     int32_t getHighOffset(int32_t index) const;
    693     int32_t getOrder(int32_t index) const;
    694     void reverse(void);
    695     UBool compare(const OrderList &other) const;
    696     UBool matchesAt(int32_t offset, const OrderList &other) const;
    697 
    698 private:
    699     Order *list;
    700     int32_t listMax;
    701     int32_t listSize;
    702 };
    703 
    704 OrderList::OrderList()
    705   : list(NULL),  listMax(16), listSize(0)
    706 {
    707     list = new Order[listMax];
    708 }
    709 
    710 OrderList::OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset)
    711     : list(NULL), listMax(16), listSize(0)
    712 {
    713     UErrorCode status = U_ZERO_ERROR;
    714     UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
    715     uint32_t strengthMask = 0;
    716     int32_t order, low, high;
    717 
    718     switch (ucol_getStrength(coll))
    719     {
    720     default:
    721         strengthMask |= UCOL_TERTIARYORDERMASK;
    722         /* fall through */
    723 
    724     case UCOL_SECONDARY:
    725         strengthMask |= UCOL_SECONDARYORDERMASK;
    726         /* fall through */
    727 
    728     case UCOL_PRIMARY:
    729         strengthMask |= UCOL_PRIMARYORDERMASK;
    730     }
    731 
    732     list = new Order[listMax];
    733 
    734     ucol_setOffset(elems, stringOffset, &status);
    735 
    736     do {
    737         low   = ucol_getOffset(elems);
    738         order = ucol_next(elems, &status);
    739         high  = ucol_getOffset(elems);
    740 
    741         if (order != UCOL_NULLORDER) {
    742             order &= strengthMask;
    743         }
    744 
    745         if (order != UCOL_IGNORABLE) {
    746             add(order, low, high);
    747         }
    748     } while (order != UCOL_NULLORDER);
    749 
    750     ucol_closeElements(elems);
    751 }
    752 
    753 OrderList::~OrderList()
    754 {
    755     delete[] list;
    756 }
    757 
    758 void OrderList::add(int32_t order, int32_t low, int32_t high)
    759 {
    760     if (listSize >= listMax) {
    761         listMax *= 2;
    762 
    763         Order *newList = new Order[listMax];
    764 
    765         uprv_memcpy(newList, list, listSize * sizeof(Order));
    766         delete[] list;
    767         list = newList;
    768     }
    769 
    770     list[listSize].order      = order;
    771     list[listSize].lowOffset  = low;
    772     list[listSize].highOffset = high;
    773 
    774     listSize += 1;
    775 }
    776 
    777 const Order *OrderList::get(int32_t index) const
    778 {
    779     if (index >= listSize) {
    780         return NULL;
    781     }
    782 
    783     return &list[index];
    784 }
    785 
    786 int32_t OrderList::getLowOffset(int32_t index) const
    787 {
    788     const Order *order = get(index);
    789 
    790     if (order != NULL) {
    791         return order->lowOffset;
    792     }
    793 
    794     return -1;
    795 }
    796 
    797 int32_t OrderList::getHighOffset(int32_t index) const
    798 {
    799     const Order *order = get(index);
    800 
    801     if (order != NULL) {
    802         return order->highOffset;
    803     }
    804 
    805     return -1;
    806 }
    807 
    808 int32_t OrderList::getOrder(int32_t index) const
    809 {
    810     const Order *order = get(index);
    811 
    812     if (order != NULL) {
    813         return order->order;
    814     }
    815 
    816     return UCOL_NULLORDER;
    817 }
    818 
    819 int32_t OrderList::size() const
    820 {
    821     return listSize;
    822 }
    823 
    824 void OrderList::reverse()
    825 {
    826     for(int32_t f = 0, b = listSize - 1; f < b; f += 1, b -= 1) {
    827         Order swap = list[b];
    828 
    829         list[b] = list[f];
    830         list[f] = swap;
    831     }
    832 }
    833 
    834 UBool OrderList::compare(const OrderList &other) const
    835 {
    836     if (listSize != other.listSize) {
    837         return FALSE;
    838     }
    839 
    840     for(int32_t i = 0; i < listSize; i += 1) {
    841         if (list[i].order  != other.list[i].order ||
    842             list[i].lowOffset != other.list[i].lowOffset ||
    843             list[i].highOffset != other.list[i].highOffset) {
    844                 return FALSE;
    845         }
    846     }
    847 
    848     return TRUE;
    849 }
    850 
    851 UBool OrderList::matchesAt(int32_t offset, const OrderList &other) const
    852 {
    853     // NOTE: sizes include the NULLORDER, which we don't want to compare.
    854     int32_t otherSize = other.size() - 1;
    855 
    856     if (listSize - 1 - offset < otherSize) {
    857         return FALSE;
    858     }
    859 
    860     for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {
    861         if (getOrder(i) != other.getOrder(j)) {
    862             return FALSE;
    863         }
    864     }
    865 
    866     return TRUE;
    867 }
    868 
    869 static char *printOffsets(char *buffer, OrderList &list)
    870 {
    871     int32_t size = list.size();
    872     char *s = buffer;
    873 
    874     for(int32_t i = 0; i < size; i += 1) {
    875         const Order *order = list.get(i);
    876 
    877         if (i != 0) {
    878             s += sprintf(s, ", ");
    879         }
    880 
    881         s += sprintf(s, "(%d, %d)", order->lowOffset, order->highOffset);
    882     }
    883 
    884     return buffer;
    885 }
    886 
    887 static char *printOrders(char *buffer, OrderList &list)
    888 {
    889     int32_t size = list.size();
    890     char *s = buffer;
    891 
    892     for(int32_t i = 0; i < size; i += 1) {
    893         const Order *order = list.get(i);
    894 
    895         if (i != 0) {
    896             s += sprintf(s, ", ");
    897         }
    898 
    899         s += sprintf(s, "%8.8X", order->order);
    900     }
    901 
    902     return buffer;
    903 }
    904 
    905 void SSearchTest::offsetTest()
    906 {
    907     static const UVersionInfo icu49 = { 4, 9, 0, 0 };
    908     const char *test[] = {
    909         // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
    910         // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
    911         "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",
    912 
    913         "\\ua191\\u16ef\\u2036\\u017a",
    914 
    915 #if 0
    916         // This results in a complex interaction between contraction,
    917         // expansion and normalization that confuses the backwards offset fixups.
    918         "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
    919 #endif
    920 
    921         "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
    922         "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",
    923 
    924         "\\u02FE\\u02FF"
    925         "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
    926         "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
    927         "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
    928         "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
    929         "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081
    930 
    931         "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081
    932         "a\\u02FF\\u0301\\u0316", // currently not working, see #8081
    933         "a\\u02FF\\u0316\\u0301",
    934         "a\\u0430\\u0301\\u0316",
    935         "a\\u0430\\u0316\\u0301",
    936         "abc\\u0E41\\u0301\\u0316",
    937         "abc\\u0E41\\u0316\\u0301",
    938         "\\u0E41\\u0301\\u0316",
    939         "\\u0E41\\u0316\\u0301",
    940         "a\\u0301\\u0316",
    941         "a\\u0316\\u0301",
    942         "\\uAC52\\uAC53",
    943         "\\u34CA\\u34CB",
    944         "\\u11ED\\u11EE",
    945         "\\u30C3\\u30D0",
    946         "p\\u00E9ch\\u00E9",
    947         "a\\u0301\\u0325",
    948         "a\\u0300\\u0325",
    949         "a\\u0325\\u0300",
    950         "A\\u0323\\u0300B",
    951         "A\\u0300\\u0323B",
    952         "A\\u0301\\u0323B",
    953         "A\\u0302\\u0301\\u0323B",
    954         "abc",
    955         "ab\\u0300c",
    956         "ab\\u0300\\u0323c",
    957         " \\uD800\\uDC00\\uDC00",
    958         "a\\uD800\\uDC00\\uDC00",
    959         "A\\u0301\\u0301",
    960         "A\\u0301\\u0323",
    961         "A\\u0301\\u0323B",
    962         "B\\u0301\\u0323C",
    963         "A\\u0300\\u0323B",
    964         "\\u0301A\\u0301\\u0301",
    965         "abcd\\r\\u0301",
    966         "p\\u00EAche",
    967         "pe\\u0302che",
    968     };
    969 
    970     int32_t testCount = ARRAY_SIZE(test);
    971     UErrorCode status = U_ZERO_ERROR;
    972     RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
    973     if (U_FAILURE(status)) {
    974         errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status));
    975         return;
    976     }
    977     char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
    978                         // We could allocate one that's the right size by (CE_count * 10) + 2
    979                         // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"
    980 
    981     col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    982 
    983     for(int32_t i = 0; i < testCount; i += 1) {
    984         if (!isICUVersionAtLeast(icu49) && i>=4 && i<=6) {
    985             continue; // timebomb until ticket #8080 is resolved
    986         }
    987         UnicodeString ts = CharsToUnicodeString(test[i]);
    988         CollationElementIterator *iter = col->createCollationElementIterator(ts);
    989         OrderList forwardList;
    990         OrderList backwardList;
    991         int32_t order, low, high;
    992 
    993         do {
    994             low   = iter->getOffset();
    995             order = iter->next(status);
    996             high  = iter->getOffset();
    997 
    998             forwardList.add(order, low, high);
    999         } while (order != CollationElementIterator::NULLORDER);
   1000 
   1001         iter->reset();
   1002         iter->setOffset(ts.length(), status);
   1003 
   1004         backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());
   1005 
   1006         do {
   1007             high  = iter->getOffset();
   1008             order = iter->previous(status);
   1009             low   = iter->getOffset();
   1010 
   1011             if (order == CollationElementIterator::NULLORDER) {
   1012                 break;
   1013             }
   1014 
   1015             backwardList.add(order, low, high);
   1016         } while (TRUE);
   1017 
   1018         backwardList.reverse();
   1019 
   1020         if (forwardList.compare(backwardList)) {
   1021             logln("Works with \"%s\"", test[i]);
   1022             logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
   1023 //          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
   1024 
   1025             logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
   1026 //          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));
   1027 
   1028             logln();
   1029         } else {
   1030             errln("Fails with \"%s\"", test[i]);
   1031             infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
   1032             infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
   1033 
   1034             infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
   1035             infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));
   1036 
   1037             infoln();
   1038         }
   1039         delete iter;
   1040     }
   1041     delete col;
   1042 }
   1043 
   1044 #if 0
   1045 static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)
   1046 {
   1047     for(int32_t i = 0; i < string.length(); i += 1) {
   1048         UChar32 ch = string.char32At(i);
   1049 
   1050         if (ch >= 0x0020 && ch <= 0x007F) {
   1051             if (ch == 0x005C) {
   1052                 buffer.append("\\\\");
   1053             } else {
   1054                 buffer.append(ch);
   1055             }
   1056         } else {
   1057             char cbuffer[12];
   1058 
   1059             if (ch <= 0xFFFFL) {
   1060                 sprintf(cbuffer, "\\u%4.4X", ch);
   1061             } else {
   1062                 sprintf(cbuffer, "\\U%8.8X", ch);
   1063             }
   1064 
   1065             buffer.append(cbuffer);
   1066         }
   1067 
   1068         if (ch >= 0x10000L) {
   1069             i += 1;
   1070         }
   1071     }
   1072 
   1073     return buffer;
   1074 }
   1075 #endif
   1076 
   1077 #if 1
   1078 
   1079 struct PCE
   1080 {
   1081     uint64_t ce;
   1082     int32_t  lowOffset;
   1083     int32_t  highOffset;
   1084 };
   1085 
   1086 class PCEList
   1087 {
   1088 public:
   1089     PCEList(UCollator *coll, const UnicodeString &string);
   1090     ~PCEList();
   1091 
   1092     int32_t size() const;
   1093 
   1094     const PCE *get(int32_t index) const;
   1095 
   1096     int32_t getLowOffset(int32_t index) const;
   1097     int32_t getHighOffset(int32_t index) const;
   1098     uint64_t getOrder(int32_t index) const;
   1099 
   1100     UBool matchesAt(int32_t offset, const PCEList &other) const;
   1101 
   1102     uint64_t operator[](int32_t index) const;
   1103 
   1104 private:
   1105     void add(uint64_t ce, int32_t low, int32_t high);
   1106 
   1107     PCE *list;
   1108     int32_t listMax;
   1109     int32_t listSize;
   1110 };
   1111 
   1112 PCEList::PCEList(UCollator *coll, const UnicodeString &string)
   1113 {
   1114     UErrorCode status = U_ZERO_ERROR;
   1115     UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
   1116     uint64_t order;
   1117     int32_t low, high;
   1118 
   1119     list = new PCE[listMax];
   1120 
   1121     ucol_setOffset(elems, 0, &status);
   1122 
   1123     do {
   1124         order = ucol_nextProcessed(elems, &low, &high, &status);
   1125         add(order, low, high);
   1126     } while (order != UCOL_PROCESSED_NULLORDER);
   1127 
   1128     ucol_closeElements(elems);
   1129 }
   1130 
   1131 PCEList::~PCEList()
   1132 {
   1133     delete[] list;
   1134 }
   1135 
   1136 void PCEList::add(uint64_t order, int32_t low, int32_t high)
   1137 {
   1138     if (listSize >= listMax) {
   1139         listMax *= 2;
   1140 
   1141         PCE *newList = new PCE[listMax];
   1142 
   1143         uprv_memcpy(newList, list, listSize * sizeof(Order));
   1144         delete[] list;
   1145         list = newList;
   1146     }
   1147 
   1148     list[listSize].ce         = order;
   1149     list[listSize].lowOffset  = low;
   1150     list[listSize].highOffset = high;
   1151 
   1152     listSize += 1;
   1153 }
   1154 
   1155 const PCE *PCEList::get(int32_t index) const
   1156 {
   1157     if (index >= listSize) {
   1158         return NULL;
   1159     }
   1160 
   1161     return &list[index];
   1162 }
   1163 
   1164 int32_t PCEList::getLowOffset(int32_t index) const
   1165 {
   1166     const PCE *pce = get(index);
   1167 
   1168     if (pce != NULL) {
   1169         return pce->lowOffset;
   1170     }
   1171 
   1172     return -1;
   1173 }
   1174 
   1175 int32_t PCEList::getHighOffset(int32_t index) const
   1176 {
   1177     const PCE *pce = get(index);
   1178 
   1179     if (pce != NULL) {
   1180         return pce->highOffset;
   1181     }
   1182 
   1183     return -1;
   1184 }
   1185 
   1186 uint64_t PCEList::getOrder(int32_t index) const
   1187 {
   1188     const PCE *pce = get(index);
   1189 
   1190     if (pce != NULL) {
   1191         return pce->ce;
   1192     }
   1193 
   1194     return UCOL_PROCESSED_NULLORDER;
   1195 }
   1196 
   1197 int32_t PCEList::size() const
   1198 {
   1199     return listSize;
   1200 }
   1201 
   1202 UBool PCEList::matchesAt(int32_t offset, const PCEList &other) const
   1203 {
   1204     // NOTE: sizes include the NULLORDER, which we don't want to compare.
   1205     int32_t otherSize = other.size() - 1;
   1206 
   1207     if (listSize - 1 - offset < otherSize) {
   1208         return FALSE;
   1209     }
   1210 
   1211     for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {
   1212         if (getOrder(i) != other.getOrder(j)) {
   1213             return FALSE;
   1214         }
   1215     }
   1216 
   1217     return TRUE;
   1218 }
   1219 
   1220 uint64_t PCEList::operator[](int32_t index) const
   1221 {
   1222     return getOrder(index);
   1223 }
   1224 
   1225 void SSearchTest::boyerMooreTest()
   1226 {
   1227     UErrorCode status = U_ZERO_ERROR;
   1228     UCollator *coll = NULL;
   1229     CollData *data = NULL;
   1230     const CEList* ce = NULL;
   1231     const CEList* ce1 = NULL;
   1232     UnicodeString lp  = "fuss";
   1233     UnicodeString sp = "fu\\u00DF";
   1234     BoyerMooreSearch *longPattern = NULL;
   1235     BoyerMooreSearch *shortPattern = NULL;
   1236     UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
   1237                                 "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
   1238                                 "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
   1239     int32_t start = -1, end = -1;
   1240 
   1241     coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
   1242     if (U_FAILURE(status)) {
   1243         errcheckln(status, "Could not open collator. - %s", u_errorName(status));
   1244         return;
   1245     }
   1246 
   1247     data = CollData::open(coll, status);
   1248     if (U_FAILURE(status)) {
   1249         errln("Could not open CollData object.");
   1250         goto close_data;
   1251     }
   1252 
   1253     data->getDynamicClassID();
   1254     if (U_FAILURE(status)) {
   1255         errln("Could not get dynamic class ID of CollData.");
   1256         goto close_patterns;
   1257     }
   1258 
   1259     data->getStaticClassID();
   1260     if (U_FAILURE(status)) {
   1261         errln("Could not get static class ID of CollData.");
   1262         goto close_patterns;
   1263     }
   1264 
   1265     longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status);
   1266     shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status);
   1267     if (U_FAILURE(status)) {
   1268         errln("Could not create pattern objects.");
   1269         goto close_patterns;
   1270     }
   1271 
   1272     longPattern->getBadCharacterTable();
   1273     shortPattern->getBadCharacterTable();
   1274     if (U_FAILURE(status)) {
   1275         errln("Could not get bad character table.");
   1276         goto close_patterns;
   1277     }
   1278 
   1279     longPattern->getGoodSuffixTable();
   1280     shortPattern->getGoodSuffixTable();
   1281     if (U_FAILURE(status)) {
   1282         errln("Could not get good suffix table.");
   1283         goto close_patterns;
   1284     }
   1285 
   1286     longPattern->getDynamicClassID();
   1287     shortPattern->getDynamicClassID();
   1288     if (U_FAILURE(status)) {
   1289         errln("Could not get dynamic class ID of BoyerMooreSearch.");
   1290         goto close_patterns;
   1291     }
   1292 
   1293     longPattern->getStaticClassID();
   1294     shortPattern->getStaticClassID();
   1295     if (U_FAILURE(status)) {
   1296         errln("Could not get static class ID of BoyerMooreSearch.");
   1297         goto close_patterns;
   1298     }
   1299 
   1300     longPattern->getData();
   1301     shortPattern->getData();
   1302     if (U_FAILURE(status)) {
   1303         errln("Could not get collate data.");
   1304         goto close_patterns;
   1305     }
   1306 
   1307     ce = longPattern->getPatternCEs();
   1308     ce1 = shortPattern->getPatternCEs();
   1309     if (U_FAILURE(status)) {
   1310         errln("Could not get pattern CEs.");
   1311         goto close_patterns;
   1312     }
   1313 
   1314     ce->getDynamicClassID();
   1315     ce1->getDynamicClassID();
   1316     if (U_FAILURE(status)) {
   1317         errln("Could not get dynamic class ID of CEList.");
   1318         goto close_patterns;
   1319     }
   1320 
   1321     ce->getStaticClassID();
   1322     ce1->getStaticClassID();
   1323     if (U_FAILURE(status)) {
   1324         errln("Could not get static class ID of CEList.");
   1325         goto close_patterns;
   1326     }
   1327 
   1328     if(data->minLengthInChars(ce,0) != 3){
   1329         errln("Minimal Length in Characters for 'data' with 'ce' was suppose to give 3.");
   1330         goto close_patterns;
   1331     }
   1332 
   1333     if(data->minLengthInChars(ce1,0) != 3){
   1334         errln("Minimal Length in Characters for 'data' with 'ce1' was suppose to give 3.");
   1335         goto close_patterns;
   1336     }
   1337 
   1338     for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
   1339         UnicodeString target = targets[t].unescape();
   1340 
   1341         longPattern->setTargetString(&target, status);
   1342         if (longPattern->search(0, start, end)) {
   1343             logln("Test %d: found long pattern at [%d, %d].", t, start, end);
   1344         } else {
   1345             errln("Test %d: did not find long pattern.", t);
   1346         }
   1347 
   1348         shortPattern->setTargetString(&target, status);
   1349         if (shortPattern->search(0, start, end)) {
   1350             logln("Test %d: found short pattern at [%d, %d].", t, start, end);
   1351         } else {
   1352             errln("Test %d: did not find short pattern.", t);
   1353         }
   1354 
   1355         if(longPattern->empty()){
   1356             errln("Test %d: Long pattern should not have been empty.");
   1357         }
   1358 
   1359         if(shortPattern->empty()){
   1360             errln("Test %d: Short pattern should not have been empty.");
   1361         }
   1362     }
   1363 
   1364 close_patterns:
   1365     delete shortPattern;
   1366     delete longPattern;
   1367 
   1368 close_data:
   1369     CollData::close(data);
   1370     ucol_close(coll);
   1371 }
   1372 
   1373 void SSearchTest::bmsTest()
   1374 {
   1375     UErrorCode status = U_ZERO_ERROR;
   1376     UCollator *coll = NULL;
   1377     UCD *data = NULL;
   1378     UnicodeString lp  = "fuss";
   1379     UnicodeString lpu = lp.unescape();
   1380     UnicodeString sp  = "fu\\u00DF";
   1381     UnicodeString spu = sp.unescape();
   1382     BMS *longPattern = NULL;
   1383     BMS *shortPattern = NULL;
   1384     UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
   1385                                 "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
   1386                                 "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
   1387     int32_t start = -1, end = -1;
   1388 
   1389     coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
   1390     if (U_FAILURE(status)) {
   1391         errcheckln(status, "Could not open collator. - %s", u_errorName(status));
   1392         return;
   1393     }
   1394 
   1395     data = ucd_open(coll, &status);
   1396     if (U_FAILURE(status)) {
   1397         errln("Could not open CollData object.");
   1398         goto close_data;
   1399     }
   1400 
   1401     longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status);
   1402     shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &status);
   1403     if (U_FAILURE(status)) {
   1404         errln("Couldn't open pattern objects.");
   1405         goto close_patterns;
   1406     }
   1407 
   1408     for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
   1409         UnicodeString target = targets[t].unescape();
   1410 
   1411         bms_setTargetString(longPattern, target.getBuffer(), target.length(), &status);
   1412         if (bms_search(longPattern, 0, &start, &end)) {
   1413             logln("Test %d: found long pattern at [%d, %d].", t, start, end);
   1414         } else {
   1415             errln("Test %d: did not find long pattern.", t);
   1416         }
   1417 
   1418         bms_setTargetString(shortPattern, target.getBuffer(), target.length(), &status);
   1419         if (bms_search(shortPattern, 0, &start, &end)) {
   1420             logln("Test %d: found short pattern at [%d, %d].", t, start, end);
   1421         } else {
   1422             errln("Test %d: did not find short pattern.", t);
   1423         }
   1424     }
   1425 
   1426     /* Add better coverage for bms code. */
   1427     if(bms_empty(longPattern)) {
   1428         errln("FAIL: longgPattern is empty.");
   1429     }
   1430 
   1431     if (!bms_getData(longPattern)) {
   1432         errln("FAIL: bms_getData returned NULL.");
   1433     }
   1434 
   1435     if (!ucd_getCollator(data)) {
   1436         errln("FAIL: ucd_getCollator returned NULL.");
   1437     }
   1438 
   1439 close_patterns:
   1440     bms_close(shortPattern);
   1441     bms_close(longPattern);
   1442 
   1443 close_data:
   1444     ucd_close(data);
   1445     ucd_freeCache();
   1446     ucol_close(coll);
   1447 }
   1448 
   1449 void SSearchTest::goodSuffixTest()
   1450 {
   1451     UErrorCode status = U_ZERO_ERROR;
   1452     UCollator *coll = NULL;
   1453     CollData *data = NULL;
   1454     UnicodeString pat = /*"gcagagag"*/ "fxeld";
   1455     UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld";
   1456     BoyerMooreSearch *pattern = NULL;
   1457     int32_t start = -1, end = -1;
   1458 
   1459     coll = ucol_open(NULL, &status);
   1460     if (U_FAILURE(status)) {
   1461         errcheckln(status, "Couldn't open collator. - %s", u_errorName(status));
   1462         return;
   1463     }
   1464 
   1465     data = CollData::open(coll, status);
   1466     if (U_FAILURE(status)) {
   1467         errln("Couldn't open CollData object.");
   1468         goto close_data;
   1469     }
   1470 
   1471     pattern = new BoyerMooreSearch(data, pat, &target, status);
   1472     if (U_FAILURE(status)) {
   1473         errln("Couldn't open pattern object.");
   1474         goto close_pattern;
   1475     }
   1476 
   1477     if (pattern->search(0, start, end)) {
   1478         logln("Found pattern at [%d, %d].", start, end);
   1479     } else {
   1480         errln("Did not find pattern.");
   1481     }
   1482 
   1483 close_pattern:
   1484     delete pattern;
   1485 
   1486 close_data:
   1487     CollData::close(data);
   1488     ucol_close(coll);
   1489 }
   1490 
   1491 //
   1492 //  searchTime()    A quick and dirty performance test for string search.
   1493 //                  Probably  doesn't really belong as part of intltest, but it
   1494 //                  does check that the search succeeds, and gets the right result,
   1495 //                  so it serves as a functionality test also.
   1496 //
   1497 //                  To run as a perf test, up the loop count, select by commenting
   1498 //                  and uncommenting in the code the operation to be measured,
   1499 //                  rebuild, and measure the running time of this test alone.
   1500 //
   1501 //                     time LD_LIBRARY_PATH=whatever  ./intltest  collate/SSearchTest/searchTime
   1502 //
   1503 void SSearchTest::searchTime() {
   1504     static const char *longishText =
   1505 "Whylom, as olde stories tellen us,\n"
   1506 "Ther was a duk that highte Theseus:\n"
   1507 "Of Athenes he was lord and governour,\n"
   1508 "And in his tyme swich a conquerour,\n"
   1509 "That gretter was ther noon under the sonne.\n"
   1510 "Ful many a riche contree hadde he wonne;\n"
   1511 "What with his wisdom and his chivalrye,\n"
   1512 "He conquered al the regne of Femenye,\n"
   1513 "That whylom was y-cleped Scithia;\n"
   1514 "And weddede the quene Ipolita,\n"
   1515 "And broghte hir hoom with him in his contree\n"
   1516 "With muchel glorie and greet solempnitee,\n"
   1517 "And eek hir yonge suster Emelye.\n"
   1518 "And thus with victorie and with melodye\n"
   1519 "Lete I this noble duk to Athenes ryde,\n"
   1520 "And al his hoost, in armes, him bisyde.\n"
   1521 "And certes, if it nere to long to here,\n"
   1522 "I wolde han told yow fully the manere,\n"
   1523 "How wonnen was the regne of Femenye\n"
   1524 "By Theseus, and by his chivalrye;\n"
   1525 "And of the grete bataille for the nones\n"
   1526 "Bitwixen Athen's and Amazones;\n"
   1527 "And how asseged was Ipolita,\n"
   1528 "The faire hardy quene of Scithia;\n"
   1529 "And of the feste that was at hir weddinge,\n"
   1530 "And of the tempest at hir hoom-cominge;\n"
   1531 "But al that thing I moot as now forbere.\n"
   1532 "I have, God woot, a large feeld to ere,\n"
   1533 "And wayke been the oxen in my plough.\n"
   1534 "The remenant of the tale is long y-nough.\n"
   1535 "I wol nat letten eek noon of this route;\n"
   1536 "Lat every felawe telle his tale aboute,\n"
   1537 "And lat see now who shal the soper winne;\n"
   1538 "And ther I lefte, I wol ageyn biginne.\n"
   1539 "This duk, of whom I make mencioun,\n"
   1540 "When he was come almost unto the toun,\n"
   1541 "In al his wele and in his moste pryde,\n"
   1542 "He was war, as he caste his eye asyde,\n"
   1543 "Wher that ther kneled in the hye weye\n"
   1544 "A companye of ladies, tweye and tweye,\n"
   1545 "Ech after other, clad in clothes blake; \n"
   1546 "But swich a cry and swich a wo they make,\n"
   1547 "That in this world nis creature livinge,\n"
   1548 "That herde swich another weymentinge;\n"
   1549 "And of this cry they nolde never stenten,\n"
   1550 "Til they the reynes of his brydel henten.\n"
   1551 "'What folk ben ye, that at myn hoomcominge\n"
   1552 "Perturben so my feste with cryinge'?\n"
   1553 "Quod Theseus, 'have ye so greet envye\n"
   1554 "Of myn honour, that thus compleyne and crye? \n"
   1555 "Or who hath yow misboden, or offended?\n"
   1556 "And telleth me if it may been amended;\n"
   1557 "And why that ye ben clothed thus in blak'?\n"
   1558 "The eldest lady of hem alle spak,\n"
   1559 "When she hadde swowned with a deedly chere,\n"
   1560 "That it was routhe for to seen and here,\n"
   1561 "And seyde: 'Lord, to whom Fortune hath yiven\n"
   1562 "Victorie, and as a conquerour to liven,\n"
   1563 "Noght greveth us your glorie and your honour;\n"
   1564 "But we biseken mercy and socour.\n"
   1565 "Have mercy on our wo and our distresse.\n"
   1566 "Som drope of pitee, thurgh thy gentilesse,\n"
   1567 "Up-on us wrecched wommen lat thou falle.\n"
   1568 "For certes, lord, ther nis noon of us alle,\n"
   1569 "That she nath been a duchesse or a quene;\n"
   1570 "Now be we caitifs, as it is wel sene:\n"
   1571 "Thanked be Fortune, and hir false wheel,\n"
   1572 "That noon estat assureth to be weel.\n"
   1573 "And certes, lord, t'abyden your presence,\n"
   1574 "Here in the temple of the goddesse Clemence\n"
   1575 "We han ben waytinge al this fourtenight;\n"
   1576 "Now help us, lord, sith it is in thy might.\n"
   1577 "I wrecche, which that wepe and waille thus,\n"
   1578 "Was whylom wyf to king Capaneus,\n"
   1579 "That starf at Thebes, cursed be that day!\n"
   1580 "And alle we, that been in this array,\n"
   1581 "And maken al this lamentacioun,\n"
   1582 "We losten alle our housbondes at that toun,\n"
   1583 "Whyl that the sege ther-aboute lay.\n"
   1584 "And yet now th'olde Creon, weylaway!\n"
   1585 "The lord is now of Thebes the citee, \n"
   1586 "Fulfild of ire and of iniquitee,\n"
   1587 "He, for despyt, and for his tirannye,\n"
   1588 "To do the dede bodyes vileinye,\n"
   1589 "Of alle our lordes, whiche that ben slawe,\n"
   1590 "Hath alle the bodyes on an heep y-drawe,\n"
   1591 "And wol nat suffren hem, by noon assent,\n"
   1592 "Neither to been y-buried nor y-brent,\n"
   1593 "But maketh houndes ete hem in despyt. zet'\n";
   1594 
   1595 #define TEST_BOYER_MOORE 1
   1596 const char *cPattern = "maketh houndes ete hem";
   1597 //const char *cPattern = "Whylom";
   1598 //const char *cPattern = "zet";
   1599     const char *testId = "searchTime()";   // for error macros.
   1600     UnicodeString target = longishText;
   1601     UErrorCode status = U_ZERO_ERROR;
   1602 
   1603 
   1604     LocalUCollatorPointer collator(ucol_open("en", &status));
   1605     CollData *data = CollData::open(collator.getAlias(), status);
   1606     if (U_FAILURE(status) || collator.isNull() || data == NULL) {
   1607         errcheckln(status, "Unable to open UCollator or CollData. - %s", u_errorName(status));
   1608         return;
   1609     }
   1610     //ucol_setStrength(collator.getAlias(), collatorStrength);
   1611     //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);
   1612     UnicodeString uPattern = cPattern;
   1613 #ifndef TEST_BOYER_MOORE
   1614     LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(),
   1615                                                            target.getBuffer(), target.length(),
   1616                                                            collator.getAlias(),
   1617                                                            NULL,     // the break iterator
   1618                                                            &status));
   1619     TEST_ASSERT_SUCCESS(status);
   1620 #else
   1621     BoyerMooreSearch bms(data, uPattern, &target, status);
   1622     TEST_ASSERT_SUCCESS(status);
   1623 #endif
   1624 
   1625 //  int32_t foundStart;
   1626 //  int32_t foundEnd;
   1627     UBool   found;
   1628 
   1629     // Find the match position usgin strstr
   1630     const char *pm = strstr(longishText, cPattern);
   1631     TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr");
   1632     int32_t  refMatchPos = (int32_t)(pm - longishText);
   1633     int32_t  icuMatchPos;
   1634     int32_t  icuMatchEnd;
   1635 #ifndef TEST_BOYER_MOORE
   1636     usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);
   1637     TEST_ASSERT_SUCCESS(status);
   1638 #else
   1639     found = bms.search(0, icuMatchPos, icuMatchEnd);
   1640 #endif
   1641     TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions.");
   1642 
   1643     int32_t i;
   1644     // int32_t j=0;
   1645 
   1646     // Try loopcounts around 100000 to some millions, depending on the operation,
   1647     //   to get runtimes of at least several seconds.
   1648     for (i=0; i<10000; i++) {
   1649 #ifndef TEST_BOYER_MOORE
   1650         found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);
   1651 #else
   1652         found = bms.search(0, icuMatchPos, icuMatchEnd);
   1653 #endif
   1654         //TEST_ASSERT_SUCCESS(status);
   1655         //TEST_ASSERT(found);
   1656 
   1657         // usearch_setOffset(uss.getAlias(), 0, &status);
   1658         // icuMatchPos = usearch_next(uss.getAlias(), &status);
   1659 
   1660          // The i+j stuff is to confuse the optimizer and get it to actually leave the
   1661          //   call to strstr in place.
   1662          //pm = strstr(longishText+j, cPattern);
   1663          //j = (j + i)%5;
   1664     }
   1665 
   1666     //printf("%ld, %d\n", pm-longishText, j);
   1667 #ifdef TEST_BOYER_MOORE
   1668     CollData::close(data);
   1669 #endif
   1670 }
   1671 #endif
   1672 
   1673 //----------------------------------------------------------------------------------------
   1674 //
   1675 //   Random Numbers.  Similar to standard lib rand() and srand()
   1676 //                    Not using library to
   1677 //                      1.  Get same results on all platforms.
   1678 //                      2.  Get access to current seed, to more easily reproduce failures.
   1679 //
   1680 //---------------------------------------------------------------------------------------
   1681 static uint32_t m_seed = 1;
   1682 
   1683 static uint32_t m_rand()
   1684 {
   1685     m_seed = m_seed * 1103515245 + 12345;
   1686     return (uint32_t)(m_seed/65536) % 32768;
   1687 }
   1688 
   1689 class Monkey
   1690 {
   1691 public:
   1692     virtual void append(UnicodeString &test, UnicodeString &alternate) = 0;
   1693 
   1694 protected:
   1695     Monkey();
   1696     virtual ~Monkey();
   1697 };
   1698 
   1699 Monkey::Monkey()
   1700 {
   1701     // ook?
   1702 }
   1703 
   1704 Monkey::~Monkey()
   1705 {
   1706     // ook?
   1707 }
   1708 
   1709 class SetMonkey : public Monkey
   1710 {
   1711 public:
   1712     SetMonkey(const USet *theSet);
   1713     ~SetMonkey();
   1714 
   1715     virtual void append(UnicodeString &test, UnicodeString &alternate);
   1716 
   1717 private:
   1718     const USet *set;
   1719 };
   1720 
   1721 SetMonkey::SetMonkey(const USet *theSet)
   1722     : Monkey(), set(theSet)
   1723 {
   1724     // ook?
   1725 }
   1726 
   1727 SetMonkey::~SetMonkey()
   1728 {
   1729     //ook...
   1730 }
   1731 
   1732 void SetMonkey::append(UnicodeString &test, UnicodeString &alternate)
   1733 {
   1734     int32_t size = uset_size(set);
   1735     int32_t index = m_rand() % size;
   1736     UChar32 ch = uset_charAt(set, index);
   1737     UnicodeString str(ch);
   1738 
   1739     test.append(str);
   1740     alternate.append(str); // flip case, or some junk?
   1741 }
   1742 
   1743 class StringSetMonkey : public Monkey
   1744 {
   1745 public:
   1746     StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData);
   1747     ~StringSetMonkey();
   1748 
   1749     void append(UnicodeString &testCase, UnicodeString &alternate);
   1750 
   1751 private:
   1752     UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeString &alternate);
   1753 
   1754     const USet *set;
   1755     UCollator  *coll;
   1756     CollData   *collData;
   1757 };
   1758 
   1759 StringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData)
   1760 : Monkey(), set(theSet), coll(theCollator), collData(theCollData)
   1761 {
   1762     // ook.
   1763 }
   1764 
   1765 StringSetMonkey::~StringSetMonkey()
   1766 {
   1767     // ook?
   1768 }
   1769 
   1770 void StringSetMonkey::append(UnicodeString &testCase, UnicodeString &alternate)
   1771 {
   1772     int32_t itemCount = uset_getItemCount(set), len = 0;
   1773     int32_t index = m_rand() % itemCount;
   1774     UChar32 rangeStart = 0, rangeEnd = 0;
   1775     UChar buffer[16];
   1776     UErrorCode err = U_ZERO_ERROR;
   1777 
   1778     len = uset_getItem(set, index, &rangeStart, &rangeEnd, buffer, 16, &err);
   1779 
   1780     if (len == 0) {
   1781         int32_t offset = m_rand() % (rangeEnd - rangeStart + 1);
   1782         UChar32 ch = rangeStart + offset;
   1783         UnicodeString str(ch);
   1784 
   1785         testCase.append(str);
   1786         generateAlternative(str, alternate);
   1787     } else if (len > 0) {
   1788         // should check that len < 16...
   1789         UnicodeString str(buffer, len);
   1790 
   1791         testCase.append(str);
   1792         generateAlternative(str, alternate);
   1793     } else {
   1794         // shouldn't happen...
   1795     }
   1796 }
   1797 
   1798 UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCase, UnicodeString &alternate)
   1799 {
   1800     // find out shortest string for the longest sequence of ces.
   1801     // needs to be refined to use dynamic programming, but will be roughly right
   1802     UErrorCode status = U_ZERO_ERROR;
   1803     CEList ceList(coll, testCase, status);
   1804     UnicodeString alt;
   1805     int32_t offset = 0;
   1806 
   1807     if (ceList.size() == 0) {
   1808         return alternate.append(testCase);
   1809     }
   1810 
   1811     while (offset < ceList.size()) {
   1812         int32_t ce = ceList.get(offset);
   1813         const StringList *strings = collData->getStringList(ce);
   1814 
   1815         if (strings == NULL) {
   1816             return alternate.append(testCase);
   1817         }
   1818 
   1819         int32_t stringCount = strings->size();
   1820         int32_t tries = 0;
   1821 
   1822         // find random string that generates the same CEList
   1823         const CEList *ceList2 = NULL;
   1824         const UnicodeString *string = NULL;
   1825               UBool matches = FALSE;
   1826 
   1827         do {
   1828             int32_t s = m_rand() % stringCount;
   1829 
   1830             if (tries++ > stringCount) {
   1831                 alternate.append(testCase);
   1832                 return alternate;
   1833             }
   1834 
   1835             string = strings->get(s);
   1836             ceList2 = collData->getCEList(string);
   1837             matches = ceList.matchesAt(offset, ceList2);
   1838 
   1839             if (! matches) {
   1840                 collData->freeCEList((CEList *) ceList2);
   1841             }
   1842         } while (! matches);
   1843 
   1844         alt.append(*string);
   1845         offset += ceList2->size();
   1846         collData->freeCEList(ceList2);
   1847     }
   1848 
   1849     const CEList altCEs(coll, alt, status);
   1850 
   1851     if (ceList.matchesAt(0, &altCEs)) {
   1852         return alternate.append(alt);
   1853     }
   1854 
   1855     return alternate.append(testCase);
   1856 }
   1857 
   1858 static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyCount, UnicodeString &testCase, UnicodeString &alternate)
   1859 {
   1860     int32_t pieces = (m_rand() % 4) + 1;
   1861     UErrorCode status = U_ZERO_ERROR;
   1862     UBool matches;
   1863 
   1864     do {
   1865         testCase.remove();
   1866         alternate.remove();
   1867         monkeys[0]->append(testCase, alternate);
   1868 
   1869         for(int32_t piece = 0; piece < pieces; piece += 1) {
   1870             int32_t monkey = m_rand() % monkeyCount;
   1871 
   1872             monkeys[monkey]->append(testCase, alternate);
   1873         }
   1874 
   1875         const CEList ceTest(coll, testCase, status);
   1876         const CEList ceAlt(coll, alternate, status);
   1877 
   1878         matches = ceTest.matchesAt(0, &ceAlt);
   1879     } while (! matches);
   1880 }
   1881 
   1882 //
   1883 //  Find the next acceptable boundary following the specified starting index
   1884 //     in the target text being searched.
   1885 //      TODO:  refine what is an acceptable boundary.  For the moment,
   1886 //             choose the next position not within a combining sequence.
   1887 //
   1888 #if 0
   1889 static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex) {
   1890     const UChar *text = string.getBuffer();
   1891     int32_t textLen   = string.length();
   1892 
   1893     if (startIndex >= textLen) {
   1894         return startIndex;
   1895     }
   1896 
   1897     UChar32  c;
   1898     int32_t  i = startIndex;
   1899 
   1900     U16_NEXT(text, i, textLen, c);
   1901 
   1902     // If we are on a control character, stop without looking for combining marks.
   1903     //    Control characters do not combine.
   1904     int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1905     if (gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR) {
   1906         return i;
   1907     }
   1908 
   1909     // The initial character was not a control, and can thus accept trailing
   1910     //   combining characters.  Advance over however many of them there are.
   1911     int32_t  indexOfLastCharChecked;
   1912 
   1913     for (;;) {
   1914         indexOfLastCharChecked = i;
   1915 
   1916         if (i>=textLen) {
   1917             break;
   1918         }
   1919 
   1920         U16_NEXT(text, i, textLen, c);
   1921         gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1922 
   1923         if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
   1924             break;
   1925         }
   1926     }
   1927 
   1928     return indexOfLastCharChecked;
   1929 }
   1930 #endif
   1931 
   1932 #if 0
   1933 static UBool isInCombiningSequence(const UnicodeString &string, int32_t index) {
   1934     const UChar *text = string.getBuffer();
   1935     int32_t textLen   = string.length();
   1936 
   1937     if (index>=textLen || index<=0) {
   1938         return FALSE;
   1939     }
   1940 
   1941     // If the character at the current index is not a GRAPHEME_EXTEND
   1942     //    then we can not be within a combining sequence.
   1943     UChar32  c;
   1944     U16_GET(text, 0, index, textLen, c);
   1945     int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1946     if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
   1947         return FALSE;
   1948     }
   1949 
   1950     // We are at a combining mark.  If the preceding character is anything
   1951     //   except a CONTROL, CR or LF, we are in a combining sequence.
   1952     U16_PREV(text, 0, index, c);
   1953     gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1954 
   1955     return !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR);
   1956 }
   1957 #endif
   1958 
   1959 static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)
   1960 {
   1961     UErrorCode      status = U_ZERO_ERROR;
   1962     OrderList       targetOrders(coll, target, offset);
   1963     OrderList       patternOrders(coll, pattern);
   1964     int32_t         targetSize  = targetOrders.size() - 1;
   1965     int32_t         patternSize = patternOrders.size() - 1;
   1966     UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
   1967                                                   target.getBuffer(), target.length(), &status);
   1968 
   1969     if (patternSize == 0) {
   1970         // Searching for an empty pattern always fails
   1971         matchStart = matchEnd = -1;
   1972         ubrk_close(charBreakIterator);
   1973         return FALSE;
   1974     }
   1975 
   1976     matchStart = matchEnd = -1;
   1977 
   1978     for(int32_t i = 0; i < targetSize; i += 1) {
   1979         if (targetOrders.matchesAt(i, patternOrders)) {
   1980             int32_t start    = targetOrders.getLowOffset(i);
   1981             int32_t maxLimit = targetOrders.getLowOffset(i + patternSize);
   1982             int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1);
   1983 
   1984             // if the low and high offsets of the first CE in
   1985             // the match are the same, it means that the match
   1986             // starts in the middle of an expansion - all but
   1987             // the first CE of the expansion will have the offset
   1988             // of the following character.
   1989             if (start == targetOrders.getHighOffset(i)) {
   1990                 continue;
   1991             }
   1992 
   1993             // Make sure match starts on a grapheme boundary
   1994             if (! ubrk_isBoundary(charBreakIterator, start)) {
   1995                 continue;
   1996             }
   1997 
   1998             // If the low and high offsets of the CE after the match
   1999             // are the same, it means that the match ends in the middle
   2000             // of an expansion sequence.
   2001             if (maxLimit == targetOrders.getHighOffset(i + patternSize) &&
   2002                 targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) {
   2003                 continue;
   2004             }
   2005 
   2006             int32_t mend = maxLimit;
   2007 
   2008             // Find the first grapheme break after the character index
   2009             // of the last CE in the match. If it's after character index
   2010             // that's after the last CE in the match, use that index
   2011             // as the end of the match.
   2012             if (minLimit < maxLimit) {
   2013                 // When the last CE's low index is same with its high index, the CE is likely
   2014                 // a part of expansion. In this case, the index is located just after the
   2015                 // character corresponding to the CEs compared above. If the index is right
   2016                 // at the break boundary, move the position to the next boundary will result
   2017                 // incorrect match length when there are ignorable characters exist between
   2018                 // the position and the next character produces CE(s). See ticket#8482.
   2019                 if (minLimit == targetOrders.getHighOffset(i + patternSize - 1) && ubrk_isBoundary(charBreakIterator, minLimit)) {
   2020                     mend = minLimit;
   2021                 } else {
   2022                     int32_t nba = ubrk_following(charBreakIterator, minLimit);
   2023 
   2024                     if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) {
   2025                         mend = nba;
   2026                     }
   2027                 }
   2028             }
   2029 
   2030             if (mend > maxLimit) {
   2031                 continue;
   2032             }
   2033 
   2034             if (! ubrk_isBoundary(charBreakIterator, mend)) {
   2035                 continue;
   2036             }
   2037 
   2038             matchStart = start;
   2039             matchEnd   = mend;
   2040 
   2041             ubrk_close(charBreakIterator);
   2042             return TRUE;
   2043         }
   2044     }
   2045 
   2046     ubrk_close(charBreakIterator);
   2047     return FALSE;
   2048 }
   2049 
   2050 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
   2051 static int32_t  getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {
   2052     int32_t val = defaultVal;
   2053 
   2054     name.append(" *= *(-?\\d+)");
   2055 
   2056     UErrorCode status = U_ZERO_ERROR;
   2057     RegexMatcher m(name, params, 0, status);
   2058 
   2059     if (m.find()) {
   2060         // The param exists.  Convert the string to an int.
   2061         char valString[100];
   2062         int32_t paramLength = m.end(1, status) - m.start(1, status);
   2063 
   2064         if (paramLength >= (int32_t)(sizeof(valString)-1)) {
   2065             paramLength = (int32_t)(sizeof(valString)-2);
   2066         }
   2067 
   2068         params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
   2069         val = strtol(valString,  NULL, 10);
   2070 
   2071         // Delete this parameter from the params string.
   2072         m.reset();
   2073         params = m.replaceFirst("", status);
   2074     }
   2075 
   2076   //U_ASSERT(U_SUCCESS(status));
   2077     if (! U_SUCCESS(status)) {
   2078         val = defaultVal;
   2079     }
   2080 
   2081     return val;
   2082 }
   2083 #endif
   2084 
   2085 #if !UCONFIG_NO_COLLATION
   2086 int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
   2087                                     const char *name, const char *strength, uint32_t seed)
   2088 {
   2089     UErrorCode status = U_ZERO_ERROR;
   2090     int32_t actualStart = -1, actualEnd = -1;
   2091   //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
   2092     int32_t expectedStart = -1, expectedEnd = -1;
   2093     int32_t notFoundCount = 0;
   2094     LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
   2095                                                            testCase.getBuffer(), testCase.length(),
   2096                                                            coll,
   2097                                                            NULL,     // the break iterator
   2098                                                            &status));
   2099 
   2100     // **** TODO: find *all* matches, not just first one ****
   2101     simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
   2102 
   2103     usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status);
   2104 
   2105     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2106         errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2107               "    strength=%s seed=%d",
   2108               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
   2109     }
   2110 
   2111     if (expectedStart == -1 && actualStart == -1) {
   2112         notFoundCount += 1;
   2113     }
   2114 
   2115     // **** TODO: find *all* matches, not just first one ****
   2116     simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
   2117 
   2118     usearch_setPattern(uss.getAlias(), altPattern.getBuffer(), altPattern.length(), &status);
   2119 
   2120     usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status);
   2121 
   2122     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2123         errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2124               "    strength=%s seed=%d",
   2125               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
   2126     }
   2127 
   2128     if (expectedStart == -1 && actualStart == -1) {
   2129         notFoundCount += 1;
   2130     }
   2131 
   2132     return notFoundCount;
   2133 }
   2134 
   2135 static void hexForUnicodeString(const UnicodeString &ustr, char * cbuf, int32_t cbuflen)
   2136 {
   2137     int32_t ustri, ustrlen = ustr.length();
   2138 
   2139     for (ustri = 0; ustri < ustrlen; ++ustri) {
   2140         if (cbuflen >= 9 /* format width for single code unit(5) + terminating ellipsis(3) + null(1) */) {
   2141             int len = sprintf(cbuf, " %04X", ustr.charAt(ustri));
   2142             cbuflen -= len;
   2143             cbuf += len;
   2144         } else {
   2145             if (cbuflen >= 4 /* terminating ellipsis(3) + null(1) */) {
   2146                 sprintf(cbuf, "...");
   2147             } else if (cbuflen >= 1) {
   2148                 cbuf = 0;
   2149             }
   2150             break;
   2151         }
   2152     }
   2153 }
   2154 
   2155 int32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
   2156                                     BoyerMooreSearch *bms, BoyerMooreSearch *abms,
   2157                                     const char *name, const char *strength, uint32_t seed)
   2158 {
   2159     UErrorCode status = U_ZERO_ERROR;
   2160     int32_t actualStart = -1, actualEnd = -1;
   2161   //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
   2162     int32_t expectedStart = -1, expectedEnd = -1;
   2163     int32_t notFoundCount = 0;
   2164     char    hexbuf[128];
   2165 
   2166     // **** TODO: find *all* matches, not just first one ****
   2167     simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
   2168 
   2169     bms->setTargetString(&testCase, status);
   2170     bms->search(0, actualStart, actualEnd);
   2171 
   2172     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2173         hexForUnicodeString(pattern, hexbuf, sizeof(hexbuf));
   2174         errln("Boyer-Moore Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2175               "    strength=%s seed=%d <pattern>: %s",
   2176               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed, hexbuf);
   2177     }
   2178 
   2179     if (expectedStart == -1 && actualStart == -1) {
   2180         notFoundCount += 1;
   2181     }
   2182 
   2183     // **** TODO: find *all* matches, not just first one ****
   2184     simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
   2185 
   2186     abms->setTargetString(&testCase, status);
   2187     abms->search(0, actualStart, actualEnd);
   2188 
   2189     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2190         hexForUnicodeString(altPattern, hexbuf, sizeof(hexbuf));
   2191         errln("Boyer-Moore Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2192               "    strength=%s seed=%d <pattern>: %s",
   2193               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed, hexbuf);
   2194     }
   2195 
   2196     if (expectedStart == -1 && actualStart == -1) {
   2197         notFoundCount += 1;
   2198     }
   2199 
   2200 
   2201     return notFoundCount;
   2202 }
   2203 #endif
   2204 
   2205 void SSearchTest::monkeyTest(char *params)
   2206 {
   2207     // ook!
   2208     UErrorCode status = U_ZERO_ERROR;
   2209   //UCollator *coll = ucol_open(NULL, &status);
   2210     UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status);
   2211 
   2212     if (U_FAILURE(status)) {
   2213         errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status));
   2214         return;
   2215     }
   2216 
   2217     CollData  *monkeyData = CollData::open(coll, status);
   2218 
   2219     USet *expansions   = uset_openEmpty();
   2220     USet *contractions = uset_openEmpty();
   2221 
   2222     ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
   2223 
   2224     U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2225     U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2226     USet *letters = uset_openPattern(letter_pattern, 39, &status);
   2227     SetMonkey letterMonkey(letters);
   2228     StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
   2229     StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
   2230     UnicodeString testCase;
   2231     UnicodeString alternate;
   2232     UnicodeString pattern, altPattern;
   2233     UnicodeString prefix, altPrefix;
   2234     UnicodeString suffix, altSuffix;
   2235 
   2236     Monkey *monkeys[] = {
   2237         &letterMonkey,
   2238         &contractionMonkey,
   2239         &expansionMonkey,
   2240         &contractionMonkey,
   2241         &expansionMonkey,
   2242         &contractionMonkey,
   2243         &expansionMonkey,
   2244         &contractionMonkey,
   2245         &expansionMonkey};
   2246     int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
   2247     // int32_t nonMatchCount = 0;
   2248 
   2249     UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
   2250     const char *strengthNames[] = {"primary", "secondary", "tertiary"};
   2251     int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
   2252     int32_t loopCount = quick? 1000 : 10000;
   2253     int32_t firstStrength = 0;
   2254     int32_t lastStrength  = strengthCount - 1; //*/ 0;
   2255 
   2256     if (params != NULL) {
   2257 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
   2258         UnicodeString p(params);
   2259 
   2260         loopCount = getIntParam("loop", p, loopCount);
   2261         m_seed    = getIntParam("seed", p, m_seed);
   2262 
   2263         RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
   2264         if (m.find()) {
   2265             UnicodeString breakType = m.group(1, status);
   2266 
   2267             for (int32_t s = 0; s < strengthCount; s += 1) {
   2268                 if (breakType == strengthNames[s]) {
   2269                     firstStrength = lastStrength = s;
   2270                     break;
   2271                 }
   2272             }
   2273 
   2274             m.reset();
   2275             p = m.replaceFirst("", status);
   2276         }
   2277 
   2278         if (RegexMatcher("\\S", p, 0, status).find()) {
   2279             // Each option is stripped out of the option string as it is processed.
   2280             // All options have been checked.  The option string should have been completely emptied..
   2281             char buf[100];
   2282             p.extract(buf, sizeof(buf), NULL, status);
   2283             buf[sizeof(buf)-1] = 0;
   2284             errln("Unrecognized or extra parameter:  %s\n", buf);
   2285             return;
   2286         }
   2287 #else
   2288         infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
   2289 #endif
   2290     }
   2291 
   2292     for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
   2293         int32_t notFoundCount = 0;
   2294 
   2295         logln("Setting strength to %s.", strengthNames[s]);
   2296         ucol_setStrength(coll, strengths[s]);
   2297 
   2298         // TODO: try alternate prefix and suffix too?
   2299         // TODO: alterntaes are only equal at primary strength. Is this OK?
   2300         for(int32_t t = 0; t < loopCount; t += 1) {
   2301             uint32_t seed = m_seed;
   2302             // int32_t  nmc = 0;
   2303 
   2304             generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
   2305             generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
   2306             generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);
   2307 
   2308             // pattern
   2309             notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed);
   2310 
   2311             testCase.remove();
   2312             testCase.append(prefix);
   2313             testCase.append(/*alt*/pattern);
   2314 
   2315             // prefix + pattern
   2316             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed);
   2317 
   2318             testCase.append(suffix);
   2319 
   2320             // prefix + pattern + suffix
   2321             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed);
   2322 
   2323             testCase.remove();
   2324             testCase.append(pattern);
   2325             testCase.append(suffix);
   2326 
   2327             // pattern + suffix
   2328             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed);
   2329         }
   2330 
   2331        logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
   2332     }
   2333 
   2334     uset_close(contractions);
   2335     uset_close(expansions);
   2336     uset_close(letters);
   2337 
   2338     CollData::close(monkeyData);
   2339 
   2340     ucol_close(coll);
   2341 }
   2342 
   2343 void SSearchTest::bmMonkeyTest(char *params)
   2344 {
   2345     static const UVersionInfo icu49 = { 4, 9, 0, 0 }; // for timebomb
   2346     static const UChar skipChars[] = { 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0xAAB5, 0xAAB6, 0xAAB9, 0xAABB, 0xAABC, 0 }; // for timebomb
   2347     // ook!
   2348     UErrorCode status = U_ZERO_ERROR;
   2349     UCollator *coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
   2350 
   2351     if (U_FAILURE(status)) {
   2352         errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status));
   2353         return;
   2354     }
   2355 
   2356     CollData  *monkeyData = CollData::open(coll, status);
   2357 
   2358     USet *expansions   = uset_openEmpty();
   2359     USet *contractions = uset_openEmpty();
   2360 
   2361     ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
   2362 
   2363     U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2364     U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2365     USet *letters = uset_openPattern(letter_pattern, 39, &status);
   2366     SetMonkey letterMonkey(letters);
   2367     StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
   2368     StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
   2369     UnicodeString testCase;
   2370     UnicodeString alternate;
   2371     UnicodeString pattern, altPattern;
   2372     UnicodeString prefix, altPrefix;
   2373     UnicodeString suffix, altSuffix;
   2374 
   2375     Monkey *monkeys[] = {
   2376         &letterMonkey,
   2377         &contractionMonkey,
   2378         &expansionMonkey,
   2379         &contractionMonkey,
   2380         &expansionMonkey,
   2381         &contractionMonkey,
   2382         &expansionMonkey,
   2383         &contractionMonkey,
   2384         &expansionMonkey};
   2385     int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
   2386     // int32_t nonMatchCount = 0;
   2387 
   2388     UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
   2389     const char *strengthNames[] = {"primary", "secondary", "tertiary"};
   2390     int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
   2391     int32_t loopCount = quick? 1000 : 10000;
   2392     int32_t firstStrength = 0;
   2393     int32_t lastStrength  = strengthCount - 1; //*/ 0;
   2394 
   2395     if (params != NULL) {
   2396 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
   2397         UnicodeString p(params);
   2398 
   2399         loopCount = getIntParam("loop", p, loopCount);
   2400         m_seed    = getIntParam("seed", p, m_seed);
   2401 
   2402         RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
   2403         if (m.find()) {
   2404             UnicodeString breakType = m.group(1, status);
   2405 
   2406             for (int32_t s = 0; s < strengthCount; s += 1) {
   2407                 if (breakType == strengthNames[s]) {
   2408                     firstStrength = lastStrength = s;
   2409                     break;
   2410                 }
   2411             }
   2412 
   2413             m.reset();
   2414             p = m.replaceFirst("", status);
   2415         }
   2416 
   2417         if (RegexMatcher("\\S", p, 0, status).find()) {
   2418             // Each option is stripped out of the option string as it is processed.
   2419             // All options have been checked.  The option string should have been completely emptied..
   2420             char buf[100];
   2421             p.extract(buf, sizeof(buf), NULL, status);
   2422             buf[sizeof(buf)-1] = 0;
   2423             errln("Unrecognized or extra parameter:  %s\n", buf);
   2424             return;
   2425         }
   2426 #else
   2427         infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
   2428 #endif
   2429     }
   2430 
   2431     for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
   2432         int32_t notFoundCount = 0;
   2433 
   2434         logln("Setting strength to %s.", strengthNames[s]);
   2435         ucol_setStrength(coll, strengths[s]);
   2436 
   2437         CollData *data = CollData::open(coll, status);
   2438 
   2439         UnicodeString skipString(skipChars); // for timebomb
   2440         UnicodeSet* skipSet = UnicodeSet::createFromAll(skipString); // for timebomb
   2441         // TODO: try alternate prefix and suffix too?
   2442         // TODO: alterntaes are only equal at primary strength. Is this OK?
   2443         for(int32_t t = 0; t < loopCount; t += 1) {
   2444             uint32_t seed = m_seed;
   2445             // int32_t  nmc = 0;
   2446 
   2447             generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
   2448             generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
   2449             generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);
   2450 
   2451             if (!isICUVersionAtLeast(icu49) && skipSet->containsSome(pattern)) {
   2452                 continue; // timebomb until ticket #8080 is resolved
   2453             }
   2454 
   2455             BoyerMooreSearch pat(data, pattern, NULL, status);
   2456             BoyerMooreSearch alt(data, altPattern, NULL, status);
   2457 
   2458             // **** need a better way to deal with this ****
   2459 #if 0
   2460             if (pat.empty() ||
   2461                 alt.empty()) {
   2462                     continue;
   2463             }
   2464 #endif
   2465 
   2466             // pattern
   2467             notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern, &pat, &alt, "pattern", strengthNames[s], seed);
   2468 
   2469             testCase.remove();
   2470             testCase.append(prefix);
   2471             testCase.append(/*alt*/pattern);
   2472 
   2473             // prefix + pattern
   2474             notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern", strengthNames[s], seed);
   2475 
   2476             testCase.append(suffix);
   2477 
   2478             // prefix + pattern + suffix
   2479             notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed);
   2480 
   2481             testCase.remove();
   2482             testCase.append(pattern);
   2483             testCase.append(suffix);
   2484 
   2485             // pattern + suffix
   2486             notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "pattern + suffix", strengthNames[s], seed);
   2487         }
   2488         delete skipSet; // for timebomb
   2489 
   2490         CollData::close(data);
   2491 
   2492         logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
   2493     }
   2494 
   2495     uset_close(contractions);
   2496     uset_close(expansions);
   2497     uset_close(letters);
   2498 
   2499     CollData::close(monkeyData);
   2500 
   2501     ucol_close(coll);
   2502 }
   2503 
   2504 void SSearchTest::stringListTest(){
   2505     UErrorCode status = U_ZERO_ERROR;
   2506     StringList *sl = new StringList(status);
   2507     if(U_FAILURE(status)){
   2508         errln("ERROR: stringListTest: Could not start StringList");
   2509     }
   2510 
   2511     const UChar chars[] = {
   2512             0x0000
   2513     };
   2514     sl->add(chars, (int32_t) 0, status);
   2515     if(U_FAILURE(status)){
   2516         errln("ERROR: stringListTest: StringList::add");
   2517     }
   2518 
   2519     if(sl->getDynamicClassID() != StringList::getStaticClassID()){
   2520         errln("ERROR: stringListTest: getDynamicClassID and getStaticClassID does not match");
   2521     }
   2522     delete sl;
   2523 }
   2524 
   2525 #endif
   2526 
   2527 #endif
   2528