Home | History | Annotate | Download | only in intltest
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2012, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 
      9 #include "unicode/utypes.h"
     10 
     11 #if !UCONFIG_NO_COLLATION
     12 
     13 #include "unicode/unistr.h"
     14 #include "unicode/putil.h"
     15 #include "unicode/usearch.h"
     16 
     17 #include "cmemory.h"
     18 #include "unicode/coll.h"
     19 #include "unicode/tblcoll.h"
     20 #include "unicode/coleitr.h"
     21 #include "unicode/ucoleitr.h"
     22 
     23 #include "unicode/regex.h"        // TODO: make conditional on regexp being built.
     24 
     25 #include "unicode/uniset.h"
     26 #include "unicode/uset.h"
     27 #include "unicode/ustring.h"
     28 #include "hash.h"
     29 #include "uhash.h"
     30 #include "ucol_imp.h"
     31 
     32 #include "intltest.h"
     33 #include "ssearch.h"
     34 
     35 #include "unicode/colldata.h"
     36 #include "unicode/bmsearch.h"
     37 #include "unicode/bms.h"
     38 
     39 #include "xmlparser.h"
     40 #include "ucbuf.h"
     41 
     42 #include <stdlib.h>
     43 #include <string.h>
     44 #include <stdio.h>
     45 
     46 char testId[100];
     47 
     48 #define TEST_ASSERT(x) {if (!(x)) { \
     49     errln("Failure in file %s, line %d, test ID = \"%s\"", __FILE__, __LINE__, testId);}}
     50 
     51 #define TEST_ASSERT_M(x, m) {if (!(x)) { \
     52     errln("Failure in file %s, line %d.   \"%s\"", __FILE__, __LINE__, m);return;}}
     53 
     54 #define TEST_ASSERT_SUCCESS(errcode) {if (U_FAILURE(errcode)) { \
     55     dataerrln("Failure in file %s, line %d, test ID \"%s\", status = \"%s\"", \
     56           __FILE__, __LINE__, testId, u_errorName(errcode));}}
     57 
     58 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
     59 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
     60 #define DELETE_ARRAY(array) uprv_free((void *) (array))
     61 
     62 //---------------------------------------------------------------------------
     63 //
     64 //  Test class boilerplate
     65 //
     66 //---------------------------------------------------------------------------
     67 SSearchTest::SSearchTest()
     68 {
     69 }
     70 
     71 SSearchTest::~SSearchTest()
     72 {
     73 }
     74 
     75 void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *params )
     76 {
     77     if (exec) logln("TestSuite SSearchTest: ");
     78     switch (index) {
     79 #if !UCONFIG_NO_BREAK_ITERATION
     80        case 0: name = "searchTest";
     81             if (exec) searchTest();
     82             break;
     83 
     84         case 1: name = "offsetTest";
     85             if (exec) offsetTest();
     86             break;
     87 
     88         case 2: name = "monkeyTest";
     89             if (exec) monkeyTest(params);
     90             break;
     91 
     92         case 3: name = "bmMonkeyTest";
     93             if (exec) bmMonkeyTest(params);
     94             break;
     95 
     96         case 4: name = "boyerMooreTest";
     97             if (exec) boyerMooreTest();
     98             break;
     99 
    100         case 5: name = "goodSuffixTest";
    101             if (exec) goodSuffixTest();
    102             break;
    103 
    104         case 6: name = "searchTime";
    105             if (exec) searchTime();
    106             break;
    107 
    108         case 7: name = "bmsTest";
    109             if (exec) bmsTest();
    110             break;
    111 
    112         case 8: name = "bmSearchTest";
    113             if (exec) bmSearchTest();
    114             break;
    115 
    116         case 9: name = "udhrTest";
    117             if (exec) udhrTest();
    118             break;
    119         case 10: name = "stringListTest";
    120             if (exec) stringListTest();
    121             break;
    122 #endif
    123         default: name = "";
    124             break; //needed to end loop
    125     }
    126 }
    127 
    128 
    129 #if !UCONFIG_NO_BREAK_ITERATION
    130 
    131 #define PATH_BUFFER_SIZE 2048
    132 const char *SSearchTest::getPath(char buffer[2048], const char *filename) {
    133     UErrorCode status = U_ZERO_ERROR;
    134     const char *testDataDirectory = IntlTest::getSourceTestData(status);
    135 
    136     if (U_FAILURE(status) || strlen(testDataDirectory) + strlen(filename) + 1 >= PATH_BUFFER_SIZE) {
    137         errln("ERROR: getPath() failed - %s", u_errorName(status));
    138         return NULL;
    139     }
    140 
    141     strcpy(buffer, testDataDirectory);
    142     strcat(buffer, filename);
    143     return buffer;
    144 }
    145 
    146 
    147 void SSearchTest::searchTest()
    148 {
    149 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO
    150     UErrorCode status = U_ZERO_ERROR;
    151     char path[PATH_BUFFER_SIZE];
    152     const char *testFilePath = getPath(path, "ssearch.xml");
    153 
    154     if (testFilePath == NULL) {
    155         return; /* Couldn't get path: error message already output. */
    156     }
    157 
    158     LocalPointer<UXMLParser> parser(UXMLParser::createParser(status));
    159     TEST_ASSERT_SUCCESS(status);
    160     LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status));
    161     TEST_ASSERT_SUCCESS(status);
    162     if (U_FAILURE(status)) {
    163         return;
    164     }
    165 
    166     const UnicodeString *debugTestCase = root->getAttribute("debug");
    167     if (debugTestCase != NULL) {
    168 //       setenv("USEARCH_DEBUG", "1", 1);
    169     }
    170 
    171 
    172     const UXMLElement *testCase;
    173     int32_t tc = 0;
    174 
    175     while((testCase = root->nextChildElement(tc)) != NULL) {
    176 
    177         if (testCase->getTagName().compare("test-case") != 0) {
    178             errln("ssearch, unrecognized XML Element in test file");
    179             continue;
    180         }
    181         const UnicodeString *id       = testCase->getAttribute("id");
    182         *testId = 0;
    183         if (id != NULL) {
    184             id->extract(0, id->length(), testId,  sizeof(testId), US_INV);
    185         }
    186 
    187         // If debugging test case has been specified and this is not it, skip to next.
    188         if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
    189             continue;
    190         }
    191         //
    192         //  Get the requested collation strength.
    193         //    Default is tertiary if the XML attribute is missing from the test case.
    194         //
    195         const UnicodeString *strength = testCase->getAttribute("strength");
    196         UColAttributeValue collatorStrength = UCOL_PRIMARY;
    197         if      (strength==NULL)          { collatorStrength = UCOL_TERTIARY;}
    198         else if (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
    199         else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
    200         else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
    201         else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
    202         else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
    203         else {
    204             // Bogus value supplied for strength.  Shouldn't happen, even from
    205             //  typos, if the  XML source has been validated.
    206             //  This assert is a little deceiving in that strength can be
    207             //   any of the allowed values, not just TERTIARY, but it will
    208             //   do the job of getting the error output.
    209             TEST_ASSERT(*strength=="TERTIARY")
    210         }
    211 
    212         //
    213         // Get the collator normalization flag.  Default is UCOL_OFF.
    214         //
    215         UColAttributeValue normalize = UCOL_OFF;
    216         const UnicodeString *norm = testCase->getAttribute("norm");
    217         TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
    218         if (norm!=NULL && *norm=="ON") {
    219             normalize = UCOL_ON;
    220         }
    221 
    222         //
    223         // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
    224         //
    225         UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
    226         const UnicodeString *alt = testCase->getAttribute("alternate_handling");
    227         TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
    228         if (alt != NULL && *alt == "SHIFTED") {
    229             alternateHandling = UCOL_SHIFTED;
    230         }
    231 
    232         const UnicodeString defLocale("en");
    233         char  clocale[100];
    234         const UnicodeString *locale   = testCase->getAttribute("locale");
    235         if (locale == NULL || locale->length()==0) {
    236             locale = &defLocale;
    237         };
    238         locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
    239 
    240 
    241         UnicodeString  text;
    242         UnicodeString  target;
    243         UnicodeString  pattern;
    244         int32_t        expectedMatchStart = -1;
    245         int32_t        expectedMatchLimit = -1;
    246         const UXMLElement  *n;
    247         int32_t                nodeCount = 0;
    248 
    249         n = testCase->getChildElement("pattern");
    250         TEST_ASSERT(n != NULL);
    251         if (n==NULL) {
    252             continue;
    253         }
    254         text = n->getText(FALSE);
    255         text = text.unescape();
    256         pattern.append(text);
    257         nodeCount++;
    258 
    259         n = testCase->getChildElement("pre");
    260         if (n!=NULL) {
    261             text = n->getText(FALSE);
    262             text = text.unescape();
    263             target.append(text);
    264             nodeCount++;
    265         }
    266 
    267         n = testCase->getChildElement("m");
    268         if (n!=NULL) {
    269             expectedMatchStart = target.length();
    270             text = n->getText(FALSE);
    271             text = text.unescape();
    272             target.append(text);
    273             expectedMatchLimit = target.length();
    274             nodeCount++;
    275         }
    276 
    277         n = testCase->getChildElement("post");
    278         if (n!=NULL) {
    279             text = n->getText(FALSE);
    280             text = text.unescape();
    281             target.append(text);
    282             nodeCount++;
    283         }
    284 
    285         //  Check that there weren't extra things in the XML
    286         TEST_ASSERT(nodeCount == testCase->countChildren());
    287 
    288         // Open a collator and StringSearch based on the parameters
    289         //   obtained from the XML.
    290         //
    291         status = U_ZERO_ERROR;
    292         LocalUCollatorPointer collator(ucol_open(clocale, &status));
    293         ucol_setStrength(collator.getAlias(), collatorStrength);
    294         ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);
    295         ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
    296         LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
    297                                                                target.getBuffer(), target.length(),
    298                                                                collator.getAlias(),
    299                                                                NULL,     // the break iterator
    300                                                                &status));
    301 
    302         TEST_ASSERT_SUCCESS(status);
    303         if (U_FAILURE(status)) {
    304             continue;
    305         }
    306 
    307         int32_t foundStart = 0;
    308         int32_t foundLimit = 0;
    309         UBool   foundMatch;
    310 
    311         //
    312         // Do the search, check the match result against the expected results.
    313         //
    314         foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status);
    315         TEST_ASSERT_SUCCESS(status);
    316         if ((foundMatch && expectedMatchStart<0) ||
    317             (foundStart != expectedMatchStart)   ||
    318             (foundLimit != expectedMatchLimit)) {
    319                 TEST_ASSERT(FALSE);   //  ouput generic error position
    320                 infoln("Found, expected match start = %d, %d \n"
    321                        "Found, expected match limit = %d, %d",
    322                 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
    323         }
    324 
    325         // In case there are other matches...
    326         // (should we only do this if the test case passed?)
    327         while (foundMatch) {
    328             expectedMatchStart = foundStart;
    329             expectedMatchLimit = foundLimit;
    330 
    331             foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status);
    332         }
    333 
    334         uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
    335             target.getBuffer(), target.length(),
    336             collator.getAlias(),
    337             NULL,
    338             &status));
    339 
    340         //
    341         // Do the backwards search, check the match result against the expected results.
    342         //
    343         foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status);
    344         TEST_ASSERT_SUCCESS(status);
    345         if ((foundMatch && expectedMatchStart<0) ||
    346             (foundStart != expectedMatchStart)   ||
    347             (foundLimit != expectedMatchLimit)) {
    348                 TEST_ASSERT(FALSE);   //  ouput generic error position
    349                 infoln("Found, expected backwards match start = %d, %d \n"
    350                        "Found, expected backwards match limit = %d, %d",
    351                 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
    352         }
    353     }
    354 #endif
    355 }
    356 
    357 struct UdhrTestCase
    358 {
    359     const char *locale;
    360     const char *file;
    361 };
    362 
    363 void SSearchTest::udhrTest()
    364 {
    365     UErrorCode status = U_ZERO_ERROR;
    366     char path[PATH_BUFFER_SIZE];
    367     const char *udhrPath = getPath(path, "udhr");
    368 
    369     if (udhrPath == NULL) {
    370         // couldn't get path: error message already output...
    371         return;
    372     }
    373 
    374     UdhrTestCase testCases[] = {
    375         {"en", "udhr_eng.txt"},
    376         {"de", "udhr_deu_1996.txt"},
    377         {"fr", "udhr_fra.txt"},
    378         {"ru", "udhr_rus.txt"},
    379         {"th", "udhr_tha.txt"},
    380         {"ja", "udhr_jpn.txt"},
    381         {"ko", "udhr_kor.txt"},
    382         {"zh", "udhr_cmn_hans.txt"},
    383         {"zh_Hant", "udhr_cmn_hant.txt"}
    384     };
    385 
    386     int32_t testCount = ARRAY_SIZE(testCases);
    387 
    388     for (int32_t t = 0; t < testCount; t += 1) {
    389         int32_t len = 0;
    390         char *resolvedFileName = NULL;
    391         const char *encoding = NULL;
    392         UCHARBUF *ucharBuf = NULL;
    393 
    394         ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status);
    395         resolvedFileName = NEW_ARRAY(char, len);
    396 
    397         if(resolvedFileName == NULL){
    398             continue;
    399         }
    400 
    401         if(status == U_BUFFER_OVERFLOW_ERROR){
    402             status = U_ZERO_ERROR;
    403         }
    404 
    405         ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status);
    406         ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status);
    407 
    408         DELETE_ARRAY(resolvedFileName);
    409 
    410         if(U_FAILURE(status)){
    411             infoln("Could not open the input file %s. Test skipped\n", testCases[t].file);
    412             continue;
    413         }
    414 
    415         int32_t targetLen = 0;
    416         const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status);
    417 
    418         /* The first line of the file contains the pattern */
    419         int32_t start = 0, end = 0, plen = 0;
    420 
    421         for(end = start; ; end += 1) {
    422             UChar ch = target[end];
    423 
    424             if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
    425                 break;
    426             }
    427         }
    428 
    429         plen = end - start;
    430 
    431         UChar *pattern = NEW_ARRAY(UChar, plen);
    432         for (int32_t i = 0; i < plen; i += 1) {
    433             pattern[i] =  target[start++];
    434         }
    435 
    436         int32_t offset = 0;
    437         UCollator *coll = ucol_open(testCases[t].locale, &status);
    438         UCD *ucd = NULL;
    439         BMS *bms = NULL;
    440 
    441         if (U_FAILURE(status)) {
    442             errln("Could not open collator for %s", testCases[t].locale);
    443             goto delete_collator;
    444         }
    445 
    446         ucd = ucd_open(coll, &status);
    447 
    448         if (U_FAILURE(status)) {
    449             errln("Could not open CollData object for %s", testCases[t].locale);
    450             goto delete_ucd;
    451         }
    452 
    453         bms = bms_open(ucd, pattern, plen, target, targetLen, &status);
    454 
    455         if (U_FAILURE(status)) {
    456             errln("Could not open search object for %s", testCases[t].locale);
    457             goto delete_bms;
    458         }
    459 
    460         start = end = -1;
    461         while (bms_search(bms, offset, &start, &end)) {
    462             offset = end;
    463         }
    464 
    465         if (offset == 0) {
    466             errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file);
    467         }
    468 
    469 delete_bms:
    470         bms_close(bms);
    471 
    472 delete_ucd:
    473         ucd_close(ucd);
    474 
    475 delete_collator:
    476         ucol_close(coll);
    477 
    478         DELETE_ARRAY(pattern);
    479         ucbuf_close(ucharBuf);
    480     }
    481 
    482     ucd_flushCache();
    483 }
    484 
    485 void SSearchTest::bmSearchTest()
    486 {
    487 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
    488     UErrorCode status = U_ZERO_ERROR;
    489     char path[PATH_BUFFER_SIZE];
    490     const char *testFilePath = getPath(path, "ssearch.xml");
    491 
    492     if (testFilePath == NULL) {
    493         return; /* Couldn't get path: error message already output. */
    494     }
    495 
    496     UXMLParser  *parser = UXMLParser::createParser(status);
    497     TEST_ASSERT_SUCCESS(status);
    498     UXMLElement *root   = parser->parseFile(testFilePath, status);
    499     TEST_ASSERT_SUCCESS(status);
    500     if (U_FAILURE(status)) {
    501         return;
    502     }
    503 
    504     const UnicodeString *debugTestCase = root->getAttribute("debug");
    505     if (debugTestCase != NULL) {
    506 //       setenv("USEARCH_DEBUG", "1", 1);
    507     }
    508 
    509 
    510     const UXMLElement *testCase;
    511     int32_t tc = 0;
    512 
    513     while((testCase = root->nextChildElement(tc)) != NULL) {
    514 
    515         if (testCase->getTagName().compare("test-case") != 0) {
    516             errln("ssearch, unrecognized XML Element in test file");
    517             continue;
    518         }
    519         const UnicodeString *id       = testCase->getAttribute("id");
    520         *testId = 0;
    521         if (id != NULL) {
    522             id->extract(0, id->length(), testId,  sizeof(testId), US_INV);
    523         }
    524 
    525         // If debugging test case has been specified and this is not it, skip to next.
    526         if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
    527             continue;
    528         }
    529         //
    530         //  Get the requested collation strength.
    531         //    Default is tertiary if the XML attribute is missing from the test case.
    532         //
    533         const UnicodeString *strength = testCase->getAttribute("strength");
    534         UColAttributeValue collatorStrength = UCOL_PRIMARY;
    535         if      (strength==NULL)          { collatorStrength = UCOL_TERTIARY;}
    536         else if (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
    537         else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
    538         else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
    539         else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
    540         else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
    541         else {
    542             // Bogus value supplied for strength.  Shouldn't happen, even from
    543             //  typos, if the  XML source has been validated.
    544             //  This assert is a little deceiving in that strength can be
    545             //   any of the allowed values, not just TERTIARY, but it will
    546             //   do the job of getting the error output.
    547             TEST_ASSERT(*strength=="TERTIARY")
    548         }
    549 
    550         //
    551         // Get the collator normalization flag.  Default is UCOL_OFF.
    552         //
    553         UColAttributeValue normalize = UCOL_OFF;
    554         const UnicodeString *norm = testCase->getAttribute("norm");
    555         TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
    556         if (norm!=NULL && *norm=="ON") {
    557             normalize = UCOL_ON;
    558         }
    559 
    560         //
    561         // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
    562         //
    563         UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
    564         const UnicodeString *alt = testCase->getAttribute("alternate_handling");
    565         TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
    566         if (alt != NULL && *alt == "SHIFTED") {
    567             alternateHandling = UCOL_SHIFTED;
    568         }
    569 
    570         const UnicodeString defLocale("en");
    571         char  clocale[100];
    572         const UnicodeString *locale   = testCase->getAttribute("locale");
    573         if (locale == NULL || locale->length()==0) {
    574             locale = &defLocale;
    575         };
    576         locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
    577 
    578 
    579         UnicodeString  text;
    580         UnicodeString  target;
    581         UnicodeString  pattern;
    582         int32_t        expectedMatchStart = -1;
    583         int32_t        expectedMatchLimit = -1;
    584         const UXMLElement  *n;
    585         int32_t                nodeCount = 0;
    586 
    587         n = testCase->getChildElement("pattern");
    588         TEST_ASSERT(n != NULL);
    589         if (n==NULL) {
    590             continue;
    591         }
    592         text = n->getText(FALSE);
    593         text = text.unescape();
    594         pattern.append(text);
    595         nodeCount++;
    596 
    597         n = testCase->getChildElement("pre");
    598         if (n!=NULL) {
    599             text = n->getText(FALSE);
    600             text = text.unescape();
    601             target.append(text);
    602             nodeCount++;
    603         }
    604 
    605         n = testCase->getChildElement("m");
    606         if (n!=NULL) {
    607             expectedMatchStart = target.length();
    608             text = n->getText(FALSE);
    609             text = text.unescape();
    610             target.append(text);
    611             expectedMatchLimit = target.length();
    612             nodeCount++;
    613         }
    614 
    615         n = testCase->getChildElement("post");
    616         if (n!=NULL) {
    617             text = n->getText(FALSE);
    618             text = text.unescape();
    619             target.append(text);
    620             nodeCount++;
    621         }
    622 
    623         //  Check that there weren't extra things in the XML
    624         TEST_ASSERT(nodeCount == testCase->countChildren());
    625 
    626         // Open a collator and StringSearch based on the parameters
    627         //   obtained from the XML.
    628         //
    629         status = U_ZERO_ERROR;
    630         UCollator *collator = ucol_open(clocale, &status);
    631         ucol_setStrength(collator, collatorStrength);
    632         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
    633         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
    634         UCD *ucd = ucd_open(collator, &status);
    635         BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status);
    636 
    637         TEST_ASSERT_SUCCESS(status);
    638         if (U_FAILURE(status)) {
    639             bms_close(bms);
    640             ucd_close(ucd);
    641             ucol_close(collator);
    642             continue;
    643         }
    644 
    645         int32_t foundStart = 0;
    646         int32_t foundLimit = 0;
    647         UBool   foundMatch;
    648 
    649         //
    650         // Do the search, check the match result against the expected results.
    651         //
    652         foundMatch = bms_search(bms, 0, &foundStart, &foundLimit);
    653       //TEST_ASSERT_SUCCESS(status);
    654         if ((foundMatch && expectedMatchStart < 0) ||
    655             (foundStart != expectedMatchStart)     ||
    656             (foundLimit != expectedMatchLimit)) {
    657                 TEST_ASSERT(FALSE);   //  ouput generic error position
    658                 infoln("Found, expected match start = %d, %d \n"
    659                        "Found, expected match limit = %d, %d",
    660                 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
    661         }
    662 
    663         bms_close(bms);
    664         ucd_close(ucd);
    665         ucol_close(collator);
    666     }
    667 
    668     ucd_flushCache();
    669     delete root;
    670     delete parser;
    671 #endif
    672 }
    673 
    674 struct Order
    675 {
    676     int32_t order;
    677     int32_t lowOffset;
    678     int32_t highOffset;
    679 };
    680 
    681 class OrderList
    682 {
    683 public:
    684     OrderList();
    685     OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset = 0);
    686     ~OrderList();
    687 
    688     int32_t size(void) const;
    689     void add(int32_t order, int32_t low, int32_t high);
    690     const Order *get(int32_t index) const;
    691     int32_t getLowOffset(int32_t index) const;
    692     int32_t getHighOffset(int32_t index) const;
    693     int32_t getOrder(int32_t index) const;
    694     void reverse(void);
    695     UBool compare(const OrderList &other) const;
    696     UBool matchesAt(int32_t offset, const OrderList &other) const;
    697 
    698 private:
    699     Order *list;
    700     int32_t listMax;
    701     int32_t listSize;
    702 };
    703 
    704 OrderList::OrderList()
    705   : list(NULL),  listMax(16), listSize(0)
    706 {
    707     list = new Order[listMax];
    708 }
    709 
    710 OrderList::OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset)
    711     : list(NULL), listMax(16), listSize(0)
    712 {
    713     UErrorCode status = U_ZERO_ERROR;
    714     UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
    715     uint32_t strengthMask = 0;
    716     int32_t order, low, high;
    717 
    718     switch (ucol_getStrength(coll))
    719     {
    720     default:
    721         strengthMask |= UCOL_TERTIARYORDERMASK;
    722         /* fall through */
    723 
    724     case UCOL_SECONDARY:
    725         strengthMask |= UCOL_SECONDARYORDERMASK;
    726         /* fall through */
    727 
    728     case UCOL_PRIMARY:
    729         strengthMask |= UCOL_PRIMARYORDERMASK;
    730     }
    731 
    732     list = new Order[listMax];
    733 
    734     ucol_setOffset(elems, stringOffset, &status);
    735 
    736     do {
    737         low   = ucol_getOffset(elems);
    738         order = ucol_next(elems, &status);
    739         high  = ucol_getOffset(elems);
    740 
    741         if (order != UCOL_NULLORDER) {
    742             order &= strengthMask;
    743         }
    744 
    745         if (order != UCOL_IGNORABLE) {
    746             add(order, low, high);
    747         }
    748     } while (order != UCOL_NULLORDER);
    749 
    750     ucol_closeElements(elems);
    751 }
    752 
    753 OrderList::~OrderList()
    754 {
    755     delete[] list;
    756 }
    757 
    758 void OrderList::add(int32_t order, int32_t low, int32_t high)
    759 {
    760     if (listSize >= listMax) {
    761         listMax *= 2;
    762 
    763         Order *newList = new Order[listMax];
    764 
    765         uprv_memcpy(newList, list, listSize * sizeof(Order));
    766         delete[] list;
    767         list = newList;
    768     }
    769 
    770     list[listSize].order      = order;
    771     list[listSize].lowOffset  = low;
    772     list[listSize].highOffset = high;
    773 
    774     listSize += 1;
    775 }
    776 
    777 const Order *OrderList::get(int32_t index) const
    778 {
    779     if (index >= listSize) {
    780         return NULL;
    781     }
    782 
    783     return &list[index];
    784 }
    785 
    786 int32_t OrderList::getLowOffset(int32_t index) const
    787 {
    788     const Order *order = get(index);
    789 
    790     if (order != NULL) {
    791         return order->lowOffset;
    792     }
    793 
    794     return -1;
    795 }
    796 
    797 int32_t OrderList::getHighOffset(int32_t index) const
    798 {
    799     const Order *order = get(index);
    800 
    801     if (order != NULL) {
    802         return order->highOffset;
    803     }
    804 
    805     return -1;
    806 }
    807 
    808 int32_t OrderList::getOrder(int32_t index) const
    809 {
    810     const Order *order = get(index);
    811 
    812     if (order != NULL) {
    813         return order->order;
    814     }
    815 
    816     return UCOL_NULLORDER;
    817 }
    818 
    819 int32_t OrderList::size() const
    820 {
    821     return listSize;
    822 }
    823 
    824 void OrderList::reverse()
    825 {
    826     for(int32_t f = 0, b = listSize - 1; f < b; f += 1, b -= 1) {
    827         Order swap = list[b];
    828 
    829         list[b] = list[f];
    830         list[f] = swap;
    831     }
    832 }
    833 
    834 UBool OrderList::compare(const OrderList &other) const
    835 {
    836     if (listSize != other.listSize) {
    837         return FALSE;
    838     }
    839 
    840     for(int32_t i = 0; i < listSize; i += 1) {
    841         if (list[i].order  != other.list[i].order ||
    842             list[i].lowOffset != other.list[i].lowOffset ||
    843             list[i].highOffset != other.list[i].highOffset) {
    844                 return FALSE;
    845         }
    846     }
    847 
    848     return TRUE;
    849 }
    850 
    851 UBool OrderList::matchesAt(int32_t offset, const OrderList &other) const
    852 {
    853     // NOTE: sizes include the NULLORDER, which we don't want to compare.
    854     int32_t otherSize = other.size() - 1;
    855 
    856     if (listSize - 1 - offset < otherSize) {
    857         return FALSE;
    858     }
    859 
    860     for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {
    861         if (getOrder(i) != other.getOrder(j)) {
    862             return FALSE;
    863         }
    864     }
    865 
    866     return TRUE;
    867 }
    868 
    869 static char *printOffsets(char *buffer, OrderList &list)
    870 {
    871     int32_t size = list.size();
    872     char *s = buffer;
    873 
    874     for(int32_t i = 0; i < size; i += 1) {
    875         const Order *order = list.get(i);
    876 
    877         if (i != 0) {
    878             s += sprintf(s, ", ");
    879         }
    880 
    881         s += sprintf(s, "(%d, %d)", order->lowOffset, order->highOffset);
    882     }
    883 
    884     return buffer;
    885 }
    886 
    887 static char *printOrders(char *buffer, OrderList &list)
    888 {
    889     int32_t size = list.size();
    890     char *s = buffer;
    891 
    892     for(int32_t i = 0; i < size; i += 1) {
    893         const Order *order = list.get(i);
    894 
    895         if (i != 0) {
    896             s += sprintf(s, ", ");
    897         }
    898 
    899         s += sprintf(s, "%8.8X", order->order);
    900     }
    901 
    902     return buffer;
    903 }
    904 
    905 void SSearchTest::offsetTest()
    906 {
    907     const char *test[] = {
    908         // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
    909         // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
    910         "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",
    911 
    912         "\\ua191\\u16ef\\u2036\\u017a",
    913 
    914 #if 0
    915         // This results in a complex interaction between contraction,
    916         // expansion and normalization that confuses the backwards offset fixups.
    917         "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
    918 #endif
    919 
    920         "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
    921         "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",
    922 
    923         "\\u02FE\\u02FF"
    924         "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
    925         "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
    926         "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
    927         "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
    928         "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081
    929 
    930         "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081
    931         "a\\u02FF\\u0301\\u0316", // currently not working, see #8081
    932         "a\\u02FF\\u0316\\u0301",
    933         "a\\u0430\\u0301\\u0316",
    934         "a\\u0430\\u0316\\u0301",
    935         "abc\\u0E41\\u0301\\u0316",
    936         "abc\\u0E41\\u0316\\u0301",
    937         "\\u0E41\\u0301\\u0316",
    938         "\\u0E41\\u0316\\u0301",
    939         "a\\u0301\\u0316",
    940         "a\\u0316\\u0301",
    941         "\\uAC52\\uAC53",
    942         "\\u34CA\\u34CB",
    943         "\\u11ED\\u11EE",
    944         "\\u30C3\\u30D0",
    945         "p\\u00E9ch\\u00E9",
    946         "a\\u0301\\u0325",
    947         "a\\u0300\\u0325",
    948         "a\\u0325\\u0300",
    949         "A\\u0323\\u0300B",
    950         "A\\u0300\\u0323B",
    951         "A\\u0301\\u0323B",
    952         "A\\u0302\\u0301\\u0323B",
    953         "abc",
    954         "ab\\u0300c",
    955         "ab\\u0300\\u0323c",
    956         " \\uD800\\uDC00\\uDC00",
    957         "a\\uD800\\uDC00\\uDC00",
    958         "A\\u0301\\u0301",
    959         "A\\u0301\\u0323",
    960         "A\\u0301\\u0323B",
    961         "B\\u0301\\u0323C",
    962         "A\\u0300\\u0323B",
    963         "\\u0301A\\u0301\\u0301",
    964         "abcd\\r\\u0301",
    965         "p\\u00EAche",
    966         "pe\\u0302che",
    967     };
    968 
    969     int32_t testCount = ARRAY_SIZE(test);
    970     UErrorCode status = U_ZERO_ERROR;
    971     RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
    972     if (U_FAILURE(status)) {
    973         errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status));
    974         return;
    975     }
    976     char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
    977                         // We could allocate one that's the right size by (CE_count * 10) + 2
    978                         // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"
    979 
    980     col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    981 
    982     for(int32_t i = 0; i < testCount; i += 1) {
    983         if (!isICUVersionAtLeast(51, 1) && i>=4 && i<=6) {
    984             continue; // timebomb until ticket #9156 (was #8081) is resolved
    985         }
    986         UnicodeString ts = CharsToUnicodeString(test[i]);
    987         CollationElementIterator *iter = col->createCollationElementIterator(ts);
    988         OrderList forwardList;
    989         OrderList backwardList;
    990         int32_t order, low, high;
    991 
    992         do {
    993             low   = iter->getOffset();
    994             order = iter->next(status);
    995             high  = iter->getOffset();
    996 
    997             forwardList.add(order, low, high);
    998         } while (order != CollationElementIterator::NULLORDER);
    999 
   1000         iter->reset();
   1001         iter->setOffset(ts.length(), status);
   1002 
   1003         backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());
   1004 
   1005         do {
   1006             high  = iter->getOffset();
   1007             order = iter->previous(status);
   1008             low   = iter->getOffset();
   1009 
   1010             if (order == CollationElementIterator::NULLORDER) {
   1011                 break;
   1012             }
   1013 
   1014             backwardList.add(order, low, high);
   1015         } while (TRUE);
   1016 
   1017         backwardList.reverse();
   1018 
   1019         if (forwardList.compare(backwardList)) {
   1020             logln("Works with \"%s\"", test[i]);
   1021             logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
   1022 //          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
   1023 
   1024             logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
   1025 //          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));
   1026 
   1027             logln();
   1028         } else {
   1029             errln("Fails with \"%s\"", test[i]);
   1030             infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
   1031             infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
   1032 
   1033             infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
   1034             infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));
   1035 
   1036             infoln();
   1037         }
   1038         delete iter;
   1039     }
   1040     delete col;
   1041 }
   1042 
   1043 #if 0
   1044 static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)
   1045 {
   1046     for(int32_t i = 0; i < string.length(); i += 1) {
   1047         UChar32 ch = string.char32At(i);
   1048 
   1049         if (ch >= 0x0020 && ch <= 0x007F) {
   1050             if (ch == 0x005C) {
   1051                 buffer.append("\\\\");
   1052             } else {
   1053                 buffer.append(ch);
   1054             }
   1055         } else {
   1056             char cbuffer[12];
   1057 
   1058             if (ch <= 0xFFFFL) {
   1059                 sprintf(cbuffer, "\\u%4.4X", ch);
   1060             } else {
   1061                 sprintf(cbuffer, "\\U%8.8X", ch);
   1062             }
   1063 
   1064             buffer.append(cbuffer);
   1065         }
   1066 
   1067         if (ch >= 0x10000L) {
   1068             i += 1;
   1069         }
   1070     }
   1071 
   1072     return buffer;
   1073 }
   1074 #endif
   1075 
   1076 #if 1
   1077 
   1078 struct PCE
   1079 {
   1080     uint64_t ce;
   1081     int32_t  lowOffset;
   1082     int32_t  highOffset;
   1083 };
   1084 
   1085 class PCEList
   1086 {
   1087 public:
   1088     PCEList(UCollator *coll, const UnicodeString &string);
   1089     ~PCEList();
   1090 
   1091     int32_t size() const;
   1092 
   1093     const PCE *get(int32_t index) const;
   1094 
   1095     int32_t getLowOffset(int32_t index) const;
   1096     int32_t getHighOffset(int32_t index) const;
   1097     uint64_t getOrder(int32_t index) const;
   1098 
   1099     UBool matchesAt(int32_t offset, const PCEList &other) const;
   1100 
   1101     uint64_t operator[](int32_t index) const;
   1102 
   1103 private:
   1104     void add(uint64_t ce, int32_t low, int32_t high);
   1105 
   1106     PCE *list;
   1107     int32_t listMax;
   1108     int32_t listSize;
   1109 };
   1110 
   1111 PCEList::PCEList(UCollator *coll, const UnicodeString &string)
   1112 {
   1113     UErrorCode status = U_ZERO_ERROR;
   1114     UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
   1115     uint64_t order;
   1116     int32_t low, high;
   1117 
   1118     list = new PCE[listMax];
   1119 
   1120     ucol_setOffset(elems, 0, &status);
   1121 
   1122     do {
   1123         order = ucol_nextProcessed(elems, &low, &high, &status);
   1124         add(order, low, high);
   1125     } while (order != UCOL_PROCESSED_NULLORDER);
   1126 
   1127     ucol_closeElements(elems);
   1128 }
   1129 
   1130 PCEList::~PCEList()
   1131 {
   1132     delete[] list;
   1133 }
   1134 
   1135 void PCEList::add(uint64_t order, int32_t low, int32_t high)
   1136 {
   1137     if (listSize >= listMax) {
   1138         listMax *= 2;
   1139 
   1140         PCE *newList = new PCE[listMax];
   1141 
   1142         uprv_memcpy(newList, list, listSize * sizeof(Order));
   1143         delete[] list;
   1144         list = newList;
   1145     }
   1146 
   1147     list[listSize].ce         = order;
   1148     list[listSize].lowOffset  = low;
   1149     list[listSize].highOffset = high;
   1150 
   1151     listSize += 1;
   1152 }
   1153 
   1154 const PCE *PCEList::get(int32_t index) const
   1155 {
   1156     if (index >= listSize) {
   1157         return NULL;
   1158     }
   1159 
   1160     return &list[index];
   1161 }
   1162 
   1163 int32_t PCEList::getLowOffset(int32_t index) const
   1164 {
   1165     const PCE *pce = get(index);
   1166 
   1167     if (pce != NULL) {
   1168         return pce->lowOffset;
   1169     }
   1170 
   1171     return -1;
   1172 }
   1173 
   1174 int32_t PCEList::getHighOffset(int32_t index) const
   1175 {
   1176     const PCE *pce = get(index);
   1177 
   1178     if (pce != NULL) {
   1179         return pce->highOffset;
   1180     }
   1181 
   1182     return -1;
   1183 }
   1184 
   1185 uint64_t PCEList::getOrder(int32_t index) const
   1186 {
   1187     const PCE *pce = get(index);
   1188 
   1189     if (pce != NULL) {
   1190         return pce->ce;
   1191     }
   1192 
   1193     return UCOL_PROCESSED_NULLORDER;
   1194 }
   1195 
   1196 int32_t PCEList::size() const
   1197 {
   1198     return listSize;
   1199 }
   1200 
   1201 UBool PCEList::matchesAt(int32_t offset, const PCEList &other) const
   1202 {
   1203     // NOTE: sizes include the NULLORDER, which we don't want to compare.
   1204     int32_t otherSize = other.size() - 1;
   1205 
   1206     if (listSize - 1 - offset < otherSize) {
   1207         return FALSE;
   1208     }
   1209 
   1210     for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {
   1211         if (getOrder(i) != other.getOrder(j)) {
   1212             return FALSE;
   1213         }
   1214     }
   1215 
   1216     return TRUE;
   1217 }
   1218 
   1219 uint64_t PCEList::operator[](int32_t index) const
   1220 {
   1221     return getOrder(index);
   1222 }
   1223 
   1224 void SSearchTest::boyerMooreTest()
   1225 {
   1226     UErrorCode status = U_ZERO_ERROR;
   1227     UCollator *coll = NULL;
   1228     CollData *data = NULL;
   1229     const CEList* ce = NULL;
   1230     const CEList* ce1 = NULL;
   1231     UnicodeString lp  = "fuss";
   1232     UnicodeString sp = "fu\\u00DF";
   1233     BoyerMooreSearch *longPattern = NULL;
   1234     BoyerMooreSearch *shortPattern = NULL;
   1235     UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
   1236                                 "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
   1237                                 "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
   1238     int32_t start = -1, end = -1;
   1239 
   1240     coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
   1241     if (U_FAILURE(status)) {
   1242         errcheckln(status, "Could not open collator. - %s", u_errorName(status));
   1243         return;
   1244     }
   1245 
   1246     data = CollData::open(coll, status);
   1247     if (U_FAILURE(status)) {
   1248         errln("Could not open CollData object.");
   1249         goto close_data;
   1250     }
   1251 
   1252     data->getDynamicClassID();
   1253     if (U_FAILURE(status)) {
   1254         errln("Could not get dynamic class ID of CollData.");
   1255         goto close_patterns;
   1256     }
   1257 
   1258     data->getStaticClassID();
   1259     if (U_FAILURE(status)) {
   1260         errln("Could not get static class ID of CollData.");
   1261         goto close_patterns;
   1262     }
   1263 
   1264     longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status);
   1265     shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status);
   1266     if (U_FAILURE(status)) {
   1267         errln("Could not create pattern objects.");
   1268         goto close_patterns;
   1269     }
   1270 
   1271     longPattern->getBadCharacterTable();
   1272     shortPattern->getBadCharacterTable();
   1273     if (U_FAILURE(status)) {
   1274         errln("Could not get bad character table.");
   1275         goto close_patterns;
   1276     }
   1277 
   1278     longPattern->getGoodSuffixTable();
   1279     shortPattern->getGoodSuffixTable();
   1280     if (U_FAILURE(status)) {
   1281         errln("Could not get good suffix table.");
   1282         goto close_patterns;
   1283     }
   1284 
   1285     longPattern->getDynamicClassID();
   1286     shortPattern->getDynamicClassID();
   1287     if (U_FAILURE(status)) {
   1288         errln("Could not get dynamic class ID of BoyerMooreSearch.");
   1289         goto close_patterns;
   1290     }
   1291 
   1292     longPattern->getStaticClassID();
   1293     shortPattern->getStaticClassID();
   1294     if (U_FAILURE(status)) {
   1295         errln("Could not get static class ID of BoyerMooreSearch.");
   1296         goto close_patterns;
   1297     }
   1298 
   1299     longPattern->getData();
   1300     shortPattern->getData();
   1301     if (U_FAILURE(status)) {
   1302         errln("Could not get collate data.");
   1303         goto close_patterns;
   1304     }
   1305 
   1306     ce = longPattern->getPatternCEs();
   1307     ce1 = shortPattern->getPatternCEs();
   1308     if (U_FAILURE(status)) {
   1309         errln("Could not get pattern CEs.");
   1310         goto close_patterns;
   1311     }
   1312 
   1313     ce->getDynamicClassID();
   1314     ce1->getDynamicClassID();
   1315     if (U_FAILURE(status)) {
   1316         errln("Could not get dynamic class ID of CEList.");
   1317         goto close_patterns;
   1318     }
   1319 
   1320     ce->getStaticClassID();
   1321     ce1->getStaticClassID();
   1322     if (U_FAILURE(status)) {
   1323         errln("Could not get static class ID of CEList.");
   1324         goto close_patterns;
   1325     }
   1326 
   1327     if(data->minLengthInChars(ce,0) != 3){
   1328         errln("Minimal Length in Characters for 'data' with 'ce' was suppose to give 3.");
   1329         goto close_patterns;
   1330     }
   1331 
   1332     if(data->minLengthInChars(ce1,0) != 3){
   1333         errln("Minimal Length in Characters for 'data' with 'ce1' was suppose to give 3.");
   1334         goto close_patterns;
   1335     }
   1336 
   1337     for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
   1338         UnicodeString target = targets[t].unescape();
   1339 
   1340         longPattern->setTargetString(&target, status);
   1341         if (longPattern->search(0, start, end)) {
   1342             logln("Test %d: found long pattern at [%d, %d].", t, start, end);
   1343         } else {
   1344             errln("Test %d: did not find long pattern.", t);
   1345         }
   1346 
   1347         shortPattern->setTargetString(&target, status);
   1348         if (shortPattern->search(0, start, end)) {
   1349             logln("Test %d: found short pattern at [%d, %d].", t, start, end);
   1350         } else {
   1351             errln("Test %d: did not find short pattern.", t);
   1352         }
   1353 
   1354         if(longPattern->empty()){
   1355             errln("Test %d: Long pattern should not have been empty.");
   1356         }
   1357 
   1358         if(shortPattern->empty()){
   1359             errln("Test %d: Short pattern should not have been empty.");
   1360         }
   1361     }
   1362 
   1363 close_patterns:
   1364     delete shortPattern;
   1365     delete longPattern;
   1366 
   1367 close_data:
   1368     CollData::close(data);
   1369     ucol_close(coll);
   1370 }
   1371 
   1372 void SSearchTest::bmsTest()
   1373 {
   1374     UErrorCode status = U_ZERO_ERROR;
   1375     UCollator *coll = NULL;
   1376     UCD *data = NULL;
   1377     UnicodeString lp  = "fuss";
   1378     UnicodeString lpu = lp.unescape();
   1379     UnicodeString sp  = "fu\\u00DF";
   1380     UnicodeString spu = sp.unescape();
   1381     BMS *longPattern = NULL;
   1382     BMS *shortPattern = NULL;
   1383     UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
   1384                                 "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
   1385                                 "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
   1386     int32_t start = -1, end = -1;
   1387 
   1388     coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
   1389     if (U_FAILURE(status)) {
   1390         errcheckln(status, "Could not open collator. - %s", u_errorName(status));
   1391         return;
   1392     }
   1393 
   1394     data = ucd_open(coll, &status);
   1395     if (U_FAILURE(status)) {
   1396         errln("Could not open CollData object.");
   1397         goto close_data;
   1398     }
   1399 
   1400     longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status);
   1401     shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &status);
   1402     if (U_FAILURE(status)) {
   1403         errln("Couldn't open pattern objects.");
   1404         goto close_patterns;
   1405     }
   1406 
   1407     for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
   1408         UnicodeString target = targets[t].unescape();
   1409 
   1410         bms_setTargetString(longPattern, target.getBuffer(), target.length(), &status);
   1411         if (bms_search(longPattern, 0, &start, &end)) {
   1412             logln("Test %d: found long pattern at [%d, %d].", t, start, end);
   1413         } else {
   1414             errln("Test %d: did not find long pattern.", t);
   1415         }
   1416 
   1417         bms_setTargetString(shortPattern, target.getBuffer(), target.length(), &status);
   1418         if (bms_search(shortPattern, 0, &start, &end)) {
   1419             logln("Test %d: found short pattern at [%d, %d].", t, start, end);
   1420         } else {
   1421             errln("Test %d: did not find short pattern.", t);
   1422         }
   1423     }
   1424 
   1425     /* Add better coverage for bms code. */
   1426     if(bms_empty(longPattern)) {
   1427         errln("FAIL: longgPattern is empty.");
   1428     }
   1429 
   1430     if (!bms_getData(longPattern)) {
   1431         errln("FAIL: bms_getData returned NULL.");
   1432     }
   1433 
   1434     if (!ucd_getCollator(data)) {
   1435         errln("FAIL: ucd_getCollator returned NULL.");
   1436     }
   1437 
   1438 close_patterns:
   1439     bms_close(shortPattern);
   1440     bms_close(longPattern);
   1441 
   1442 close_data:
   1443     ucd_close(data);
   1444     ucd_freeCache();
   1445     ucol_close(coll);
   1446 }
   1447 
   1448 void SSearchTest::goodSuffixTest()
   1449 {
   1450     UErrorCode status = U_ZERO_ERROR;
   1451     UCollator *coll = NULL;
   1452     CollData *data = NULL;
   1453     UnicodeString pat = /*"gcagagag"*/ "fxeld";
   1454     UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld";
   1455     BoyerMooreSearch *pattern = NULL;
   1456     int32_t start = -1, end = -1;
   1457 
   1458     coll = ucol_open(NULL, &status);
   1459     if (U_FAILURE(status)) {
   1460         errcheckln(status, "Couldn't open collator. - %s", u_errorName(status));
   1461         return;
   1462     }
   1463 
   1464     data = CollData::open(coll, status);
   1465     if (U_FAILURE(status)) {
   1466         errln("Couldn't open CollData object.");
   1467         goto close_data;
   1468     }
   1469 
   1470     pattern = new BoyerMooreSearch(data, pat, &target, status);
   1471     if (U_FAILURE(status)) {
   1472         errln("Couldn't open pattern object.");
   1473         goto close_pattern;
   1474     }
   1475 
   1476     if (pattern->search(0, start, end)) {
   1477         logln("Found pattern at [%d, %d].", start, end);
   1478     } else {
   1479         errln("Did not find pattern.");
   1480     }
   1481 
   1482 close_pattern:
   1483     delete pattern;
   1484 
   1485 close_data:
   1486     CollData::close(data);
   1487     ucol_close(coll);
   1488 }
   1489 
   1490 //
   1491 //  searchTime()    A quick and dirty performance test for string search.
   1492 //                  Probably  doesn't really belong as part of intltest, but it
   1493 //                  does check that the search succeeds, and gets the right result,
   1494 //                  so it serves as a functionality test also.
   1495 //
   1496 //                  To run as a perf test, up the loop count, select by commenting
   1497 //                  and uncommenting in the code the operation to be measured,
   1498 //                  rebuild, and measure the running time of this test alone.
   1499 //
   1500 //                     time LD_LIBRARY_PATH=whatever  ./intltest  collate/SSearchTest/searchTime
   1501 //
   1502 void SSearchTest::searchTime() {
   1503     static const char *longishText =
   1504 "Whylom, as olde stories tellen us,\n"
   1505 "Ther was a duk that highte Theseus:\n"
   1506 "Of Athenes he was lord and governour,\n"
   1507 "And in his tyme swich a conquerour,\n"
   1508 "That gretter was ther noon under the sonne.\n"
   1509 "Ful many a riche contree hadde he wonne;\n"
   1510 "What with his wisdom and his chivalrye,\n"
   1511 "He conquered al the regne of Femenye,\n"
   1512 "That whylom was y-cleped Scithia;\n"
   1513 "And weddede the quene Ipolita,\n"
   1514 "And broghte hir hoom with him in his contree\n"
   1515 "With muchel glorie and greet solempnitee,\n"
   1516 "And eek hir yonge suster Emelye.\n"
   1517 "And thus with victorie and with melodye\n"
   1518 "Lete I this noble duk to Athenes ryde,\n"
   1519 "And al his hoost, in armes, him bisyde.\n"
   1520 "And certes, if it nere to long to here,\n"
   1521 "I wolde han told yow fully the manere,\n"
   1522 "How wonnen was the regne of Femenye\n"
   1523 "By Theseus, and by his chivalrye;\n"
   1524 "And of the grete bataille for the nones\n"
   1525 "Bitwixen Athen's and Amazones;\n"
   1526 "And how asseged was Ipolita,\n"
   1527 "The faire hardy quene of Scithia;\n"
   1528 "And of the feste that was at hir weddinge,\n"
   1529 "And of the tempest at hir hoom-cominge;\n"
   1530 "But al that thing I moot as now forbere.\n"
   1531 "I have, God woot, a large feeld to ere,\n"
   1532 "And wayke been the oxen in my plough.\n"
   1533 "The remenant of the tale is long y-nough.\n"
   1534 "I wol nat letten eek noon of this route;\n"
   1535 "Lat every felawe telle his tale aboute,\n"
   1536 "And lat see now who shal the soper winne;\n"
   1537 "And ther I lefte, I wol ageyn biginne.\n"
   1538 "This duk, of whom I make mencioun,\n"
   1539 "When he was come almost unto the toun,\n"
   1540 "In al his wele and in his moste pryde,\n"
   1541 "He was war, as he caste his eye asyde,\n"
   1542 "Wher that ther kneled in the hye weye\n"
   1543 "A companye of ladies, tweye and tweye,\n"
   1544 "Ech after other, clad in clothes blake; \n"
   1545 "But swich a cry and swich a wo they make,\n"
   1546 "That in this world nis creature livinge,\n"
   1547 "That herde swich another weymentinge;\n"
   1548 "And of this cry they nolde never stenten,\n"
   1549 "Til they the reynes of his brydel henten.\n"
   1550 "'What folk ben ye, that at myn hoomcominge\n"
   1551 "Perturben so my feste with cryinge'?\n"
   1552 "Quod Theseus, 'have ye so greet envye\n"
   1553 "Of myn honour, that thus compleyne and crye? \n"
   1554 "Or who hath yow misboden, or offended?\n"
   1555 "And telleth me if it may been amended;\n"
   1556 "And why that ye ben clothed thus in blak'?\n"
   1557 "The eldest lady of hem alle spak,\n"
   1558 "When she hadde swowned with a deedly chere,\n"
   1559 "That it was routhe for to seen and here,\n"
   1560 "And seyde: 'Lord, to whom Fortune hath yiven\n"
   1561 "Victorie, and as a conquerour to liven,\n"
   1562 "Noght greveth us your glorie and your honour;\n"
   1563 "But we biseken mercy and socour.\n"
   1564 "Have mercy on our wo and our distresse.\n"
   1565 "Som drope of pitee, thurgh thy gentilesse,\n"
   1566 "Up-on us wrecched wommen lat thou falle.\n"
   1567 "For certes, lord, ther nis noon of us alle,\n"
   1568 "That she nath been a duchesse or a quene;\n"
   1569 "Now be we caitifs, as it is wel sene:\n"
   1570 "Thanked be Fortune, and hir false wheel,\n"
   1571 "That noon estat assureth to be weel.\n"
   1572 "And certes, lord, t'abyden your presence,\n"
   1573 "Here in the temple of the goddesse Clemence\n"
   1574 "We han ben waytinge al this fourtenight;\n"
   1575 "Now help us, lord, sith it is in thy might.\n"
   1576 "I wrecche, which that wepe and waille thus,\n"
   1577 "Was whylom wyf to king Capaneus,\n"
   1578 "That starf at Thebes, cursed be that day!\n"
   1579 "And alle we, that been in this array,\n"
   1580 "And maken al this lamentacioun,\n"
   1581 "We losten alle our housbondes at that toun,\n"
   1582 "Whyl that the sege ther-aboute lay.\n"
   1583 "And yet now th'olde Creon, weylaway!\n"
   1584 "The lord is now of Thebes the citee, \n"
   1585 "Fulfild of ire and of iniquitee,\n"
   1586 "He, for despyt, and for his tirannye,\n"
   1587 "To do the dede bodyes vileinye,\n"
   1588 "Of alle our lordes, whiche that ben slawe,\n"
   1589 "Hath alle the bodyes on an heep y-drawe,\n"
   1590 "And wol nat suffren hem, by noon assent,\n"
   1591 "Neither to been y-buried nor y-brent,\n"
   1592 "But maketh houndes ete hem in despyt. zet'\n";
   1593 
   1594 #define TEST_BOYER_MOORE 1
   1595 const char *cPattern = "maketh houndes ete hem";
   1596 //const char *cPattern = "Whylom";
   1597 //const char *cPattern = "zet";
   1598     const char *testId = "searchTime()";   // for error macros.
   1599     UnicodeString target = longishText;
   1600     UErrorCode status = U_ZERO_ERROR;
   1601 
   1602 
   1603     LocalUCollatorPointer collator(ucol_open("en", &status));
   1604     CollData *data = CollData::open(collator.getAlias(), status);
   1605     if (U_FAILURE(status) || collator.isNull() || data == NULL) {
   1606         errcheckln(status, "Unable to open UCollator or CollData. - %s", u_errorName(status));
   1607         return;
   1608     }
   1609     //ucol_setStrength(collator.getAlias(), collatorStrength);
   1610     //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);
   1611     UnicodeString uPattern = cPattern;
   1612 #ifndef TEST_BOYER_MOORE
   1613     LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(),
   1614                                                            target.getBuffer(), target.length(),
   1615                                                            collator.getAlias(),
   1616                                                            NULL,     // the break iterator
   1617                                                            &status));
   1618     TEST_ASSERT_SUCCESS(status);
   1619 #else
   1620     BoyerMooreSearch bms(data, uPattern, &target, status);
   1621     TEST_ASSERT_SUCCESS(status);
   1622 #endif
   1623 
   1624 //  int32_t foundStart;
   1625 //  int32_t foundEnd;
   1626     UBool   found;
   1627 
   1628     // Find the match position usgin strstr
   1629     const char *pm = strstr(longishText, cPattern);
   1630     TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr");
   1631     int32_t  refMatchPos = (int32_t)(pm - longishText);
   1632     int32_t  icuMatchPos;
   1633     int32_t  icuMatchEnd;
   1634 #ifndef TEST_BOYER_MOORE
   1635     usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);
   1636     TEST_ASSERT_SUCCESS(status);
   1637 #else
   1638     found = bms.search(0, icuMatchPos, icuMatchEnd);
   1639 #endif
   1640     TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions.");
   1641 
   1642     int32_t i;
   1643     // int32_t j=0;
   1644 
   1645     // Try loopcounts around 100000 to some millions, depending on the operation,
   1646     //   to get runtimes of at least several seconds.
   1647     for (i=0; i<10000; i++) {
   1648 #ifndef TEST_BOYER_MOORE
   1649         found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);
   1650 #else
   1651         found = bms.search(0, icuMatchPos, icuMatchEnd);
   1652 #endif
   1653         //TEST_ASSERT_SUCCESS(status);
   1654         //TEST_ASSERT(found);
   1655 
   1656         // usearch_setOffset(uss.getAlias(), 0, &status);
   1657         // icuMatchPos = usearch_next(uss.getAlias(), &status);
   1658 
   1659          // The i+j stuff is to confuse the optimizer and get it to actually leave the
   1660          //   call to strstr in place.
   1661          //pm = strstr(longishText+j, cPattern);
   1662          //j = (j + i)%5;
   1663     }
   1664 
   1665     //printf("%ld, %d\n", pm-longishText, j);
   1666 #ifdef TEST_BOYER_MOORE
   1667     CollData::close(data);
   1668 #endif
   1669 }
   1670 #endif
   1671 
   1672 //----------------------------------------------------------------------------------------
   1673 //
   1674 //   Random Numbers.  Similar to standard lib rand() and srand()
   1675 //                    Not using library to
   1676 //                      1.  Get same results on all platforms.
   1677 //                      2.  Get access to current seed, to more easily reproduce failures.
   1678 //
   1679 //---------------------------------------------------------------------------------------
   1680 static uint32_t m_seed = 1;
   1681 
   1682 static uint32_t m_rand()
   1683 {
   1684     m_seed = m_seed * 1103515245 + 12345;
   1685     return (uint32_t)(m_seed/65536) % 32768;
   1686 }
   1687 
   1688 class Monkey
   1689 {
   1690 public:
   1691     virtual void append(UnicodeString &test, UnicodeString &alternate) = 0;
   1692 
   1693 protected:
   1694     Monkey();
   1695     virtual ~Monkey();
   1696 };
   1697 
   1698 Monkey::Monkey()
   1699 {
   1700     // ook?
   1701 }
   1702 
   1703 Monkey::~Monkey()
   1704 {
   1705     // ook?
   1706 }
   1707 
   1708 class SetMonkey : public Monkey
   1709 {
   1710 public:
   1711     SetMonkey(const USet *theSet);
   1712     ~SetMonkey();
   1713 
   1714     virtual void append(UnicodeString &test, UnicodeString &alternate);
   1715 
   1716 private:
   1717     const USet *set;
   1718 };
   1719 
   1720 SetMonkey::SetMonkey(const USet *theSet)
   1721     : Monkey(), set(theSet)
   1722 {
   1723     // ook?
   1724 }
   1725 
   1726 SetMonkey::~SetMonkey()
   1727 {
   1728     //ook...
   1729 }
   1730 
   1731 void SetMonkey::append(UnicodeString &test, UnicodeString &alternate)
   1732 {
   1733     int32_t size = uset_size(set);
   1734     int32_t index = m_rand() % size;
   1735     UChar32 ch = uset_charAt(set, index);
   1736     UnicodeString str(ch);
   1737 
   1738     test.append(str);
   1739     alternate.append(str); // flip case, or some junk?
   1740 }
   1741 
   1742 class StringSetMonkey : public Monkey
   1743 {
   1744 public:
   1745     StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData);
   1746     ~StringSetMonkey();
   1747 
   1748     void append(UnicodeString &testCase, UnicodeString &alternate);
   1749 
   1750 private:
   1751     UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeString &alternate);
   1752 
   1753     const USet *set;
   1754     UCollator  *coll;
   1755     CollData   *collData;
   1756 };
   1757 
   1758 StringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData)
   1759 : Monkey(), set(theSet), coll(theCollator), collData(theCollData)
   1760 {
   1761     // ook.
   1762 }
   1763 
   1764 StringSetMonkey::~StringSetMonkey()
   1765 {
   1766     // ook?
   1767 }
   1768 
   1769 void StringSetMonkey::append(UnicodeString &testCase, UnicodeString &alternate)
   1770 {
   1771     int32_t itemCount = uset_getItemCount(set), len = 0;
   1772     int32_t index = m_rand() % itemCount;
   1773     UChar32 rangeStart = 0, rangeEnd = 0;
   1774     UChar buffer[16];
   1775     UErrorCode err = U_ZERO_ERROR;
   1776 
   1777     len = uset_getItem(set, index, &rangeStart, &rangeEnd, buffer, 16, &err);
   1778 
   1779     if (len == 0) {
   1780         int32_t offset = m_rand() % (rangeEnd - rangeStart + 1);
   1781         UChar32 ch = rangeStart + offset;
   1782         UnicodeString str(ch);
   1783 
   1784         testCase.append(str);
   1785         generateAlternative(str, alternate);
   1786     } else if (len > 0) {
   1787         // should check that len < 16...
   1788         UnicodeString str(buffer, len);
   1789 
   1790         testCase.append(str);
   1791         generateAlternative(str, alternate);
   1792     } else {
   1793         // shouldn't happen...
   1794     }
   1795 }
   1796 
   1797 UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCase, UnicodeString &alternate)
   1798 {
   1799     // find out shortest string for the longest sequence of ces.
   1800     // needs to be refined to use dynamic programming, but will be roughly right
   1801     UErrorCode status = U_ZERO_ERROR;
   1802     CEList ceList(coll, testCase, status);
   1803     UnicodeString alt;
   1804     int32_t offset = 0;
   1805 
   1806     if (ceList.size() == 0) {
   1807         return alternate.append(testCase);
   1808     }
   1809 
   1810     while (offset < ceList.size()) {
   1811         int32_t ce = ceList.get(offset);
   1812         const StringList *strings = collData->getStringList(ce);
   1813 
   1814         if (strings == NULL) {
   1815             return alternate.append(testCase);
   1816         }
   1817 
   1818         int32_t stringCount = strings->size();
   1819         int32_t tries = 0;
   1820 
   1821         // find random string that generates the same CEList
   1822         const CEList *ceList2 = NULL;
   1823         const UnicodeString *string = NULL;
   1824               UBool matches = FALSE;
   1825 
   1826         do {
   1827             int32_t s = m_rand() % stringCount;
   1828 
   1829             if (tries++ > stringCount) {
   1830                 alternate.append(testCase);
   1831                 return alternate;
   1832             }
   1833 
   1834             string = strings->get(s);
   1835             ceList2 = collData->getCEList(string);
   1836             matches = ceList.matchesAt(offset, ceList2);
   1837 
   1838             if (! matches) {
   1839                 collData->freeCEList((CEList *) ceList2);
   1840             }
   1841         } while (! matches);
   1842 
   1843         alt.append(*string);
   1844         offset += ceList2->size();
   1845         collData->freeCEList(ceList2);
   1846     }
   1847 
   1848     const CEList altCEs(coll, alt, status);
   1849 
   1850     if (ceList.matchesAt(0, &altCEs)) {
   1851         return alternate.append(alt);
   1852     }
   1853 
   1854     return alternate.append(testCase);
   1855 }
   1856 
   1857 static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyCount, UnicodeString &testCase, UnicodeString &alternate)
   1858 {
   1859     int32_t pieces = (m_rand() % 4) + 1;
   1860     UErrorCode status = U_ZERO_ERROR;
   1861     UBool matches;
   1862 
   1863     do {
   1864         testCase.remove();
   1865         alternate.remove();
   1866         monkeys[0]->append(testCase, alternate);
   1867 
   1868         for(int32_t piece = 0; piece < pieces; piece += 1) {
   1869             int32_t monkey = m_rand() % monkeyCount;
   1870 
   1871             monkeys[monkey]->append(testCase, alternate);
   1872         }
   1873 
   1874         const CEList ceTest(coll, testCase, status);
   1875         const CEList ceAlt(coll, alternate, status);
   1876 
   1877         matches = ceTest.matchesAt(0, &ceAlt);
   1878     } while (! matches);
   1879 }
   1880 
   1881 //
   1882 //  Find the next acceptable boundary following the specified starting index
   1883 //     in the target text being searched.
   1884 //      TODO:  refine what is an acceptable boundary.  For the moment,
   1885 //             choose the next position not within a combining sequence.
   1886 //
   1887 #if 0
   1888 static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex) {
   1889     const UChar *text = string.getBuffer();
   1890     int32_t textLen   = string.length();
   1891 
   1892     if (startIndex >= textLen) {
   1893         return startIndex;
   1894     }
   1895 
   1896     UChar32  c;
   1897     int32_t  i = startIndex;
   1898 
   1899     U16_NEXT(text, i, textLen, c);
   1900 
   1901     // If we are on a control character, stop without looking for combining marks.
   1902     //    Control characters do not combine.
   1903     int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1904     if (gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR) {
   1905         return i;
   1906     }
   1907 
   1908     // The initial character was not a control, and can thus accept trailing
   1909     //   combining characters.  Advance over however many of them there are.
   1910     int32_t  indexOfLastCharChecked;
   1911 
   1912     for (;;) {
   1913         indexOfLastCharChecked = i;
   1914 
   1915         if (i>=textLen) {
   1916             break;
   1917         }
   1918 
   1919         U16_NEXT(text, i, textLen, c);
   1920         gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1921 
   1922         if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
   1923             break;
   1924         }
   1925     }
   1926 
   1927     return indexOfLastCharChecked;
   1928 }
   1929 #endif
   1930 
   1931 #if 0
   1932 static UBool isInCombiningSequence(const UnicodeString &string, int32_t index) {
   1933     const UChar *text = string.getBuffer();
   1934     int32_t textLen   = string.length();
   1935 
   1936     if (index>=textLen || index<=0) {
   1937         return FALSE;
   1938     }
   1939 
   1940     // If the character at the current index is not a GRAPHEME_EXTEND
   1941     //    then we can not be within a combining sequence.
   1942     UChar32  c;
   1943     U16_GET(text, 0, index, textLen, c);
   1944     int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1945     if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
   1946         return FALSE;
   1947     }
   1948 
   1949     // We are at a combining mark.  If the preceding character is anything
   1950     //   except a CONTROL, CR or LF, we are in a combining sequence.
   1951     U16_PREV(text, 0, index, c);
   1952     gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
   1953 
   1954     return !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR);
   1955 }
   1956 #endif
   1957 
   1958 static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)
   1959 {
   1960     UErrorCode      status = U_ZERO_ERROR;
   1961     OrderList       targetOrders(coll, target, offset);
   1962     OrderList       patternOrders(coll, pattern);
   1963     int32_t         targetSize  = targetOrders.size() - 1;
   1964     int32_t         patternSize = patternOrders.size() - 1;
   1965     UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
   1966                                                   target.getBuffer(), target.length(), &status);
   1967 
   1968     if (patternSize == 0) {
   1969         // Searching for an empty pattern always fails
   1970         matchStart = matchEnd = -1;
   1971         ubrk_close(charBreakIterator);
   1972         return FALSE;
   1973     }
   1974 
   1975     matchStart = matchEnd = -1;
   1976 
   1977     for(int32_t i = 0; i < targetSize; i += 1) {
   1978         if (targetOrders.matchesAt(i, patternOrders)) {
   1979             int32_t start    = targetOrders.getLowOffset(i);
   1980             int32_t maxLimit = targetOrders.getLowOffset(i + patternSize);
   1981             int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1);
   1982 
   1983             // if the low and high offsets of the first CE in
   1984             // the match are the same, it means that the match
   1985             // starts in the middle of an expansion - all but
   1986             // the first CE of the expansion will have the offset
   1987             // of the following character.
   1988             if (start == targetOrders.getHighOffset(i)) {
   1989                 continue;
   1990             }
   1991 
   1992             // Make sure match starts on a grapheme boundary
   1993             if (! ubrk_isBoundary(charBreakIterator, start)) {
   1994                 continue;
   1995             }
   1996 
   1997             // If the low and high offsets of the CE after the match
   1998             // are the same, it means that the match ends in the middle
   1999             // of an expansion sequence.
   2000             if (maxLimit == targetOrders.getHighOffset(i + patternSize) &&
   2001                 targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) {
   2002                 continue;
   2003             }
   2004 
   2005             int32_t mend = maxLimit;
   2006 
   2007             // Find the first grapheme break after the character index
   2008             // of the last CE in the match. If it's after character index
   2009             // that's after the last CE in the match, use that index
   2010             // as the end of the match.
   2011             if (minLimit < maxLimit) {
   2012                 // When the last CE's low index is same with its high index, the CE is likely
   2013                 // a part of expansion. In this case, the index is located just after the
   2014                 // character corresponding to the CEs compared above. If the index is right
   2015                 // at the break boundary, move the position to the next boundary will result
   2016                 // incorrect match length when there are ignorable characters exist between
   2017                 // the position and the next character produces CE(s). See ticket#8482.
   2018                 if (minLimit == targetOrders.getHighOffset(i + patternSize - 1) && ubrk_isBoundary(charBreakIterator, minLimit)) {
   2019                     mend = minLimit;
   2020                 } else {
   2021                     int32_t nba = ubrk_following(charBreakIterator, minLimit);
   2022 
   2023                     if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) {
   2024                         mend = nba;
   2025                     }
   2026                 }
   2027             }
   2028 
   2029             if (mend > maxLimit) {
   2030                 continue;
   2031             }
   2032 
   2033             if (! ubrk_isBoundary(charBreakIterator, mend)) {
   2034                 continue;
   2035             }
   2036 
   2037             matchStart = start;
   2038             matchEnd   = mend;
   2039 
   2040             ubrk_close(charBreakIterator);
   2041             return TRUE;
   2042         }
   2043     }
   2044 
   2045     ubrk_close(charBreakIterator);
   2046     return FALSE;
   2047 }
   2048 
   2049 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
   2050 static int32_t  getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {
   2051     int32_t val = defaultVal;
   2052 
   2053     name.append(" *= *(-?\\d+)");
   2054 
   2055     UErrorCode status = U_ZERO_ERROR;
   2056     RegexMatcher m(name, params, 0, status);
   2057 
   2058     if (m.find()) {
   2059         // The param exists.  Convert the string to an int.
   2060         char valString[100];
   2061         int32_t paramLength = m.end(1, status) - m.start(1, status);
   2062 
   2063         if (paramLength >= (int32_t)(sizeof(valString)-1)) {
   2064             paramLength = (int32_t)(sizeof(valString)-2);
   2065         }
   2066 
   2067         params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
   2068         val = strtol(valString,  NULL, 10);
   2069 
   2070         // Delete this parameter from the params string.
   2071         m.reset();
   2072         params = m.replaceFirst("", status);
   2073     }
   2074 
   2075   //U_ASSERT(U_SUCCESS(status));
   2076     if (! U_SUCCESS(status)) {
   2077         val = defaultVal;
   2078     }
   2079 
   2080     return val;
   2081 }
   2082 #endif
   2083 
   2084 #if !UCONFIG_NO_COLLATION
   2085 int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
   2086                                     const char *name, const char *strength, uint32_t seed)
   2087 {
   2088     UErrorCode status = U_ZERO_ERROR;
   2089     int32_t actualStart = -1, actualEnd = -1;
   2090   //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
   2091     int32_t expectedStart = -1, expectedEnd = -1;
   2092     int32_t notFoundCount = 0;
   2093     LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
   2094                                                            testCase.getBuffer(), testCase.length(),
   2095                                                            coll,
   2096                                                            NULL,     // the break iterator
   2097                                                            &status));
   2098 
   2099     // **** TODO: find *all* matches, not just first one ****
   2100     simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
   2101 
   2102     usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status);
   2103 
   2104     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2105         errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2106               "    strength=%s seed=%d",
   2107               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
   2108     }
   2109 
   2110     if (expectedStart == -1 && actualStart == -1) {
   2111         notFoundCount += 1;
   2112     }
   2113 
   2114     // **** TODO: find *all* matches, not just first one ****
   2115     simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
   2116 
   2117     usearch_setPattern(uss.getAlias(), altPattern.getBuffer(), altPattern.length(), &status);
   2118 
   2119     usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status);
   2120 
   2121     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2122         errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2123               "    strength=%s seed=%d",
   2124               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
   2125     }
   2126 
   2127     if (expectedStart == -1 && actualStart == -1) {
   2128         notFoundCount += 1;
   2129     }
   2130 
   2131     return notFoundCount;
   2132 }
   2133 
   2134 int32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
   2135                                     BoyerMooreSearch *bms, BoyerMooreSearch *abms,
   2136                                     const char *name, const char *strength, uint32_t seed)
   2137 {
   2138     UErrorCode status = U_ZERO_ERROR;
   2139     int32_t actualStart = -1, actualEnd = -1;
   2140   //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
   2141     int32_t expectedStart = -1, expectedEnd = -1;
   2142     int32_t notFoundCount = 0;
   2143 
   2144     // **** TODO: find *all* matches, not just first one ****
   2145     simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
   2146 
   2147     bms->setTargetString(&testCase, status);
   2148     bms->search(0, actualStart, actualEnd);
   2149 
   2150     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2151         errln("Boyer-Moore Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2152               "    strength=%s seed=%d",
   2153               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
   2154         errln(UNICODE_STRING_SIMPLE("    <pattern>: ") + prettify(pattern));
   2155     }
   2156 
   2157     if (expectedStart == -1 && actualStart == -1) {
   2158         notFoundCount += 1;
   2159     }
   2160 
   2161     // **** TODO: find *all* matches, not just first one ****
   2162     simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
   2163 
   2164     abms->setTargetString(&testCase, status);
   2165     abms->search(0, actualStart, actualEnd);
   2166 
   2167     if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
   2168         errln("Boyer-Moore Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
   2169               "    strength=%s seed=%d",
   2170               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
   2171         errln(UNICODE_STRING_SIMPLE("    <alt_pattern>: ") + prettify(altPattern));
   2172     }
   2173 
   2174     if (expectedStart == -1 && actualStart == -1) {
   2175         notFoundCount += 1;
   2176     }
   2177 
   2178 
   2179     return notFoundCount;
   2180 }
   2181 #endif
   2182 
   2183 void SSearchTest::monkeyTest(char *params)
   2184 {
   2185     // ook!
   2186     UErrorCode status = U_ZERO_ERROR;
   2187   //UCollator *coll = ucol_open(NULL, &status);
   2188     UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status);
   2189 
   2190     if (U_FAILURE(status)) {
   2191         errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status));
   2192         return;
   2193     }
   2194 
   2195     CollData  *monkeyData = CollData::open(coll, status);
   2196 
   2197     USet *expansions   = uset_openEmpty();
   2198     USet *contractions = uset_openEmpty();
   2199 
   2200     ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
   2201 
   2202     U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2203     U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2204     USet *letters = uset_openPattern(letter_pattern, 39, &status);
   2205     SetMonkey letterMonkey(letters);
   2206     StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
   2207     StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
   2208     UnicodeString testCase;
   2209     UnicodeString alternate;
   2210     UnicodeString pattern, altPattern;
   2211     UnicodeString prefix, altPrefix;
   2212     UnicodeString suffix, altSuffix;
   2213 
   2214     Monkey *monkeys[] = {
   2215         &letterMonkey,
   2216         &contractionMonkey,
   2217         &expansionMonkey,
   2218         &contractionMonkey,
   2219         &expansionMonkey,
   2220         &contractionMonkey,
   2221         &expansionMonkey,
   2222         &contractionMonkey,
   2223         &expansionMonkey};
   2224     int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
   2225     // int32_t nonMatchCount = 0;
   2226 
   2227     UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
   2228     const char *strengthNames[] = {"primary", "secondary", "tertiary"};
   2229     int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
   2230     int32_t loopCount = quick? 1000 : 10000;
   2231     int32_t firstStrength = 0;
   2232     int32_t lastStrength  = strengthCount - 1; //*/ 0;
   2233 
   2234     if (params != NULL) {
   2235 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
   2236         UnicodeString p(params);
   2237 
   2238         loopCount = getIntParam("loop", p, loopCount);
   2239         m_seed    = getIntParam("seed", p, m_seed);
   2240 
   2241         RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
   2242         if (m.find()) {
   2243             UnicodeString breakType = m.group(1, status);
   2244 
   2245             for (int32_t s = 0; s < strengthCount; s += 1) {
   2246                 if (breakType == strengthNames[s]) {
   2247                     firstStrength = lastStrength = s;
   2248                     break;
   2249                 }
   2250             }
   2251 
   2252             m.reset();
   2253             p = m.replaceFirst("", status);
   2254         }
   2255 
   2256         if (RegexMatcher("\\S", p, 0, status).find()) {
   2257             // Each option is stripped out of the option string as it is processed.
   2258             // All options have been checked.  The option string should have been completely emptied..
   2259             char buf[100];
   2260             p.extract(buf, sizeof(buf), NULL, status);
   2261             buf[sizeof(buf)-1] = 0;
   2262             errln("Unrecognized or extra parameter:  %s\n", buf);
   2263             return;
   2264         }
   2265 #else
   2266         infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
   2267 #endif
   2268     }
   2269 
   2270     for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
   2271         int32_t notFoundCount = 0;
   2272 
   2273         logln("Setting strength to %s.", strengthNames[s]);
   2274         ucol_setStrength(coll, strengths[s]);
   2275 
   2276         // TODO: try alternate prefix and suffix too?
   2277         // TODO: alterntaes are only equal at primary strength. Is this OK?
   2278         for(int32_t t = 0; t < loopCount; t += 1) {
   2279             uint32_t seed = m_seed;
   2280             // int32_t  nmc = 0;
   2281 
   2282             generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
   2283             generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
   2284             generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);
   2285 
   2286             // pattern
   2287             notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed);
   2288 
   2289             testCase.remove();
   2290             testCase.append(prefix);
   2291             testCase.append(/*alt*/pattern);
   2292 
   2293             // prefix + pattern
   2294             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed);
   2295 
   2296             testCase.append(suffix);
   2297 
   2298             // prefix + pattern + suffix
   2299             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed);
   2300 
   2301             testCase.remove();
   2302             testCase.append(pattern);
   2303             testCase.append(suffix);
   2304 
   2305             // pattern + suffix
   2306             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed);
   2307         }
   2308 
   2309        logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
   2310     }
   2311 
   2312     uset_close(contractions);
   2313     uset_close(expansions);
   2314     uset_close(letters);
   2315 
   2316     CollData::close(monkeyData);
   2317 
   2318     ucol_close(coll);
   2319 }
   2320 
   2321 void SSearchTest::bmMonkeyTest(char *params)
   2322 {
   2323     static const UChar skipChars[] = { 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0xAAB5, 0xAAB6, 0xAAB9, 0xAABB, 0xAABC, 0 }; // for timebomb
   2324     // ook!
   2325     UErrorCode status = U_ZERO_ERROR;
   2326     UCollator *coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
   2327 
   2328     if (U_FAILURE(status)) {
   2329         errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status));
   2330         return;
   2331     }
   2332 
   2333     CollData  *monkeyData = CollData::open(coll, status);
   2334 
   2335     USet *expansions   = uset_openEmpty();
   2336     USet *contractions = uset_openEmpty();
   2337 
   2338     ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
   2339 
   2340     U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2341     U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
   2342     USet *letters = uset_openPattern(letter_pattern, 39, &status);
   2343     SetMonkey letterMonkey(letters);
   2344     StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
   2345     StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
   2346     UnicodeString testCase;
   2347     UnicodeString alternate;
   2348     UnicodeString pattern, altPattern;
   2349     UnicodeString prefix, altPrefix;
   2350     UnicodeString suffix, altSuffix;
   2351 
   2352     Monkey *monkeys[] = {
   2353         &letterMonkey,
   2354         &contractionMonkey,
   2355         &expansionMonkey,
   2356         &contractionMonkey,
   2357         &expansionMonkey,
   2358         &contractionMonkey,
   2359         &expansionMonkey,
   2360         &contractionMonkey,
   2361         &expansionMonkey};
   2362     int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
   2363     // int32_t nonMatchCount = 0;
   2364 
   2365     UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
   2366     const char *strengthNames[] = {"primary", "secondary", "tertiary"};
   2367     int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
   2368     int32_t loopCount = quick? 1000 : 10000;
   2369     int32_t firstStrength = 0;
   2370     int32_t lastStrength  = strengthCount - 1; //*/ 0;
   2371 
   2372     if (params != NULL) {
   2373 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
   2374         UnicodeString p(params);
   2375 
   2376         loopCount = getIntParam("loop", p, loopCount);
   2377         m_seed    = getIntParam("seed", p, m_seed);
   2378 
   2379         RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
   2380         if (m.find()) {
   2381             UnicodeString breakType = m.group(1, status);
   2382 
   2383             for (int32_t s = 0; s < strengthCount; s += 1) {
   2384                 if (breakType == strengthNames[s]) {
   2385                     firstStrength = lastStrength = s;
   2386                     break;
   2387                 }
   2388             }
   2389 
   2390             m.reset();
   2391             p = m.replaceFirst("", status);
   2392         }
   2393 
   2394         if (RegexMatcher("\\S", p, 0, status).find()) {
   2395             // Each option is stripped out of the option string as it is processed.
   2396             // All options have been checked.  The option string should have been completely emptied..
   2397             char buf[100];
   2398             p.extract(buf, sizeof(buf), NULL, status);
   2399             buf[sizeof(buf)-1] = 0;
   2400             errln("Unrecognized or extra parameter:  %s\n", buf);
   2401             return;
   2402         }
   2403 #else
   2404         infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
   2405 #endif
   2406     }
   2407 
   2408     for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
   2409         int32_t notFoundCount = 0;
   2410 
   2411         logln("Setting strength to %s.", strengthNames[s]);
   2412         ucol_setStrength(coll, strengths[s]);
   2413 
   2414         CollData *data = CollData::open(coll, status);
   2415 
   2416         UnicodeSet skipSet;
   2417         if(isICUVersionBefore(51, 1)) {
   2418             // timebomb until ticket #9156 (was #8081) is resolved
   2419             UnicodeString skipString(skipChars);
   2420             skipSet.addAll(skipString);
   2421         }
   2422         if(isICUVersionBefore(51, 1)) {
   2423             // Time bomb until ticket #9490 is fixed.
   2424             skipSet.add(0x12327);
   2425             skipSet.add(0x1311b);
   2426             skipSet.add(0x1200d);
   2427         }
   2428         skipSet.freeze();
   2429         // TODO: try alternate prefix and suffix too?
   2430         // TODO: alternates are only equal at primary strength. Is this OK?
   2431         for(int32_t t = 0; t < loopCount; t += 1) {
   2432             uint32_t seed = m_seed;
   2433             // int32_t  nmc = 0;
   2434 
   2435             generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
   2436             generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
   2437             generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);
   2438 
   2439             if (skipSet.containsSome(pattern)) {
   2440                 continue; // time bomb
   2441             }
   2442 
   2443             BoyerMooreSearch pat(data, pattern, NULL, status);
   2444             BoyerMooreSearch alt(data, altPattern, NULL, status);
   2445 
   2446             // **** need a better way to deal with this ****
   2447 #if 0
   2448             if (pat.empty() ||
   2449                 alt.empty()) {
   2450                     continue;
   2451             }
   2452 #endif
   2453 
   2454             // pattern
   2455             notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern, &pat, &alt, "pattern", strengthNames[s], seed);
   2456 
   2457             testCase.remove();
   2458             testCase.append(prefix);
   2459             testCase.append(/*alt*/pattern);
   2460 
   2461             // prefix + pattern
   2462             notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern", strengthNames[s], seed);
   2463 
   2464             testCase.append(suffix);
   2465 
   2466             // prefix + pattern + suffix
   2467             notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed);
   2468 
   2469             testCase.remove();
   2470             testCase.append(pattern);
   2471             testCase.append(suffix);
   2472 
   2473             // pattern + suffix
   2474             notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "pattern + suffix", strengthNames[s], seed);
   2475         }
   2476 
   2477         CollData::close(data);
   2478 
   2479         logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
   2480     }
   2481 
   2482     uset_close(contractions);
   2483     uset_close(expansions);
   2484     uset_close(letters);
   2485 
   2486     CollData::close(monkeyData);
   2487 
   2488     ucol_close(coll);
   2489 }
   2490 
   2491 void SSearchTest::stringListTest(){
   2492     UErrorCode status = U_ZERO_ERROR;
   2493     StringList *sl = new StringList(status);
   2494     if(U_FAILURE(status)){
   2495         errln("ERROR: stringListTest: Could not start StringList");
   2496     }
   2497 
   2498     const UChar chars[] = {
   2499             0x0000
   2500     };
   2501     sl->add(chars, (int32_t) 0, status);
   2502     if(U_FAILURE(status)){
   2503         errln("ERROR: stringListTest: StringList::add");
   2504     }
   2505 
   2506     if(sl->getDynamicClassID() != StringList::getStaticClassID()){
   2507         errln("ERROR: stringListTest: getDynamicClassID and getStaticClassID does not match");
   2508     }
   2509     delete sl;
   2510 }
   2511 
   2512 #endif
   2513 
   2514 #endif
   2515