Home | History | Annotate | Download | only in search
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2000-2015, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 
     11 /**
     12  * Port From:   ICU4C v2.1 : collate/StringSearchTest
     13  * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp
     14  **/
     15 
     16 package android.icu.dev.test.search;
     17 
     18 import static android.icu.text.Collator.IDENTICAL;
     19 import static android.icu.text.Collator.PRIMARY;
     20 import static android.icu.text.Collator.QUATERNARY;
     21 import static android.icu.text.Collator.SECONDARY;
     22 import static android.icu.text.Collator.TERTIARY;
     23 import static android.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD;
     24 import static android.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD;
     25 import static android.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
     26 
     27 import java.text.CharacterIterator;
     28 import java.text.StringCharacterIterator;
     29 import java.util.Locale;
     30 
     31 import org.junit.Before;
     32 import org.junit.Test;
     33 import org.junit.runner.RunWith;
     34 import org.junit.runners.JUnit4;
     35 
     36 import android.icu.dev.test.TestFmwk;
     37 import android.icu.text.BreakIterator;
     38 import android.icu.text.Collator;
     39 import android.icu.text.RuleBasedCollator;
     40 import android.icu.text.SearchIterator;
     41 import android.icu.text.SearchIterator.ElementComparisonType;
     42 import android.icu.text.StringSearch;
     43 import android.icu.util.ULocale;
     44 import android.icu.testsharding.MainTestShard;
     45 
     46 @MainTestShard
     47 @RunWith(JUnit4.class)
     48 public class SearchTest extends TestFmwk {
     49 
     50     //inner class
     51     static class SearchData {
     52         SearchData(String text, String pattern,
     53                     String coll, int strength, ElementComparisonType cmpType, String breaker,
     54                     int[] offset, int[] size) {
     55             this.text = text;
     56             this.pattern = pattern;
     57             this.collator = coll;
     58             this.strength = strength;
     59             this.cmpType = cmpType;
     60             this.breaker = breaker;
     61             this.offset = offset;
     62             this.size = size;
     63         }
     64         String              text;
     65         String              pattern;
     66         String              collator;
     67         int                 strength;
     68         ElementComparisonType   cmpType;
     69         String              breaker;
     70         int[]               offset;
     71         int[]               size;
     72     }
     73 
     74     RuleBasedCollator m_en_us_;
     75     RuleBasedCollator m_fr_fr_;
     76     RuleBasedCollator m_de_;
     77     RuleBasedCollator m_es_;
     78     BreakIterator     m_en_wordbreaker_;
     79     BreakIterator     m_en_characterbreaker_;
     80 
     81     // Just calling SearchData constructor, to make the test data source code
     82     // nice and short
     83     private static SearchData SD(String text, String pattern, String coll, int strength,
     84                     ElementComparisonType cmpType, String breaker, int[] offset, int[] size) {
     85         return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size);
     86     }
     87 
     88     // Just returning int[], to make the test data nice and short
     89     private static int[] IA(int... elements) {
     90         return elements;
     91     }
     92 
     93     static SearchData[] BASIC = {
     94         SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
     95         SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
     96         SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
     97         SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
     98         SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
     99         SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
    100         SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
    101         SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    102         SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    103         SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    104         SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    105         SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    106     };
    107 
    108     SearchData BREAKITERATOREXACT[] = {
    109         SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
    110         SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
    111         SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
    112         SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
    113         SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
    114         /* jitterbug 1745 */
    115         SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
    116         SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
    117         SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
    118     };
    119 
    120     SearchData BREAKITERATORCANONICAL[] = {
    121         SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
    122         SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
    123         SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
    124         SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
    125         SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
    126         /* jitterbug 1745 */
    127         SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
    128         SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
    129         SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
    130     };
    131 
    132     SearchData BASICCANONICAL[] = {
    133         SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    134         SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
    135         SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
    136         SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
    137         SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
    138         SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
    139         SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
    140 
    141         SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    142         SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    143         SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    144         SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    145         SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    146         SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    147         SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    148         SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    149 
    150         SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
    151         SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
    152     };
    153 
    154     SearchData COLLATOR[] = {
    155         /* english */
    156         SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
    157         /* tailored */
    158         SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
    159     };
    160 
    161     String TESTCOLLATORRULE = "& o,O ; p,P";
    162     String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc";
    163 
    164     SearchData COLLATORCANONICAL[] = {
    165         /* english */
    166         SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
    167         /* tailored */
    168         SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
    169     };
    170 
    171     SearchData COMPOSITEBOUNDARIES[] = {
    172         SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    173         SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    174         SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
    175         SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    176         SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    177         SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    178 
    179         /* first one matches only because it's at the start of the text */
    180         SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    181 
    182         /* \\u0300 blocked by \\u0300 */
    183         SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    184 
    185         /* A + 030A + 0301 */
    186         SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    187         SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    188 
    189         SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    190         SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    191 
    192         SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    193 
    194         SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    195 
    196         /* blocked accent */
    197         SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    198         SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    199 
    200         SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    201         SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    202         SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    203 
    204         SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    205 
    206         SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    207         SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    208 
    209         SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    210 
    211         SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    212         SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    213         SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
    214     };
    215 
    216     SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
    217         SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    218         SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    219         SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
    220         SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    221         SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    222         SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    223 
    224         /* first one matches only because it's at the start of the text */
    225         SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    226 
    227         /* \u0300 blocked by \u0300 */
    228         SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    229 
    230         /* A + 030A + 0301 */
    231         SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    232         SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    233 
    234         SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    235         SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    236 
    237         SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    238 
    239         SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    240 
    241         /* blocked accent */
    242         SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    243         SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    244 
    245         SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    246         SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    247         SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    248 
    249         SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    250 
    251         SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    252         SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    253 
    254         SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    255 
    256         SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    257         SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    258 
    259         SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
    260     };
    261 
    262     SearchData SUPPLEMENTARY[] = {
    263         SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
    264                 "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
    265         SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
    266         SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    267         SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    268         SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    269         SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    270     };
    271 
    272     String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315";
    273 
    274     SearchData CONTRACTION[] = {
    275         /* common discontiguous */
    276         SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    277 
    278         SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    279 
    280         /* contraction prefix */
    281         SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    282 
    283         SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    284         SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    285 
    286         /*
    287          * discontiguous problem here for backwards iteration. accents not found because discontiguous stores all
    288          * information
    289          */
    290         SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    291         /* ends not with a contraction character */
    292         SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    293         SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
    294         SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    295         /* blocked discontiguous */
    296         SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    297 
    298         /*
    299          * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
    300          * match fails because it ends in the middle of an expansion...
    301          */
    302         SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    303     };
    304 
    305     SearchData CONTRACTIONCANONICAL[] = {
    306         /* common discontiguous */
    307         SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    308         SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    309 
    310         /* contraction prefix */
    311         SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    312 
    313         SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    314         SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    315 
    316         /*
    317          * discontiguous problem here for backwards iteration. forwards gives 0, 4 but backwards give 1, 3
    318          */
    319         /*
    320          * {"X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, {0, -1), {4}),
    321          */
    322 
    323         /* ends not with a contraction character */
    324         SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    325         SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
    326 
    327         SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    328 
    329         /* blocked discontiguous */
    330         SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    331 
    332         /*
    333          * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
    334          * match fails because it ends in the middle of an expansion...
    335          */
    336         SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)),
    337     };
    338 
    339     SearchData MATCH[] = {
    340         SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)),
    341         /*  012345678901234567890123456789012345678901234567890 */
    342         SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)),
    343     };
    344 
    345     String IGNORABLERULE = "&a = \u0300";
    346 
    347     SearchData IGNORABLE[] = {
    348         /*
    349          * This isn't much of a test when matches have to be on grapheme boundiaries. The match at 0 only works because it's
    350          * at the start of the text.
    351          */
    352         SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
    353     };
    354 
    355     SearchData DIACTRICMATCH[] = {
    356         SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)),
    357         SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303",
    358             null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)),
    359         SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)),
    360     };
    361 
    362     SearchData NORMCANONICAL[] = {
    363         SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    364         SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    365         SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    366         SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    367         SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    368         SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    369     };
    370 
    371     SearchData NORMEXACT[] = {
    372         SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
    373     };
    374 
    375     SearchData NONNORMEXACT[] = {
    376         SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    377     };
    378 
    379     SearchData OVERLAP[] = {
    380         SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
    381     };
    382 
    383     SearchData NONOVERLAP[] = {
    384         SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
    385     };
    386 
    387     SearchData OVERLAPCANONICAL[] = {
    388         SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
    389     };
    390 
    391     SearchData NONOVERLAPCANONICAL[] = {
    392         SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
    393     };
    394 
    395     SearchData PATTERNCANONICAL[] = {
    396         SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
    397         SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
    398     };
    399 
    400     SearchData PATTERN[] = {
    401         SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
    402         SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
    403     };
    404 
    405     String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, "
    406                                 + "\u00E7a p\u00E8che par, "
    407                                 + "p\u00E9cher, "
    408                                 + "une p\u00EAche, "
    409                                 + "un p\u00EAcher, "
    410                                 + "j\u2019ai p\u00EAch\u00E9, "
    411                                 + "un p\u00E9cheur, "
    412                                 + "\u201Cp\u00E9che\u201D, "
    413                                 + "decomp peche\u0301, "
    414                                 + "base peche";
    415     // in the above, the interesting words and their offsets are:
    416     //    3 pe<301>che<301>
    417     //    13 pe<300>che
    418     //    24 pe<301>cher
    419     //    36 pe<302>che
    420     //    46 pe<302>cher
    421     //    59 pe<302>che<301>
    422     //    69 pe<301>cheur
    423     //    79 pe<301>che
    424     //    94 peche<+301>
    425     //    107 peche
    426 
    427     SearchData STRENGTH[] = {
    428         /*  012345678901234567890123456789012345678901234567890123456789 */
    429         SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
    430         SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
    431         SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
    432                 "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
    433         SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
    434         SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
    435         SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON,  null, IA(0, -1), IA(1, 0)),
    436 
    437         /* some tests for modified element comparison, ticket #7093 */
    438         SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    439         SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    440         SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
    441         SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    442         SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    443         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
    444         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
    445         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
    446         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
    447         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
    448         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
    449         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
    450         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
    451         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    452         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    453         SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
    454         SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
    455         SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    456         SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    457 
    458         /* more tests for modified element comparison (with fr), ticket #7093 */
    459         SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    460         SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    461         SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
    462         SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    463         SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    464         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
    465         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
    466         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
    467         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
    468         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
    469         SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
    470         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
    471         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
    472         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    473         SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    474         SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
    475         SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
    476         SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
    477         SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
    478 
    479     };
    480 
    481     SearchData STRENGTHCANONICAL[] = {
    482         /*  012345678901234567890123456789012345678901234567890123456789 */
    483         SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
    484         SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
    485         SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
    486                 "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
    487         SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
    488         SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
    489     };
    490 
    491     SearchData SUPPLEMENTARYCANONICAL[] = {
    492         /*  012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
    493         SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00",
    494             null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
    495         SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
    496         SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    497         SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    498         SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    499         SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    500     };
    501 
    502     static SearchData VARIABLE[] = {
    503         /*  012345678901234567890123456789012345678901234567890123456789 */
    504         SD("blackbirds black blackbirds blackbird black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)),
    505 
    506         /*
    507          * to see that it doesn't go into an infinite loop if the start of text is a ignorable character
    508          */
    509         SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    510         SD("abcdefghijklmnopqrstuvwxyz", "   ",
    511             null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null,
    512             IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1),
    513             IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
    514 
    515         /* testing tightest match */
    516         SD(" abc  a bc   ab c    a  bc     ab  c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)),
    517         /*  012345678901234567890123456789012345678901234567890123456789 */
    518         SD(" abc  a bc   ab c    a  bc     ab  c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)),
    519 
    520         /* totally ignorable text */
    521         SD("           ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    522     };
    523 
    524     static SearchData TEXTCANONICAL[] = {
    525         SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
    526         SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)),
    527     };
    528 
    529     static SearchData INDICPREFIXMATCH[] = {
    530         SD("\u0915\u0020\u0915\u0901\u0020\u0915\u0902\u0020\u0915\u0903\u0020\u0915\u0940\u0020\u0915\u093F\u0020\u0915\u0943\u0020\u0915\u093C\u0020\u0958",
    531                 "\u0915", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 5, 8, 11, 14, 17, 20, 23,-1), IA(1, 2, 2, 2, 1, 1, 1, 2, 1)),
    532         SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947",
    533                 "\u0915\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 3, 7, 11, -1), IA(2, 2, 2, 2)),
    534         SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947",
    535                 "\u0915\u0943\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 19, -1), IA(3, 3)),
    536     };
    537 
    538     /**
    539      * Constructor
    540      */
    541     public SearchTest()
    542     {
    543 
    544     }
    545 
    546     @Before
    547     public void init() throws Exception {
    548         m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US);
    549         m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE);
    550         m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE"));
    551         m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES"));
    552         m_en_wordbreaker_ = BreakIterator.getWordInstance();
    553         m_en_characterbreaker_ = BreakIterator.getCharacterInstance();
    554         String rules = m_de_.getRules() + EXTRACOLLATIONRULE;
    555         m_de_ = new RuleBasedCollator(rules);
    556         rules = m_es_.getRules() + EXTRACOLLATIONRULE;
    557         m_es_ = new RuleBasedCollator(rules);
    558 
    559     }
    560 
    561     RuleBasedCollator getCollator(String collator) {
    562         if (collator == null) {
    563             return m_en_us_;
    564         } if (collator.equals("fr")) {
    565             return m_fr_fr_;
    566         } else if (collator.equals("de")) {
    567             return m_de_;
    568         } else if (collator.equals("es")) {
    569             return m_es_;
    570         } else {
    571             return m_en_us_;
    572         }
    573     }
    574 
    575     BreakIterator getBreakIterator(String breaker) {
    576         if (breaker == null) {
    577             return null;
    578         } if (breaker.equals("wordbreaker")) {
    579             return m_en_wordbreaker_;
    580         } else {
    581             return m_en_characterbreaker_;
    582         }
    583     }
    584 
    585     boolean assertCanonicalEqual(SearchData search) {
    586         Collator      collator = getCollator(search.collator);
    587         BreakIterator breaker  = getBreakIterator(search.breaker);
    588         StringSearch  strsrch;
    589 
    590         String text = search.text;
    591         String  pattern = search.pattern;
    592 
    593         if (breaker != null) {
    594             breaker.setText(text);
    595         }
    596         collator.setStrength(search.strength);
    597         collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    598         try {
    599             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
    600             strsrch.setElementComparisonType(search.cmpType);
    601             strsrch.setCanonical(true);
    602         } catch (Exception e) {
    603             errln("Error opening string search" + e.getMessage());
    604             return false;
    605         }
    606 
    607         if (!assertEqualWithStringSearch(strsrch, search)) {
    608             collator.setStrength(TERTIARY);
    609             collator.setDecomposition(Collator.NO_DECOMPOSITION);
    610             return false;
    611         }
    612         collator.setStrength(TERTIARY);
    613         collator.setDecomposition(Collator.NO_DECOMPOSITION);
    614         return true;
    615     }
    616 
    617     boolean assertEqual(SearchData search) {
    618         Collator      collator = getCollator(search.collator);
    619         BreakIterator breaker  = getBreakIterator(search.breaker);
    620         StringSearch  strsrch;
    621 
    622         String text = search.text;
    623         String  pattern = search.pattern;
    624 
    625         if (breaker != null) {
    626             breaker.setText(text);
    627         }
    628         collator.setStrength(search.strength);
    629         try {
    630             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
    631             strsrch.setElementComparisonType(search.cmpType);
    632         } catch (Exception e) {
    633             errln("Error opening string search " + e.getMessage());
    634             return false;
    635         }
    636 
    637         if (!assertEqualWithStringSearch(strsrch, search)) {
    638             collator.setStrength(TERTIARY);
    639             return false;
    640         }
    641         collator.setStrength(TERTIARY);
    642         return true;
    643     }
    644 
    645     boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) {
    646         Collator      collator = getCollator(search.collator);
    647         BreakIterator breaker  = getBreakIterator(search.breaker);
    648         StringSearch  strsrch;
    649 
    650         String text = search.text;
    651         String  pattern = search.pattern;
    652 
    653         if (breaker != null) {
    654             breaker.setText(text);
    655         }
    656         collator.setStrength(search.strength);
    657         try {
    658             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
    659             strsrch.setCanonical(canonical);
    660             strsrch.setOverlapping(overlap);
    661             strsrch.setElementComparisonType(search.cmpType);
    662         } catch (Exception e) {
    663             errln("Error opening string search " + e.getMessage());
    664             return false;
    665         }
    666 
    667         if (!assertEqualWithStringSearch(strsrch, search)) {
    668             collator.setStrength(TERTIARY);
    669             return false;
    670         }
    671         collator.setStrength(TERTIARY);
    672         return true;
    673     }
    674 
    675     boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) {
    676         int           count       = 0;
    677         int   matchindex  = search.offset[count];
    678         String matchtext;
    679 
    680         if (strsrch.getMatchStart() != SearchIterator.DONE ||
    681             strsrch.getMatchLength() != 0) {
    682             errln("Error with the initialization of match start and length");
    683         }
    684         // start of following matches
    685         while (matchindex >= 0) {
    686             int matchlength = search.size[count];
    687             strsrch.next();
    688             //int x = strsrch.getMatchStart();
    689             if (matchindex != strsrch.getMatchStart() ||
    690                 matchlength != strsrch.getMatchLength()) {
    691                 errln("Text: " + search.text);
    692                 errln("Searching forward for pattern: " + strsrch.getPattern());
    693                 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
    694                 return false;
    695             }
    696             count ++;
    697 
    698             matchtext = strsrch.getMatchedText();
    699             String targetText = search.text;
    700             if (matchlength > 0 &&
    701                 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
    702                 errln("Error getting following matched text");
    703             }
    704 
    705             matchindex = search.offset[count];
    706         }
    707         strsrch.next();
    708         if (strsrch.getMatchStart() != SearchIterator.DONE ||
    709             strsrch.getMatchLength() != 0) {
    710                 errln("Text: " + search.text);
    711                 errln("Searching forward for pattern: " + strsrch.getPattern());
    712                 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
    713                 return false;
    714         }
    715         // start of preceding matches
    716         count = count == 0 ? 0 : count - 1;
    717         matchindex = search.offset[count];
    718         while (matchindex >= 0) {
    719             int matchlength = search.size[count];
    720             strsrch.previous();
    721             if (matchindex != strsrch.getMatchStart() ||
    722                 matchlength != strsrch.getMatchLength()) {
    723                 errln("Text: " + search.text);
    724                 errln("Searching backward for pattern: " + strsrch.getPattern());
    725                 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
    726                 return false;
    727             }
    728 
    729             matchtext = strsrch.getMatchedText();
    730             String targetText = search.text;
    731             if (matchlength > 0 &&
    732                 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
    733                 errln("Error getting following matched text");
    734             }
    735 
    736             matchindex = count > 0 ? search.offset[count - 1] : -1;
    737             count --;
    738         }
    739         strsrch.previous();
    740         if (strsrch.getMatchStart() != SearchIterator.DONE ||
    741             strsrch.getMatchLength() != 0) {
    742                 errln("Text: " + search.text);
    743                 errln("Searching backward for pattern: " + strsrch.getPattern());
    744                 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
    745                 return false;
    746         }
    747         return true;
    748     }
    749 
    750     @Test
    751     public void TestConstructor()
    752     {
    753         String pattern = "pattern";
    754         String text = "text";
    755         StringCharacterIterator textiter = new StringCharacterIterator(text);
    756         Collator defaultcollator = Collator.getInstance();
    757         BreakIterator breaker = BreakIterator.getCharacterInstance();
    758         breaker.setText(text);
    759         StringSearch search = new StringSearch(pattern, text);
    760         if (!search.getPattern().equals(pattern)
    761             || !search.getTarget().equals(textiter)
    762             || !search.getCollator().equals(defaultcollator)
    763             /*|| !search.getBreakIterator().equals(breaker)*/) {
    764             errln("StringSearch(String, String) error");
    765         }
    766         search = new StringSearch(pattern, textiter, m_fr_fr_);
    767         if (!search.getPattern().equals(pattern)
    768             || !search.getTarget().equals(textiter)
    769             || !search.getCollator().equals(m_fr_fr_)
    770             /*|| !search.getBreakIterator().equals(breaker)*/) {
    771             errln("StringSearch(String, StringCharacterIterator, "
    772                   + "RuleBasedCollator) error");
    773         }
    774         Locale de = new Locale("de", "DE");
    775         breaker = BreakIterator.getCharacterInstance(de);
    776         breaker.setText(text);
    777         search = new StringSearch(pattern, textiter, de);
    778         if (!search.getPattern().equals(pattern)
    779             || !search.getTarget().equals(textiter)
    780             || !search.getCollator().equals(Collator.getInstance(de))
    781             /*|| !search.getBreakIterator().equals(breaker)*/) {
    782             errln("StringSearch(String, StringCharacterIterator, Locale) "
    783                   + "error");
    784         }
    785 
    786         search = new StringSearch(pattern, textiter, m_fr_fr_,
    787                                   m_en_wordbreaker_);
    788         if (!search.getPattern().equals(pattern)
    789             || !search.getTarget().equals(textiter)
    790             || !search.getCollator().equals(m_fr_fr_)
    791             || !search.getBreakIterator().equals(m_en_wordbreaker_)) {
    792             errln("StringSearch(String, StringCharacterIterator, Locale) "
    793                   + "error");
    794         }
    795     }
    796 
    797     @Test
    798     public void TestBasic() {
    799         for (int count = 0; count < BASIC.length; count++) {
    800             if (!assertEqual(BASIC[count])) {
    801                 errln("Error at test number " + count);
    802             }
    803         }
    804     }
    805 
    806     @Test
    807     public void TestBreakIterator() {
    808 
    809         String text = BREAKITERATOREXACT[0].text;
    810         String pattern = BREAKITERATOREXACT[0].pattern;
    811         StringSearch strsrch = null;
    812         try {
    813             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
    814         } catch (Exception e) {
    815             errln("Error opening string search");
    816             return;
    817         }
    818 
    819         strsrch.setBreakIterator(null);
    820         if (strsrch.getBreakIterator() != null) {
    821             errln("Error usearch_getBreakIterator returned wrong object");
    822         }
    823 
    824         strsrch.setBreakIterator(m_en_characterbreaker_);
    825         if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) {
    826             errln("Error usearch_getBreakIterator returned wrong object");
    827         }
    828 
    829         strsrch.setBreakIterator(m_en_wordbreaker_);
    830         if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) {
    831             errln("Error usearch_getBreakIterator returned wrong object");
    832         }
    833 
    834         int count = 0;
    835         while (count < 4) {
    836             // special purposes for tests numbers 0-3
    837             SearchData        search   = BREAKITERATOREXACT[count];
    838             RuleBasedCollator collator = getCollator(search.collator);
    839             BreakIterator     breaker  = getBreakIterator(search.breaker);
    840                   //StringSearch      strsrch;
    841 
    842             text = search.text;
    843             pattern = search.pattern;
    844             if (breaker != null) {
    845                 breaker.setText(text);
    846             }
    847             collator.setStrength(search.strength);
    848             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
    849             if (strsrch.getBreakIterator() != breaker) {
    850                 errln("Error setting break iterator");
    851             }
    852             if (!assertEqualWithStringSearch(strsrch, search)) {
    853                 collator.setStrength(TERTIARY);
    854             }
    855             search   = BREAKITERATOREXACT[count + 1];
    856             breaker  = getBreakIterator(search.breaker);
    857             if (breaker != null) {
    858                 breaker.setText(text);
    859             }
    860             strsrch.setBreakIterator(breaker);
    861             if (strsrch.getBreakIterator() != breaker) {
    862                 errln("Error setting break iterator");
    863             }
    864             strsrch.reset();
    865             if (!assertEqualWithStringSearch(strsrch, search)) {
    866                  errln("Error at test number " + count);
    867             }
    868             count += 2;
    869         }
    870         for (count = 0; count < BREAKITERATOREXACT.length; count++) {
    871             if (!assertEqual(BREAKITERATOREXACT[count])) {
    872                 errln("Error at test number " + count);
    873             }
    874         }
    875     }
    876 
    877     @Test
    878     public void TestBreakIteratorCanonical() {
    879         int        count  = 0;
    880         while (count < 4) {
    881             // special purposes for tests numbers 0-3
    882             SearchData     search   = BREAKITERATORCANONICAL[count];
    883 
    884             String text = search.text;
    885             String pattern = search.pattern;
    886             RuleBasedCollator collator = getCollator(search.collator);
    887             collator.setStrength(search.strength);
    888 
    889             BreakIterator breaker = getBreakIterator(search.breaker);
    890             StringSearch  strsrch = null;
    891             try {
    892                 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
    893             } catch (Exception e) {
    894                 errln("Error creating string search data");
    895                 return;
    896             }
    897             strsrch.setCanonical(true);
    898             if (!strsrch.getBreakIterator().equals(breaker)) {
    899                 errln("Error setting break iterator");
    900                 return;
    901             }
    902             if (!assertEqualWithStringSearch(strsrch, search)) {
    903                 collator.setStrength(TERTIARY);
    904                 return;
    905             }
    906             search  = BREAKITERATOREXACT[count + 1];
    907             breaker = getBreakIterator(search.breaker);
    908             breaker.setText(strsrch.getTarget());
    909             strsrch.setBreakIterator(breaker);
    910             if (!strsrch.getBreakIterator().equals(breaker)) {
    911                 errln("Error setting break iterator");
    912                 return;
    913             }
    914             strsrch.reset();
    915             strsrch.setCanonical(true);
    916             if (!assertEqualWithStringSearch(strsrch, search)) {
    917                  errln("Error at test number " + count);
    918                  return;
    919             }
    920             count += 2;
    921         }
    922 
    923         for (count = 0; count < BREAKITERATORCANONICAL.length; count++) {
    924              if (!assertEqual(BREAKITERATORCANONICAL[count])) {
    925                  errln("Error at test number " + count);
    926                  return;
    927              }
    928         }
    929     }
    930 
    931     @Test
    932     public void TestCanonical() {
    933         for (int count = 0; count < BASICCANONICAL.length; count++) {
    934             if (!assertCanonicalEqual(BASICCANONICAL[count])) {
    935                 errln("Error at test number " + count);
    936             }
    937         }
    938     }
    939 
    940     @Test
    941     public void TestCollator() {
    942         // test collator that thinks "o" and "p" are the same thing
    943         String text = COLLATOR[0].text;
    944         String pattern  = COLLATOR[0].pattern;
    945         StringSearch strsrch = null;
    946         try {
    947             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
    948         } catch (Exception e) {
    949             errln("Error opening string search ");
    950             return;
    951         }
    952         if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
    953             return;
    954         }
    955         String rules = TESTCOLLATORRULE;
    956         RuleBasedCollator tailored = null;
    957         try {
    958             tailored = new RuleBasedCollator(rules);
    959             tailored.setStrength(COLLATOR[1].strength);
    960         } catch (Exception e) {
    961             errln("Error opening rule based collator ");
    962             return;
    963         }
    964 
    965         strsrch.setCollator(tailored);
    966         if (!strsrch.getCollator().equals(tailored)) {
    967             errln("Error setting rule based collator");
    968         }
    969         strsrch.reset();
    970         if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) {
    971             return;
    972         }
    973         strsrch.setCollator(m_en_us_);
    974         strsrch.reset();
    975         if (!strsrch.getCollator().equals(m_en_us_)) {
    976             errln("Error setting rule based collator");
    977         }
    978         if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
    979            errln("Error searching collator test");
    980         }
    981     }
    982 
    983     @Test
    984     public void TestCollatorCanonical() {
    985         /* test collator that thinks "o" and "p" are the same thing */
    986         String text = COLLATORCANONICAL[0].text;
    987         String pattern = COLLATORCANONICAL[0].pattern;
    988 
    989         StringSearch strsrch = null;
    990         try {
    991             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
    992             strsrch.setCanonical(true);
    993         } catch (Exception e) {
    994             errln("Error opening string search ");
    995         }
    996 
    997         if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
    998             return;
    999         }
   1000 
   1001         String rules = TESTCOLLATORRULE;
   1002         RuleBasedCollator tailored = null;
   1003         try {
   1004             tailored = new RuleBasedCollator(rules);
   1005             tailored.setStrength(COLLATORCANONICAL[1].strength);
   1006             tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1007         } catch (Exception e) {
   1008             errln("Error opening rule based collator ");
   1009         }
   1010 
   1011         strsrch.setCollator(tailored);
   1012         if (!strsrch.getCollator().equals(tailored)) {
   1013             errln("Error setting rule based collator");
   1014         }
   1015         strsrch.reset();
   1016         strsrch.setCanonical(true);
   1017         if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) {
   1018             logln("COLLATORCANONICAL[1] failed");  // Error should already be reported.
   1019         }
   1020         strsrch.setCollator(m_en_us_);
   1021         strsrch.reset();
   1022         if (!strsrch.getCollator().equals(m_en_us_)) {
   1023             errln("Error setting rule based collator");
   1024         }
   1025         if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
   1026             logln("COLLATORCANONICAL[0] failed");  // Error should already be reported.
   1027         }
   1028     }
   1029 
   1030     @Test
   1031     public void TestCompositeBoundaries() {
   1032         for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) {
   1033             // logln("composite " + count);
   1034             if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
   1035                 errln("Error at test number " + count);
   1036             }
   1037         }
   1038     }
   1039 
   1040     @Test
   1041     public void TestCompositeBoundariesCanonical() {
   1042         for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) {
   1043             // logln("composite " + count);
   1044             if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
   1045                 errln("Error at test number " + count);
   1046             }
   1047         }
   1048     }
   1049 
   1050     @Test
   1051     public void TestContraction() {
   1052         String rules = CONTRACTIONRULE;
   1053         RuleBasedCollator collator = null;
   1054         try {
   1055             collator = new RuleBasedCollator(rules);
   1056             collator.setStrength(TERTIARY);
   1057             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1058         } catch (Exception e) {
   1059             errln("Error opening collator ");
   1060         }
   1061         String text = "text";
   1062         String pattern = "pattern";
   1063         StringSearch strsrch = null;
   1064         try {
   1065             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
   1066         } catch (Exception e) {
   1067             errln("Error opening string search ");
   1068         }
   1069 
   1070         for (int count = 0; count< CONTRACTION.length; count++) {
   1071             text = CONTRACTION[count].text;
   1072             pattern = CONTRACTION[count].pattern;
   1073             strsrch.setTarget(new StringCharacterIterator(text));
   1074             strsrch.setPattern(pattern);
   1075             if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
   1076                 errln("Error at test number " + count);
   1077             }
   1078         }
   1079     }
   1080 
   1081     @Test
   1082     public void TestContractionCanonical() {
   1083         String rules = CONTRACTIONRULE;
   1084         RuleBasedCollator collator = null;
   1085         try {
   1086             collator = new RuleBasedCollator(rules);
   1087             collator.setStrength(TERTIARY);
   1088             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1089         } catch (Exception e) {
   1090             errln("Error opening collator ");
   1091         }
   1092         String text = "text";
   1093         String pattern = "pattern";
   1094         StringSearch strsrch = null;
   1095         try {
   1096             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
   1097             strsrch.setCanonical(true);
   1098         } catch (Exception e) {
   1099             errln("Error opening string search");
   1100         }
   1101 
   1102         for (int count = 0; count < CONTRACTIONCANONICAL.length; count++) {
   1103             text = CONTRACTIONCANONICAL[count].text;
   1104             pattern = CONTRACTIONCANONICAL[count].pattern;
   1105             strsrch.setTarget(new StringCharacterIterator(text));
   1106             strsrch.setPattern(pattern);
   1107             if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) {
   1108                 errln("Error at test number " + count);
   1109             }
   1110         }
   1111     }
   1112 
   1113     @Test
   1114     public void TestGetMatch() {
   1115         SearchData search = MATCH[0];
   1116         String text = search.text;
   1117         String pattern = search.pattern;
   1118 
   1119         StringSearch strsrch = null;
   1120         try {
   1121             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1122         } catch (Exception e) {
   1123             errln("Error opening string search ");
   1124             return;
   1125         }
   1126 
   1127         int           count      = 0;
   1128         int   matchindex = search.offset[count];
   1129         String matchtext;
   1130         while (matchindex >= 0) {
   1131             int matchlength = search.size[count];
   1132             strsrch.next();
   1133             if (matchindex != strsrch.getMatchStart() ||
   1134                 matchlength != strsrch.getMatchLength()) {
   1135                 errln("Text: " + search.text);
   1136                 errln("Pattern: " + strsrch.getPattern());
   1137                 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
   1138                 return;
   1139             }
   1140             count++;
   1141 
   1142             matchtext = strsrch.getMatchedText();
   1143             if (matchtext.length() != matchlength){
   1144                 errln("Error getting match text");
   1145             }
   1146             matchindex = search.offset[count];
   1147         }
   1148         strsrch.next();
   1149         if (strsrch.getMatchStart()  != StringSearch.DONE ||
   1150             strsrch.getMatchLength() != 0) {
   1151             errln("Error end of match not found");
   1152         }
   1153         matchtext = strsrch.getMatchedText();
   1154         if (matchtext != null) {
   1155             errln("Error getting null matches");
   1156         }
   1157     }
   1158 
   1159     @Test
   1160     public void TestGetSetAttribute() {
   1161         String  pattern = "pattern";
   1162         String  text = "text";
   1163         StringSearch  strsrch = null;
   1164         try {
   1165             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1166         } catch (Exception e) {
   1167             errln("Error opening search");
   1168             return;
   1169         }
   1170 
   1171         if (strsrch.isOverlapping()) {
   1172             errln("Error default overlaping should be false");
   1173         }
   1174         strsrch.setOverlapping(true);
   1175         if (!strsrch.isOverlapping()) {
   1176             errln("Error setting overlap true");
   1177         }
   1178         strsrch.setOverlapping(false);
   1179         if (strsrch.isOverlapping()) {
   1180             errln("Error setting overlap false");
   1181         }
   1182 
   1183         strsrch.setCanonical(true);
   1184         if (!strsrch.isCanonical()) {
   1185             errln("Error setting canonical match true");
   1186         }
   1187         strsrch.setCanonical(false);
   1188         if (strsrch.isCanonical()) {
   1189             errln("Error setting canonical match false");
   1190         }
   1191 
   1192         if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) {
   1193             errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON");
   1194         }
   1195         strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD);
   1196         if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) {
   1197             errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD");
   1198         }
   1199     }
   1200 
   1201     @Test
   1202     public void TestGetSetOffset() {
   1203         String  pattern = "1234567890123456";
   1204         String  text  = "12345678901234567890123456789012";
   1205         StringSearch  strsrch = null;
   1206         try {
   1207             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1208         } catch (Exception e) {
   1209             errln("Error opening search");
   1210 
   1211             return;
   1212         }
   1213 
   1214         /* testing out of bounds error */
   1215         try {
   1216             strsrch.setIndex(-1);
   1217             errln("Error expecting set offset error");
   1218         } catch (IndexOutOfBoundsException e) {
   1219             logln("PASS: strsrch.setIndex(-1) failed as expected");
   1220         }
   1221 
   1222         try {
   1223             strsrch.setIndex(128);
   1224             errln("Error expecting set offset error");
   1225         } catch (IndexOutOfBoundsException e) {
   1226             logln("PASS: strsrch.setIndex(128) failed as expected");
   1227         }
   1228 
   1229         for (int index = 0; index < BASIC.length; index++) {
   1230             SearchData  search      = BASIC[index];
   1231 
   1232             text =search.text;
   1233             pattern = search.pattern;
   1234             strsrch.setTarget(new StringCharacterIterator(text));
   1235             strsrch.setPattern(pattern);
   1236             strsrch.getCollator().setStrength(search.strength);
   1237             strsrch.reset();
   1238 
   1239             int count = 0;
   1240             int matchindex  = search.offset[count];
   1241 
   1242             while (matchindex >= 0) {
   1243                 int matchlength = search.size[count];
   1244                 strsrch.next();
   1245                 if (matchindex != strsrch.getMatchStart() ||
   1246                     matchlength != strsrch.getMatchLength()) {
   1247                     errln("Text: " + text);
   1248                     errln("Pattern: " + strsrch.getPattern());
   1249                     errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
   1250                     return;
   1251                 }
   1252                 matchindex = search.offset[count + 1] == -1 ? -1 :
   1253                              search.offset[count + 2];
   1254                 if (search.offset[count + 1] != -1) {
   1255                     strsrch.setIndex(search.offset[count + 1] + 1);
   1256                     if (strsrch.getIndex() != search.offset[count + 1] + 1) {
   1257                         errln("Error setting offset\n");
   1258                         return;
   1259                     }
   1260                 }
   1261 
   1262                 count += 2;
   1263             }
   1264             strsrch.next();
   1265             if (strsrch.getMatchStart() != StringSearch.DONE) {
   1266                 errln("Text: " + text);
   1267                 errln("Pattern: " + strsrch.getPattern());
   1268                 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
   1269                 return;
   1270             }
   1271         }
   1272         strsrch.getCollator().setStrength(TERTIARY);
   1273     }
   1274 
   1275     @Test
   1276     public void TestGetSetOffsetCanonical() {
   1277 
   1278         String  text = "text";
   1279         String  pattern = "pattern";
   1280         StringSearch  strsrch = null;
   1281         try {
   1282             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1283         } catch (Exception e) {
   1284             errln("Fail to open StringSearch!");
   1285             return;
   1286         }
   1287         strsrch.setCanonical(true);
   1288         //TODO: setCanonical is not sufficient for canonical match. See #10725
   1289         strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1290         /* testing out of bounds error */
   1291         try {
   1292             strsrch.setIndex(-1);
   1293             errln("Error expecting set offset error");
   1294         } catch (IndexOutOfBoundsException e) {
   1295             logln("PASS: strsrch.setIndex(-1) failed as expected");
   1296         }
   1297         try {
   1298             strsrch.setIndex(128);
   1299             errln("Error expecting set offset error");
   1300         } catch (IndexOutOfBoundsException e) {
   1301             logln("PASS: strsrch.setIndex(128) failed as expected");
   1302         }
   1303 
   1304         for (int index = 0; index < BASICCANONICAL.length; index++) {
   1305             SearchData  search      = BASICCANONICAL[index];
   1306             text = search.text;
   1307             pattern = search.pattern;
   1308             strsrch.setTarget(new StringCharacterIterator(text));
   1309             strsrch.setPattern(pattern);
   1310             int         count       = 0;
   1311             int matchindex  = search.offset[count];
   1312             while (matchindex >= 0) {
   1313                 int matchlength = search.size[count];
   1314                 strsrch.next();
   1315                 if (matchindex != strsrch.getMatchStart() ||
   1316                     matchlength != strsrch.getMatchLength()) {
   1317                     errln("Text: " + text);
   1318                     errln("Pattern: " + strsrch.getPattern());
   1319                     errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
   1320                     return;
   1321                 }
   1322                 matchindex = search.offset[count + 1] == -1 ? -1 :
   1323                              search.offset[count + 2];
   1324                 if (search.offset[count + 1] != -1) {
   1325                     strsrch.setIndex(search.offset[count + 1] + 1);
   1326                     if (strsrch.getIndex() != search.offset[count + 1] + 1) {
   1327                         errln("Error setting offset");
   1328                         return;
   1329                     }
   1330                 }
   1331 
   1332                 count += 2;
   1333             }
   1334             strsrch.next();
   1335             if (strsrch.getMatchStart() != StringSearch.DONE) {
   1336                 errln("Text: " + text);
   1337                 errln("Pattern: %s" + strsrch.getPattern());
   1338                 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
   1339                 return;
   1340             }
   1341         }
   1342         strsrch.getCollator().setStrength(TERTIARY);
   1343         strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION);
   1344     }
   1345 
   1346     @Test
   1347     public void TestIgnorable() {
   1348         String rules = IGNORABLERULE;
   1349         int        count  = 0;
   1350         RuleBasedCollator collator = null;
   1351         try {
   1352             collator = new RuleBasedCollator(rules);
   1353             collator.setStrength(IGNORABLE[count].strength);
   1354             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1355         } catch (Exception e) {
   1356             errln("Error opening collator ");
   1357             return;
   1358         }
   1359         String pattern = "pattern";
   1360         String text = "text";
   1361         StringSearch strsrch = null;
   1362         try {
   1363             strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
   1364         } catch (Exception e) {
   1365             errln("Error opening string search ");
   1366             return;
   1367         }
   1368 
   1369         for (; count < IGNORABLE.length; count++) {
   1370             text = IGNORABLE[count].text;
   1371             pattern = IGNORABLE[count].pattern;
   1372             strsrch.setTarget(new StringCharacterIterator(text));
   1373             strsrch.setPattern(pattern);
   1374             if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) {
   1375                 errln("Error at test number " + count);
   1376             }
   1377         }
   1378     }
   1379 
   1380     @Test
   1381     public void TestInitialization() {
   1382         String  pattern;
   1383         String  text;
   1384         String  temp = "a";
   1385         StringSearch  result;
   1386 
   1387         /* simple test on the pattern ce construction */
   1388         pattern = temp + temp;
   1389         text = temp + temp + temp;
   1390         try {
   1391             result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1392         } catch (Exception e) {
   1393             errln("Error opening search ");
   1394             return;
   1395         }
   1396 
   1397         /* testing if an extremely large pattern will fail the initialization */
   1398         pattern = "";
   1399         for (int count = 0; count < 512; count ++) {
   1400             pattern += temp;
   1401         }
   1402         try {
   1403             result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1404             logln("pattern:" + result.getPattern());
   1405         } catch (Exception e) {
   1406             errln("Fail: an extremely large pattern will fail the initialization");
   1407             return;
   1408         }
   1409     }
   1410 
   1411     @Test
   1412     public void TestNormCanonical() {
   1413         m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1414         for (int count = 0; count < NORMCANONICAL.length; count++) {
   1415             if (!assertCanonicalEqual(NORMCANONICAL[count])) {
   1416                 errln("Error at test number " + count);
   1417             }
   1418         }
   1419         m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
   1420     }
   1421 
   1422     @Test
   1423     public void TestNormExact() {
   1424         int count;
   1425 
   1426         m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
   1427         for (count = 0; count < BASIC.length; count++) {
   1428             if (!assertEqual(BASIC[count])) {
   1429                 errln("Error at test number " + count);
   1430             }
   1431         }
   1432         for (count = 0; count < NORMEXACT.length; count++) {
   1433             if (!assertEqual(NORMEXACT[count])) {
   1434                 errln("Error at test number " + count);
   1435             }
   1436         }
   1437         m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
   1438         for (count = 0; count < NONNORMEXACT.length; count++) {
   1439             if (!assertEqual(NONNORMEXACT[count])) {
   1440                 errln("Error at test number " + count);
   1441             }
   1442         }
   1443     }
   1444 
   1445     @Test
   1446     public void TestOpenClose() {
   1447         StringSearch            result;
   1448         BreakIterator           breakiter = m_en_wordbreaker_;
   1449         String           pattern = "";
   1450         String           text = "";
   1451         String           temp  = "a";
   1452         StringCharacterIterator  chariter= new StringCharacterIterator(text);
   1453 
   1454         /* testing null arguments */
   1455         try {
   1456             result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
   1457             errln("Error: null arguments should produce an error");
   1458         } catch (Exception e) {
   1459             logln("PASS: null arguments failed as expected");
   1460         }
   1461 
   1462         chariter.setText(text);
   1463         try {
   1464             result = new StringSearch(pattern, chariter, null, null);
   1465             errln("Error: null arguments should produce an error");
   1466         } catch (Exception e) {
   1467             logln("PASS: null arguments failed as expected");
   1468         }
   1469 
   1470         text  = String.valueOf(0x1);
   1471         try {
   1472             result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
   1473             errln("Error: Empty pattern should produce an error");
   1474         } catch (Exception e) {
   1475             logln("PASS: Empty pattern failed as expected");
   1476         }
   1477 
   1478         chariter.setText(text);
   1479         try {
   1480             result = new StringSearch(pattern, chariter, null, null);
   1481             errln("Error: Empty pattern should produce an error");
   1482         } catch (Exception e) {
   1483             logln("PASS: Empty pattern failed as expected");
   1484         }
   1485 
   1486         text = "";
   1487         pattern =temp;
   1488         try {
   1489             result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
   1490             errln("Error: Empty text should produce an error");
   1491         } catch (Exception e) {
   1492             logln("PASS: Empty text failed as expected");
   1493         }
   1494 
   1495         chariter.setText(text);
   1496         try {
   1497             result = new StringSearch(pattern, chariter, null, null);
   1498             errln("Error: Empty text should produce an error");
   1499         } catch (Exception e) {
   1500             logln("PASS: Empty text failed as expected");
   1501         }
   1502 
   1503         text += temp;
   1504         try {
   1505             result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
   1506             errln("Error: null arguments should produce an error");
   1507         } catch (Exception e) {
   1508             logln("PASS: null arguments failed as expected");
   1509         }
   1510 
   1511         chariter.setText(text);
   1512         try {
   1513             result = new StringSearch(pattern, chariter, null, null);
   1514             errln("Error: null arguments should produce an error");
   1515         } catch (Exception e) {
   1516             logln("PASS: null arguments failed as expected");
   1517         }
   1518 
   1519         try {
   1520             result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
   1521         } catch (Exception e) {
   1522             errln("Error: null break iterator is valid for opening search");
   1523         }
   1524 
   1525         try {
   1526             result = new StringSearch(pattern, chariter, m_en_us_, null);
   1527         } catch (Exception e) {
   1528             errln("Error: null break iterator is valid for opening search");
   1529         }
   1530 
   1531         try {
   1532             result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH);
   1533         } catch (Exception e) {
   1534             errln("Error: null break iterator is valid for opening search");
   1535         }
   1536 
   1537         try {
   1538             result = new StringSearch(pattern, chariter, Locale.ENGLISH);
   1539         } catch (Exception e) {
   1540             errln("Error: null break iterator is valid for opening search");
   1541         }
   1542 
   1543         try {
   1544             result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter);
   1545         } catch (Exception e) {
   1546             errln("Error: Break iterator is valid for opening search");
   1547         }
   1548 
   1549         try {
   1550             result = new StringSearch(pattern, chariter, m_en_us_, null);
   1551             logln("pattern:" + result.getPattern());
   1552         } catch (Exception e) {
   1553             errln("Error: Break iterator is valid for opening search");
   1554         }
   1555     }
   1556 
   1557     @Test
   1558     public void TestOverlap() {
   1559         int count;
   1560 
   1561         for (count = 0; count < OVERLAP.length; count++) {
   1562             if (!assertEqualWithAttribute(OVERLAP[count], false, true)) {
   1563                 errln("Error at overlap test number " + count);
   1564             }
   1565         }
   1566 
   1567         for (count = 0; count < NONOVERLAP.length; count++) {
   1568             if (!assertEqual(NONOVERLAP[count])) {
   1569                 errln("Error at non overlap test number " + count);
   1570             }
   1571         }
   1572 
   1573         for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) {
   1574             SearchData search = (OVERLAP[count]);
   1575             String text = search.text;
   1576             String pattern = search.pattern;
   1577 
   1578             RuleBasedCollator collator = getCollator(search.collator);
   1579             StringSearch strsrch = null;
   1580             try {
   1581                 strsrch  = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
   1582             } catch (Exception e) {
   1583                 errln("error open StringSearch");
   1584                 return;
   1585             }
   1586 
   1587             strsrch.setOverlapping(true);
   1588             if (!strsrch.isOverlapping()) {
   1589                 errln("Error setting overlap option");
   1590             }
   1591             if (!assertEqualWithStringSearch(strsrch, search)) {
   1592                 return;
   1593             }
   1594 
   1595             search = NONOVERLAP[count];
   1596             strsrch.setOverlapping(false);
   1597             if (strsrch.isOverlapping()) {
   1598                 errln("Error setting overlap option");
   1599             }
   1600             strsrch.reset();
   1601             if (!assertEqualWithStringSearch(strsrch, search)) {
   1602                 errln("Error at test number " + count);
   1603              }
   1604         }
   1605     }
   1606 
   1607     @Test
   1608     public void TestOverlapCanonical() {
   1609         int count;
   1610 
   1611         for (count = 0; count < OVERLAPCANONICAL.length; count++) {
   1612             if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) {
   1613                 errln("Error at overlap test number %d" + count);
   1614             }
   1615         }
   1616 
   1617         for (count = 0; count < NONOVERLAP.length; count++) {
   1618             if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
   1619                 errln("Error at non overlap test number %d" + count);
   1620             }
   1621         }
   1622 
   1623         for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) {
   1624             SearchData search = OVERLAPCANONICAL[count];
   1625             RuleBasedCollator collator = getCollator(search.collator);
   1626             StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null);
   1627             strsrch.setCanonical(true);
   1628             strsrch.setOverlapping(true);
   1629             if (strsrch.isOverlapping() != true) {
   1630                 errln("Error setting overlap option");
   1631             }
   1632             if (!assertEqualWithStringSearch(strsrch, search)) {
   1633                 strsrch = null;
   1634                 return;
   1635             }
   1636             search = NONOVERLAPCANONICAL[count];
   1637             strsrch.setOverlapping(false);
   1638             if (strsrch.isOverlapping() != false) {
   1639                 errln("Error setting overlap option");
   1640             }
   1641             strsrch.reset();
   1642             if (!assertEqualWithStringSearch(strsrch, search)) {
   1643                 strsrch = null;
   1644                 errln("Error at test number %d" + count);
   1645              }
   1646         }
   1647     }
   1648 
   1649     @Test
   1650     public void TestPattern() {
   1651         m_en_us_.setStrength(PATTERN[0].strength);
   1652         StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null);
   1653 
   1654         if (strsrch.getPattern() != PATTERN[0].pattern) {
   1655             errln("Error setting pattern");
   1656         }
   1657         if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
   1658             m_en_us_.setStrength(TERTIARY);
   1659             if (strsrch != null) {
   1660                 strsrch = null;
   1661             }
   1662             return;
   1663         }
   1664 
   1665         strsrch.setPattern(PATTERN[1].pattern);
   1666         if (PATTERN[1].pattern != strsrch.getPattern()) {
   1667             errln("Error setting pattern");
   1668             m_en_us_.setStrength(TERTIARY);
   1669             if (strsrch != null) {
   1670                 strsrch = null;
   1671             }
   1672             return;
   1673         }
   1674         strsrch.reset();
   1675 
   1676         if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) {
   1677             m_en_us_.setStrength(TERTIARY);
   1678             if (strsrch != null) {
   1679                 strsrch = null;
   1680             }
   1681             return;
   1682         }
   1683 
   1684         strsrch.setPattern(PATTERN[0].pattern);
   1685         if (PATTERN[0].pattern != strsrch.getPattern()) {
   1686             errln("Error setting pattern");
   1687             m_en_us_.setStrength(TERTIARY);
   1688             if (strsrch != null) {
   1689                 strsrch = null;
   1690             }
   1691             return;
   1692         }
   1693             strsrch.reset();
   1694 
   1695         if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
   1696             m_en_us_.setStrength(TERTIARY);
   1697             if (strsrch != null) {
   1698                 strsrch = null;
   1699             }
   1700             return;
   1701         }
   1702         /* enormous pattern size to see if this crashes */
   1703         String pattern = "";
   1704         for (int templength = 0; templength != 512; templength ++) {
   1705             pattern += 0x61;
   1706         }
   1707         try{
   1708             strsrch.setPattern(pattern);
   1709         }catch(Exception e) {
   1710             errln("Error setting pattern with size 512");
   1711         }
   1712 
   1713         m_en_us_.setStrength(TERTIARY);
   1714         if (strsrch != null) {
   1715             strsrch = null;
   1716         }
   1717     }
   1718 
   1719     @Test
   1720     public void TestPatternCanonical() {
   1721         //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text);
   1722         m_en_us_.setStrength(PATTERNCANONICAL[0].strength);
   1723         StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text),
   1724                                                 m_en_us_, null);
   1725         strsrch.setCanonical(true);
   1726 
   1727         if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
   1728             errln("Error setting pattern");
   1729         }
   1730         if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
   1731             m_en_us_.setStrength(TERTIARY);
   1732             strsrch = null;
   1733             return;
   1734         }
   1735 
   1736         strsrch.setPattern(PATTERNCANONICAL[1].pattern);
   1737         if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) {
   1738             errln("Error setting pattern");
   1739             m_en_us_.setStrength(TERTIARY);
   1740             strsrch = null;
   1741             return;
   1742         }
   1743         strsrch.reset();
   1744         strsrch.setCanonical(true);
   1745 
   1746         if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) {
   1747             m_en_us_.setStrength(TERTIARY);
   1748             strsrch = null;
   1749             return;
   1750         }
   1751 
   1752         strsrch.setPattern(PATTERNCANONICAL[0].pattern);
   1753         if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
   1754             errln("Error setting pattern");
   1755             m_en_us_.setStrength(TERTIARY);
   1756             strsrch = null;
   1757             return;
   1758         }
   1759 
   1760         strsrch.reset();
   1761         strsrch.setCanonical(true);
   1762         if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
   1763             m_en_us_.setStrength(TERTIARY);
   1764             strsrch = null;
   1765             return;
   1766         }
   1767     }
   1768 
   1769     @Test
   1770     public void TestReset() {
   1771         StringCharacterIterator text = new StringCharacterIterator("fish fish");
   1772         String pattern = "s";
   1773 
   1774         StringSearch  strsrch = new StringSearch(pattern, text, m_en_us_, null);
   1775         strsrch.setOverlapping(true);
   1776         strsrch.setCanonical(true);
   1777         strsrch.setIndex(9);
   1778         strsrch.reset();
   1779         if (strsrch.isCanonical() || strsrch.isOverlapping() ||
   1780             strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 ||
   1781             strsrch.getMatchStart() != SearchIterator.DONE) {
   1782                 errln("Error resetting string search");
   1783         }
   1784 
   1785         strsrch.previous();
   1786         if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) {
   1787             errln("Error resetting string search\n");
   1788         }
   1789     }
   1790 
   1791     @Test
   1792     public void TestSetMatch() {
   1793         for (int count = 0; count < MATCH.length; count++) {
   1794             SearchData     search = MATCH[count];
   1795             StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text),
   1796                                                     m_en_us_, null);
   1797 
   1798             int size = 0;
   1799             while (search.offset[size] != -1) {
   1800                 size ++;
   1801             }
   1802 
   1803             if (strsrch.first() != search.offset[0]) {
   1804                 errln("Error getting first match");
   1805             }
   1806             if (strsrch.last() != search.offset[size -1]) {
   1807                 errln("Error getting last match");
   1808             }
   1809 
   1810             int index = 0;
   1811             while (index < size) {
   1812                 if (index + 2 < size) {
   1813                     if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) {
   1814                         errln("Error getting following match at index " + (search.offset[index + 2]-1));
   1815                     }
   1816                 }
   1817                 if (index + 1 < size) {
   1818                     if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) {
   1819                         errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1));
   1820                     }
   1821                 }
   1822                 index += 2;
   1823             }
   1824 
   1825             if (strsrch.following(search.text.length()) != SearchIterator.DONE) {
   1826                 errln("Error expecting out of bounds match");
   1827             }
   1828             if (strsrch.preceding(0) != SearchIterator.DONE) {
   1829                 errln("Error expecting out of bounds match");
   1830             }
   1831         }
   1832     }
   1833 
   1834     @Test
   1835     public void TestStrength() {
   1836         for (int count = 0; count < STRENGTH.length; count++) {
   1837             if (!assertEqual(STRENGTH[count])) {
   1838                 errln("Error at test number " + count);
   1839             }
   1840         }
   1841     }
   1842 
   1843     @Test
   1844     public void TestStrengthCanonical() {
   1845         for (int count = 0; count < STRENGTHCANONICAL.length; count++) {
   1846             if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
   1847                 errln("Error at test number" + count);
   1848             }
   1849         }
   1850     }
   1851 
   1852     @Test
   1853     public void TestSupplementary() {
   1854         for (int count = 0; count < SUPPLEMENTARY.length; count++) {
   1855             if (!assertEqual(SUPPLEMENTARY[count])) {
   1856                 errln("Error at test number " + count);
   1857             }
   1858         }
   1859     }
   1860 
   1861     @Test
   1862     public void TestSupplementaryCanonical() {
   1863         for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) {
   1864             if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
   1865                 errln("Error at test number" + count);
   1866             }
   1867         }
   1868     }
   1869 
   1870     @Test
   1871     public void TestText() {
   1872         SearchData TEXT[] = {
   1873             SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
   1874             SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3))
   1875         };
   1876         StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text);
   1877         StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null);
   1878 
   1879         if (!t.equals(strsrch.getTarget())) {
   1880             errln("Error setting text");
   1881         }
   1882         if (!assertEqualWithStringSearch(strsrch, TEXT[0])) {
   1883             errln("Error at assertEqualWithStringSearch");
   1884             return;
   1885         }
   1886 
   1887         t = new StringCharacterIterator(TEXT[1].text);
   1888         strsrch.setTarget(t);
   1889         if (!t.equals(strsrch.getTarget())) {
   1890             errln("Error setting text");
   1891             return;
   1892         }
   1893 
   1894         if (!assertEqualWithStringSearch(strsrch, TEXT[1])) {
   1895             errln("Error at assertEqualWithStringSearch");
   1896             return;
   1897         }
   1898     }
   1899 
   1900     @Test
   1901     public void TestTextCanonical() {
   1902         StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text);
   1903         StringSearch strsrch = new StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null);
   1904         strsrch.setCanonical(true);
   1905 
   1906         if (!t.equals(strsrch.getTarget())) {
   1907             errln("Error setting text");
   1908         }
   1909         if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
   1910             strsrch = null;
   1911             return;
   1912         }
   1913 
   1914         t = new StringCharacterIterator(TEXTCANONICAL[1].text);
   1915         strsrch.setTarget(t);
   1916         if (!t.equals(strsrch.getTarget())) {
   1917             errln("Error setting text");
   1918             strsrch = null;
   1919             return;
   1920         }
   1921 
   1922         if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) {
   1923             strsrch = null;
   1924             return;
   1925         }
   1926 
   1927         t = new StringCharacterIterator(TEXTCANONICAL[0].text);
   1928         strsrch.setTarget(t);
   1929         if (!t.equals(strsrch.getTarget())) {
   1930             errln("Error setting text");
   1931             strsrch = null;
   1932             return;
   1933         }
   1934 
   1935         if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
   1936             errln("Error at assertEqualWithStringSearch");
   1937             strsrch = null;
   1938             return;
   1939         }
   1940     }
   1941 
   1942     @Test
   1943     public void TestVariable() {
   1944         m_en_us_.setAlternateHandlingShifted(true);
   1945         for (int count = 0; count < VARIABLE.length; count++) {
   1946             // logln("variable" + count);
   1947             if (!assertEqual(VARIABLE[count])) {
   1948                 errln("Error at test number " + count);
   1949             }
   1950         }
   1951         m_en_us_.setAlternateHandlingShifted(false);
   1952     }
   1953 
   1954     @Test
   1955     public void TestVariableCanonical() {
   1956         m_en_us_.setAlternateHandlingShifted(true);
   1957         for (int count = 0; count < VARIABLE.length; count++) {
   1958             // logln("variable " + count);
   1959             if (!assertCanonicalEqual(VARIABLE[count])) {
   1960                 errln("Error at test number " + count);
   1961             }
   1962         }
   1963         m_en_us_.setAlternateHandlingShifted(false);
   1964     }
   1965 
   1966     @Test
   1967     public void TestSubClass()
   1968     {
   1969         class TestSearch extends SearchIterator
   1970         {
   1971             String pattern;
   1972             String text;
   1973 
   1974             TestSearch(StringCharacterIterator target, BreakIterator breaker,
   1975                        String pattern)
   1976             {
   1977                 super(target, breaker);
   1978                 this.pattern = pattern;
   1979                 StringBuffer buffer = new StringBuffer();
   1980                 while (targetText.getIndex() != targetText.getEndIndex()) {
   1981                     buffer.append(targetText.current());
   1982                     targetText.next();
   1983                 }
   1984                 text = buffer.toString();
   1985                 targetText.setIndex(targetText.getBeginIndex());
   1986             }
   1987             @Override
   1988             protected int handleNext(int start)
   1989             {
   1990                 int match = text.indexOf(pattern, start);
   1991                 if (match < 0) {
   1992                     targetText.last();
   1993                     return DONE;
   1994                 }
   1995                 targetText.setIndex(match);
   1996                 setMatchLength(pattern.length());
   1997                 return match;
   1998             }
   1999             @Override
   2000             protected int handlePrevious(int start)
   2001             {
   2002                 int match = text.lastIndexOf(pattern, start - 1);
   2003                 if (match < 0) {
   2004                     targetText.setIndex(0);
   2005                     return DONE;
   2006                 }
   2007                 targetText.setIndex(match);
   2008                 setMatchLength(pattern.length());
   2009                 return match;
   2010             }
   2011 
   2012             @Override
   2013             public int getIndex()
   2014             {
   2015                 int result = targetText.getIndex();
   2016                 if (result < 0 || result >= text.length()) {
   2017                     return DONE;
   2018                 }
   2019                 return result;
   2020             }
   2021         }
   2022 
   2023         TestSearch search = new TestSearch(
   2024                             new StringCharacterIterator("abc abcd abc"),
   2025                             null, "abc");
   2026         int expected[] = {0, 4, 9};
   2027         for (int i = 0; i < expected.length; i ++) {
   2028             if (search.next() != expected[i]) {
   2029                 errln("Error getting next match");
   2030             }
   2031             if (search.getMatchLength() != search.pattern.length()) {
   2032                 errln("Error getting next match length");
   2033             }
   2034         }
   2035         if (search.next() != SearchIterator.DONE) {
   2036             errln("Error should have reached the end of the iteration");
   2037         }
   2038         for (int i = expected.length - 1; i >= 0; i --) {
   2039             if (search.previous() != expected[i]) {
   2040                 errln("Error getting next match");
   2041             }
   2042             if (search.getMatchLength() != search.pattern.length()) {
   2043                 errln("Error getting next match length");
   2044             }
   2045         }
   2046         if (search.previous() != SearchIterator.DONE) {
   2047             errln("Error should have reached the start of the iteration");
   2048         }
   2049     }
   2050 
   2051     //Test for ticket 5024
   2052     @Test
   2053     public void TestDiactricMatch() {
   2054         String pattern = "pattern";
   2055         String text = "text";
   2056         StringSearch strsrch = null;
   2057         try {
   2058             strsrch = new StringSearch(pattern, text);
   2059         } catch (Exception e) {
   2060             errln("Error opening string search ");
   2061             return;
   2062         }
   2063 
   2064         for (int count = 0; count < DIACTRICMATCH.length; count++) {
   2065             strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator));
   2066             strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength);
   2067             strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker));
   2068             strsrch.reset();
   2069             text = DIACTRICMATCH[count].text;
   2070             pattern = DIACTRICMATCH[count].pattern;
   2071             strsrch.setTarget(new StringCharacterIterator(text));
   2072             strsrch.setPattern(pattern);
   2073             if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) {
   2074                 errln("Error at test number " + count);
   2075             }
   2076         }
   2077     }
   2078 
   2079     @Test
   2080     public void TestUsingSearchCollator() {
   2081         String scKoText =
   2082             " " +
   2083     /*01*/  "\uAC00 " +                   // simple LV Hangul
   2084     /*03*/  "\uAC01 " +                   // simple LVT Hangul
   2085     /*05*/  "\uAC0F " +                   // LVTT, last jamo expands for search
   2086     /*07*/  "\uAFFF " +                   // LLVVVTT, every jamo expands for search
   2087     /*09*/  "\u1100\u1161\u11A8 " +       // 0xAC01 as conjoining jamo
   2088     /*13*/  "\u1100\u1161\u1100 " +       // 0xAC01 as basic conjoining jamo (per search rules)
   2089     /*17*/  "\u3131\u314F\u3131 " +       // 0xAC01 as compatibility jamo
   2090     /*21*/  "\u1100\u1161\u11B6 " +       // 0xAC0F as conjoining jamo; last expands for search
   2091     /*25*/  "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search
   2092     /*30*/  "\u1101\u1170\u11B6 " +       // 0xAFFF as conjoining jamo; all expand for search
   2093     /*34*/  "\u00E6 " +                   // small letter ae, expands
   2094     /*36*/  "\u1E4D " +                   // small letter o with tilde and acute, decomposes
   2095             "";
   2096 
   2097         String scKoPat0 = "\uAC01";
   2098         String scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo
   2099         String scKoPat2 = "\uAC0F";
   2100         String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo
   2101         String scKoPat4 = "\uAFFF";
   2102         String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo
   2103 
   2104         int[] scKoSrchOff01 = { 3,  9, 13 };
   2105         int[] scKoSrchOff23 = { 5, 21, 25 };
   2106         int[] scKoSrchOff45 = { 7, 30     };
   2107 
   2108         int[] scKoStndOff01 = { 3,  9 };
   2109         int[] scKoStndOff2  = { 5, 21 };
   2110         int[] scKoStndOff3  = { 25    };
   2111         int[] scKoStndOff45 = { 7, 30 };
   2112 
   2113         class PatternAndOffsets {
   2114             private String pattern;
   2115             private int[] offsets;
   2116             PatternAndOffsets(String pat, int[] offs) {
   2117                 pattern = pat;
   2118                 offsets = offs;
   2119             }
   2120             public String getPattern() { return pattern; }
   2121             public int[] getOffsets() { return offsets; }
   2122         }
   2123         final PatternAndOffsets[] scKoSrchPatternsOffsets = {
   2124             new PatternAndOffsets( scKoPat0, scKoSrchOff01 ),
   2125             new PatternAndOffsets( scKoPat1, scKoSrchOff01 ),
   2126             new PatternAndOffsets( scKoPat2, scKoSrchOff23 ),
   2127             new PatternAndOffsets( scKoPat3, scKoSrchOff23 ),
   2128             new PatternAndOffsets( scKoPat4, scKoSrchOff45 ),
   2129             new PatternAndOffsets( scKoPat5, scKoSrchOff45 ),
   2130         };
   2131         final PatternAndOffsets[] scKoStndPatternsOffsets = {
   2132             new PatternAndOffsets( scKoPat0, scKoStndOff01 ),
   2133             new PatternAndOffsets( scKoPat1, scKoStndOff01 ),
   2134             new PatternAndOffsets( scKoPat2, scKoStndOff2  ),
   2135             new PatternAndOffsets( scKoPat3, scKoStndOff3  ),
   2136             new PatternAndOffsets( scKoPat4, scKoStndOff45 ),
   2137             new PatternAndOffsets( scKoPat5, scKoStndOff45 ),
   2138         };
   2139 
   2140         class TUSCItem {
   2141             private String localeString;
   2142             private String text;
   2143             private PatternAndOffsets[] patternsAndOffsets;
   2144             TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) {
   2145                 localeString = locStr;
   2146                 text = txt;
   2147                 patternsAndOffsets = patsAndOffs;
   2148             }
   2149             public String getLocaleString() { return localeString; }
   2150             public String getText() { return text; }
   2151             public PatternAndOffsets[] getPatternsAndOffsets() { return patternsAndOffsets; }
   2152         }
   2153         final TUSCItem[] tuscItems = {
   2154             new TUSCItem( "root",                  scKoText, scKoStndPatternsOffsets ),
   2155             new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ),
   2156             new TUSCItem( "ko@collation=search",   scKoText, scKoSrchPatternsOffsets ),
   2157         };
   2158 
   2159         String dummyPat = "a";
   2160 
   2161         for (TUSCItem tuscItem: tuscItems) {
   2162             String localeString = tuscItem.getLocaleString();
   2163             ULocale uloc = new ULocale(localeString);
   2164             RuleBasedCollator col = null;
   2165             try {
   2166                 col = (RuleBasedCollator)Collator.getInstance(uloc);
   2167             } catch (Exception e) {
   2168                 errln("Error: in locale " + localeString + ", err in Collator.getInstance");
   2169                 continue;
   2170             }
   2171             StringCharacterIterator ci = new StringCharacterIterator(tuscItem.getText());
   2172             StringSearch srch = new StringSearch(dummyPat, ci, col);
   2173             for ( PatternAndOffsets patternAndOffsets: tuscItem.getPatternsAndOffsets() ) {
   2174                 srch.setPattern(patternAndOffsets.getPattern());
   2175                 int[] offsets = patternAndOffsets.getOffsets();
   2176                 int ioff, noff = offsets.length;
   2177                 int offset;
   2178 
   2179                 srch.reset();
   2180                 ioff = 0;
   2181                 while (true) {
   2182                     offset = srch.next();
   2183                     if (offset == SearchIterator.DONE) {
   2184                         break;
   2185                     }
   2186                     if ( ioff < noff ) {
   2187                         if ( offset != offsets[ioff] ) {
   2188                             errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset);
   2189                             //ioff = noff;
   2190                             //break;
   2191                         }
   2192                         ioff++;
   2193                     } else {
   2194                         errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected");
   2195                     }
   2196                 }
   2197                 if ( ioff < noff ) {
   2198                     errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected");
   2199                 }
   2200 
   2201                 srch.reset();
   2202                 ioff = noff;
   2203                 while (true) {
   2204                     offset = srch.previous();
   2205                     if (offset == SearchIterator.DONE) {
   2206                         break;
   2207                     }
   2208                     if ( ioff > 0 ) {
   2209                         ioff--;
   2210                         if ( offset != offsets[ioff] ) {
   2211                              errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset);
   2212                             //ioff = 0;
   2213                             // break;
   2214                         }
   2215                     } else {
   2216                         errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected");
   2217                     }
   2218                 }
   2219                 if ( ioff > 0 ) {
   2220                     errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected");
   2221                 }
   2222             }
   2223         }
   2224     }
   2225 
   2226     @Test
   2227     public void TestIndicPrefixMatch() {
   2228         for (int count = 0; count < INDICPREFIXMATCH.length; count++) {
   2229             if (!assertEqual(INDICPREFIXMATCH[count])) {
   2230                 errln("Error at test number" + count);
   2231             }
   2232         }
   2233     }
   2234 
   2235 
   2236     // Test case for ticket#12555
   2237     @Test
   2238     public void TestLongPattern() {
   2239         StringBuilder pattern = new StringBuilder();
   2240         for (int i = 0; i < 255; i++) {
   2241             pattern.append('a');
   2242         }
   2243         // appends a character producing multiple ce32 at
   2244         // index 256.
   2245         pattern.append('');
   2246 
   2247         CharacterIterator target = new StringCharacterIterator("not important");
   2248         try {
   2249             StringSearch ss = new StringSearch(pattern.toString(), target, Locale.ENGLISH);
   2250             assertNotNull("Non-null StringSearch instance", ss);
   2251         } catch (Exception e) {
   2252             errln("Error initializing a new StringSearch object");
   2253         }
   2254     }
   2255 }
   2256