1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2000-2015, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 11 /** 12 * Port From: ICU4C v2.1 : collate/StringSearchTest 13 * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp 14 **/ 15 16 package android.icu.dev.test.search; 17 18 import static android.icu.text.Collator.IDENTICAL; 19 import static android.icu.text.Collator.PRIMARY; 20 import static android.icu.text.Collator.QUATERNARY; 21 import static android.icu.text.Collator.SECONDARY; 22 import static android.icu.text.Collator.TERTIARY; 23 import static android.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD; 24 import static android.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD; 25 import static android.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON; 26 27 import java.text.CharacterIterator; 28 import java.text.StringCharacterIterator; 29 import java.util.Locale; 30 31 import org.junit.Before; 32 import org.junit.Test; 33 import org.junit.runner.RunWith; 34 import org.junit.runners.JUnit4; 35 36 import android.icu.dev.test.TestFmwk; 37 import android.icu.text.BreakIterator; 38 import android.icu.text.Collator; 39 import android.icu.text.RuleBasedCollator; 40 import android.icu.text.SearchIterator; 41 import android.icu.text.SearchIterator.ElementComparisonType; 42 import android.icu.text.StringSearch; 43 import android.icu.util.ULocale; 44 import android.icu.testsharding.MainTestShard; 45 46 @MainTestShard 47 @RunWith(JUnit4.class) 48 public class SearchTest extends TestFmwk { 49 50 //inner class 51 static class SearchData { 52 
SearchData(String text, String pattern, 53 String coll, int strength, ElementComparisonType cmpType, String breaker, 54 int[] offset, int[] size) { 55 this.text = text; 56 this.pattern = pattern; 57 this.collator = coll; 58 this.strength = strength; 59 this.cmpType = cmpType; 60 this.breaker = breaker; 61 this.offset = offset; 62 this.size = size; 63 } 64 String text; 65 String pattern; 66 String collator; 67 int strength; 68 ElementComparisonType cmpType; 69 String breaker; 70 int[] offset; 71 int[] size; 72 } 73 74 RuleBasedCollator m_en_us_; 75 RuleBasedCollator m_fr_fr_; 76 RuleBasedCollator m_de_; 77 RuleBasedCollator m_es_; 78 BreakIterator m_en_wordbreaker_; 79 BreakIterator m_en_characterbreaker_; 80 81 // Just calling SearchData constructor, to make the test data source code 82 // nice and short 83 private static SearchData SD(String text, String pattern, String coll, int strength, 84 ElementComparisonType cmpType, String breaker, int[] offset, int[] size) { 85 return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size); 86 } 87 88 // Just returning int[], to make the test data nice and short 89 private static int[] IA(int... 
elements) { 90 return elements; 91 } 92 93 static SearchData[] BASIC = { 94 SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 95 SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)), 96 SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)), 97 SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)), 98 SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)), 99 SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 100 SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)), 101 SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 102 SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 103 SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 104 SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 105 SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 106 }; 107 108 SearchData BREAKITERATOREXACT[] = { 109 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)), 110 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)), 111 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)), 112 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)), 113 SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, 
"wordbreaker", IA(0, 54, -1), IA(7, 7)), 114 /* jitterbug 1745 */ 115 SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)), 116 SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)), 117 SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)), 118 }; 119 120 SearchData BREAKITERATORCANONICAL[] = { 121 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)), 122 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)), 123 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)), 124 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)), 125 SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)), 126 /* jitterbug 1745 */ 127 SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)), 128 SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)), 129 SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)), 130 }; 131 132 SearchData BASICCANONICAL[] = { 133 SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 134 SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)), 135 SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), 
IA(6, 6)), 136 SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)), 137 SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)), 138 SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 139 SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)), 140 141 SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 142 SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 143 SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 144 SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 145 SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 146 SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 147 SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 148 SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 149 150 SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), 151 SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), 152 }; 153 154 SearchData COLLATOR[] = { 155 /* english */ 156 SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 157 /* tailored */ 158 SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)), 159 }; 160 161 String TESTCOLLATORRULE = "& o,O ; p,P"; 162 String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; 
\u00fc & UE ; \u00dc"; 163 164 SearchData COLLATORCANONICAL[] = { 165 /* english */ 166 SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 167 /* tailored */ 168 SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)), 169 }; 170 171 SearchData COMPOSITEBOUNDARIES[] = { 172 SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 173 SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 174 SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 175 SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 176 SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 177 SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 178 179 /* first one matches only because it's at the start of the text */ 180 SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 181 182 /* \\u0300 blocked by \\u0300 */ 183 SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 184 185 /* A + 030A + 0301 */ 186 SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 187 SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 188 189 SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 190 SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 191 192 SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 193 194 SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 195 196 /* blocked accent */ 197 SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 198 SD("\u01FA", "\u0301A", null, 
TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 199 200 SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 201 SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 202 SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 203 204 SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 205 206 SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 207 SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 208 209 SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 210 211 SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 212 SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 213 SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)), 214 }; 215 216 SearchData COMPOSITEBOUNDARIESCANONICAL[] = { 217 SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 218 SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 219 SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 220 SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 221 SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 222 SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 223 224 /* first one matches only because it's at the start of the text */ 225 SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 226 227 /* \u0300 blocked by \u0300 */ 228 SD("\u00C0\u0300", "\u0300", null, TERTIARY, 
STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 229 230 /* A + 030A + 0301 */ 231 SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 232 SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 233 234 SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 235 SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 236 237 SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 238 239 SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 240 241 /* blocked accent */ 242 SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 243 SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 244 245 SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 246 SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 247 SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 248 249 SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 250 251 SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 252 SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 253 254 SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 255 256 SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 257 SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 258 259 SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)), 260 }; 261 262 SearchData SUPPLEMENTARY[] = { 263 SD("abc \uD800\uDC00 
\uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", 264 "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)), 265 SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)), 266 SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 267 SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 268 SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 269 SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 270 }; 271 272 String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315"; 273 274 SearchData CONTRACTION[] = { 275 /* common discontiguous */ 276 SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 277 278 SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 279 280 /* contraction prefix */ 281 SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 282 283 SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 284 SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 285 286 /* 287 * discontiguous problem here for backwards iteration. 
accents not found because discontiguous stores all 288 * information 289 */ 290 SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 291 /* ends not with a contraction character */ 292 SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 293 SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 294 SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 295 /* blocked discontiguous */ 296 SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 297 298 /* 299 * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the 300 * match fails because it ends in the middle of an expansion... 301 */ 302 SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 303 }; 304 305 SearchData CONTRACTIONCANONICAL[] = { 306 /* common discontiguous */ 307 SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 308 SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 309 310 /* contraction prefix */ 311 SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 312 313 SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 314 SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 315 316 /* 317 * discontiguous problem here for backwards iteration. 
forwards gives 0, 4 but backwards give 1, 3 318 */ 319 /* 320 * {"X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, {0, -1), {4}), 321 */ 322 323 /* ends not with a contraction character */ 324 SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 325 SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 326 327 SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 328 329 /* blocked discontiguous */ 330 SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 331 332 /* 333 * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the 334 * match fails because it ends in the middle of an expansion... 335 */ 336 SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)), 337 }; 338 339 SearchData MATCH[] = { 340 SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)), 341 /* 012345678901234567890123456789012345678901234567890 */ 342 SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)), 343 }; 344 345 String IGNORABLERULE = "&a = \u0300"; 346 347 SearchData IGNORABLE[] = { 348 /* 349 * This isn't much of a test when matches have to be on grapheme boundiaries. The match at 0 only works because it's 350 * at the start of the text. 
351 */ 352 SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), 353 }; 354 355 SearchData DIACTRICMATCH[] = { 356 SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)), 357 SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303", 358 null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)), 359 SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)), 360 }; 361 362 SearchData NORMCANONICAL[] = { 363 SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 364 SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 365 SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 366 SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 367 SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 368 SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 369 }; 370 371 SearchData NORMEXACT[] = { 372 SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 373 }; 374 375 SearchData NONNORMEXACT[] = { 376 SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 377 }; 378 379 SearchData OVERLAP[] = { 380 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)), 381 }; 382 383 SearchData NONOVERLAP[] = { 384 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)), 385 }; 386 
387 SearchData OVERLAPCANONICAL[] = { 388 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)), 389 }; 390 391 SearchData NONOVERLAPCANONICAL[] = { 392 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)), 393 }; 394 395 SearchData PATTERNCANONICAL[] = { 396 SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)), 397 SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 398 }; 399 400 SearchData PATTERN[] = { 401 SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)), 402 SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 403 }; 404 405 String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, " 406 + "\u00E7a p\u00E8che par, " 407 + "p\u00E9cher, " 408 + "une p\u00EAche, " 409 + "un p\u00EAcher, " 410 + "j\u2019ai p\u00EAch\u00E9, " 411 + "un p\u00E9cheur, " 412 + "\u201Cp\u00E9che\u201D, " 413 + "decomp peche\u0301, " 414 + "base peche"; 415 // in the above, the interesting words and their offsets are: 416 // 3 pe<301>che<301> 417 // 13 pe<300>che 418 // 24 pe<301>cher 419 // 36 pe<302>che 420 // 46 pe<302>cher 421 // 59 pe<302>che<301> 422 // 69 pe<301>cheur 423 // 79 pe<301>che 424 // 94 peche<+301> 425 // 107 peche 426 427 SearchData STRENGTH[] = { 428 /* 012345678901234567890123456789012345678901234567890123456789 */ 429 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 430 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)), 431 SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher 
p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", 432 "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)), 433 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)), 434 SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)), 435 SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1, 0)), 436 437 /* some tests for modified element comparison, ticket #7093 */ 438 SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 439 SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 440 SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)), 441 SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 442 SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 443 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)), 444 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)), 445 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)), 446 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)), 447 
SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)), 448 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)), 449 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 450 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 451 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 452 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 453 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 454 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 455 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 456 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 457 458 /* more tests for modified element comparison (with fr), ticket #7093 */ 459 SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 460 SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 461 SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, 
null, IA(107, -1), IA(5)), 462 SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 463 SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 464 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)), 465 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)), 466 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)), 467 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)), 468 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)), 469 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)), 470 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 471 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 472 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 473 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 474 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 475 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, 
PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 476 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 477 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 478 479 }; 480 481 SearchData STRENGTHCANONICAL[] = { 482 /* 012345678901234567890123456789012345678901234567890123456789 */ 483 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 484 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)), 485 SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", 486 "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)), 487 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)), 488 SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)), 489 }; 490 491 SearchData SUPPLEMENTARYCANONICAL[] = { 492 /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ 493 SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00", 494 null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)), 495 SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)), 496 SD("and \uD834\uDDB9 this sentence", 
" \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 497 SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 498 SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 499 SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 500 }; 501 502 static SearchData VARIABLE[] = { 503 /* 012345678901234567890123456789012345678901234567890123456789 */ 504 SD("blackbirds black blackbirds blackbird black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)), 505 506 /* 507 * to see that it doesn't go into an infinite loop if the start of text is a ignorable character 508 */ 509 SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 510 SD("abcdefghijklmnopqrstuvwxyz", " ", 511 null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, 512 IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1), 513 IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), 514 515 /* testing tightest match */ 516 SD(" abc a bc ab c a bc ab c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)), 517 /* 012345678901234567890123456789012345678901234567890123456789 */ 518 SD(" abc a bc ab c a bc ab c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)), 519 520 /* totally ignorable text */ 521 SD(" ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 522 }; 523 524 static SearchData TEXTCANONICAL[] = { 525 SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)), 526 SD("the quick brown fox", "fox", null, TERTIARY, 
STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)), 527 }; 528 529 static SearchData INDICPREFIXMATCH[] = { 530 SD("\u0915\u0020\u0915\u0901\u0020\u0915\u0902\u0020\u0915\u0903\u0020\u0915\u0940\u0020\u0915\u093F\u0020\u0915\u0943\u0020\u0915\u093C\u0020\u0958", 531 "\u0915", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 5, 8, 11, 14, 17, 20, 23,-1), IA(1, 2, 2, 2, 1, 1, 1, 2, 1)), 532 SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947", 533 "\u0915\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 3, 7, 11, -1), IA(2, 2, 2, 2)), 534 SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947", 535 "\u0915\u0943\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 19, -1), IA(3, 3)), 536 }; 537 538 /** 539 * Constructor 540 */ 541 public SearchTest() 542 { 543 544 } 545 546 @Before 547 public void init() throws Exception { 548 m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US); 549 m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE); 550 m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE")); 551 m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES")); 552 m_en_wordbreaker_ = BreakIterator.getWordInstance(); 553 m_en_characterbreaker_ = BreakIterator.getCharacterInstance(); 554 String rules = m_de_.getRules() + EXTRACOLLATIONRULE; 555 m_de_ = new RuleBasedCollator(rules); 556 rules = m_es_.getRules() + EXTRACOLLATIONRULE; 557 m_es_ = new RuleBasedCollator(rules); 558 559 } 560 561 RuleBasedCollator getCollator(String collator) { 562 if (collator == null) { 563 return m_en_us_; 564 } if (collator.equals("fr")) { 565 return m_fr_fr_; 566 } else if (collator.equals("de")) { 567 return m_de_; 568 } else if (collator.equals("es")) { 569 return m_es_; 570 } else { 571 return m_en_us_; 572 } 573 } 574 
575 BreakIterator getBreakIterator(String breaker) { 576 if (breaker == null) { 577 return null; 578 } if (breaker.equals("wordbreaker")) { 579 return m_en_wordbreaker_; 580 } else { 581 return m_en_characterbreaker_; 582 } 583 } 584 585 boolean assertCanonicalEqual(SearchData search) { 586 Collator collator = getCollator(search.collator); 587 BreakIterator breaker = getBreakIterator(search.breaker); 588 StringSearch strsrch; 589 590 String text = search.text; 591 String pattern = search.pattern; 592 593 if (breaker != null) { 594 breaker.setText(text); 595 } 596 collator.setStrength(search.strength); 597 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 598 try { 599 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); 600 strsrch.setElementComparisonType(search.cmpType); 601 strsrch.setCanonical(true); 602 } catch (Exception e) { 603 errln("Error opening string search" + e.getMessage()); 604 return false; 605 } 606 607 if (!assertEqualWithStringSearch(strsrch, search)) { 608 collator.setStrength(TERTIARY); 609 collator.setDecomposition(Collator.NO_DECOMPOSITION); 610 return false; 611 } 612 collator.setStrength(TERTIARY); 613 collator.setDecomposition(Collator.NO_DECOMPOSITION); 614 return true; 615 } 616 617 boolean assertEqual(SearchData search) { 618 Collator collator = getCollator(search.collator); 619 BreakIterator breaker = getBreakIterator(search.breaker); 620 StringSearch strsrch; 621 622 String text = search.text; 623 String pattern = search.pattern; 624 625 if (breaker != null) { 626 breaker.setText(text); 627 } 628 collator.setStrength(search.strength); 629 try { 630 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); 631 strsrch.setElementComparisonType(search.cmpType); 632 } catch (Exception e) { 633 errln("Error opening string search " + e.getMessage()); 634 return false; 635 } 636 637 if 
(!assertEqualWithStringSearch(strsrch, search)) { 638 collator.setStrength(TERTIARY); 639 return false; 640 } 641 collator.setStrength(TERTIARY); 642 return true; 643 } 644 645 boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) { 646 Collator collator = getCollator(search.collator); 647 BreakIterator breaker = getBreakIterator(search.breaker); 648 StringSearch strsrch; 649 650 String text = search.text; 651 String pattern = search.pattern; 652 653 if (breaker != null) { 654 breaker.setText(text); 655 } 656 collator.setStrength(search.strength); 657 try { 658 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); 659 strsrch.setCanonical(canonical); 660 strsrch.setOverlapping(overlap); 661 strsrch.setElementComparisonType(search.cmpType); 662 } catch (Exception e) { 663 errln("Error opening string search " + e.getMessage()); 664 return false; 665 } 666 667 if (!assertEqualWithStringSearch(strsrch, search)) { 668 collator.setStrength(TERTIARY); 669 return false; 670 } 671 collator.setStrength(TERTIARY); 672 return true; 673 } 674 675 boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) { 676 int count = 0; 677 int matchindex = search.offset[count]; 678 String matchtext; 679 680 if (strsrch.getMatchStart() != SearchIterator.DONE || 681 strsrch.getMatchLength() != 0) { 682 errln("Error with the initialization of match start and length"); 683 } 684 // start of following matches 685 while (matchindex >= 0) { 686 int matchlength = search.size[count]; 687 strsrch.next(); 688 //int x = strsrch.getMatchStart(); 689 if (matchindex != strsrch.getMatchStart() || 690 matchlength != strsrch.getMatchLength()) { 691 errln("Text: " + search.text); 692 errln("Searching forward for pattern: " + strsrch.getPattern()); 693 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 694 return 
false; 695 } 696 count ++; 697 698 matchtext = strsrch.getMatchedText(); 699 String targetText = search.text; 700 if (matchlength > 0 && 701 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) { 702 errln("Error getting following matched text"); 703 } 704 705 matchindex = search.offset[count]; 706 } 707 strsrch.next(); 708 if (strsrch.getMatchStart() != SearchIterator.DONE || 709 strsrch.getMatchLength() != 0) { 710 errln("Text: " + search.text); 711 errln("Searching forward for pattern: " + strsrch.getPattern()); 712 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 713 return false; 714 } 715 // start of preceding matches 716 count = count == 0 ? 0 : count - 1; 717 matchindex = search.offset[count]; 718 while (matchindex >= 0) { 719 int matchlength = search.size[count]; 720 strsrch.previous(); 721 if (matchindex != strsrch.getMatchStart() || 722 matchlength != strsrch.getMatchLength()) { 723 errln("Text: " + search.text); 724 errln("Searching backward for pattern: " + strsrch.getPattern()); 725 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 726 return false; 727 } 728 729 matchtext = strsrch.getMatchedText(); 730 String targetText = search.text; 731 if (matchlength > 0 && 732 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) { 733 errln("Error getting following matched text"); 734 } 735 736 matchindex = count > 0 ? 
search.offset[count - 1] : -1; 737 count --; 738 } 739 strsrch.previous(); 740 if (strsrch.getMatchStart() != SearchIterator.DONE || 741 strsrch.getMatchLength() != 0) { 742 errln("Text: " + search.text); 743 errln("Searching backward for pattern: " + strsrch.getPattern()); 744 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 745 return false; 746 } 747 return true; 748 } 749 750 @Test 751 public void TestConstructor() 752 { 753 String pattern = "pattern"; 754 String text = "text"; 755 StringCharacterIterator textiter = new StringCharacterIterator(text); 756 Collator defaultcollator = Collator.getInstance(); 757 BreakIterator breaker = BreakIterator.getCharacterInstance(); 758 breaker.setText(text); 759 StringSearch search = new StringSearch(pattern, text); 760 if (!search.getPattern().equals(pattern) 761 || !search.getTarget().equals(textiter) 762 || !search.getCollator().equals(defaultcollator) 763 /*|| !search.getBreakIterator().equals(breaker)*/) { 764 errln("StringSearch(String, String) error"); 765 } 766 search = new StringSearch(pattern, textiter, m_fr_fr_); 767 if (!search.getPattern().equals(pattern) 768 || !search.getTarget().equals(textiter) 769 || !search.getCollator().equals(m_fr_fr_) 770 /*|| !search.getBreakIterator().equals(breaker)*/) { 771 errln("StringSearch(String, StringCharacterIterator, " 772 + "RuleBasedCollator) error"); 773 } 774 Locale de = new Locale("de", "DE"); 775 breaker = BreakIterator.getCharacterInstance(de); 776 breaker.setText(text); 777 search = new StringSearch(pattern, textiter, de); 778 if (!search.getPattern().equals(pattern) 779 || !search.getTarget().equals(textiter) 780 || !search.getCollator().equals(Collator.getInstance(de)) 781 /*|| !search.getBreakIterator().equals(breaker)*/) { 782 errln("StringSearch(String, StringCharacterIterator, Locale) " 783 + "error"); 784 } 785 786 search = new StringSearch(pattern, textiter, m_fr_fr_, 787 m_en_wordbreaker_); 788 if 
(!search.getPattern().equals(pattern) 789 || !search.getTarget().equals(textiter) 790 || !search.getCollator().equals(m_fr_fr_) 791 || !search.getBreakIterator().equals(m_en_wordbreaker_)) { 792 errln("StringSearch(String, StringCharacterIterator, Locale) " 793 + "error"); 794 } 795 } 796 797 @Test 798 public void TestBasic() { 799 for (int count = 0; count < BASIC.length; count++) { 800 if (!assertEqual(BASIC[count])) { 801 errln("Error at test number " + count); 802 } 803 } 804 } 805 806 @Test 807 public void TestBreakIterator() { 808 809 String text = BREAKITERATOREXACT[0].text; 810 String pattern = BREAKITERATOREXACT[0].pattern; 811 StringSearch strsrch = null; 812 try { 813 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 814 } catch (Exception e) { 815 errln("Error opening string search"); 816 return; 817 } 818 819 strsrch.setBreakIterator(null); 820 if (strsrch.getBreakIterator() != null) { 821 errln("Error usearch_getBreakIterator returned wrong object"); 822 } 823 824 strsrch.setBreakIterator(m_en_characterbreaker_); 825 if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) { 826 errln("Error usearch_getBreakIterator returned wrong object"); 827 } 828 829 strsrch.setBreakIterator(m_en_wordbreaker_); 830 if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) { 831 errln("Error usearch_getBreakIterator returned wrong object"); 832 } 833 834 int count = 0; 835 while (count < 4) { 836 // special purposes for tests numbers 0-3 837 SearchData search = BREAKITERATOREXACT[count]; 838 RuleBasedCollator collator = getCollator(search.collator); 839 BreakIterator breaker = getBreakIterator(search.breaker); 840 //StringSearch strsrch; 841 842 text = search.text; 843 pattern = search.pattern; 844 if (breaker != null) { 845 breaker.setText(text); 846 } 847 collator.setStrength(search.strength); 848 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker); 849 if 
(strsrch.getBreakIterator() != breaker) { 850 errln("Error setting break iterator"); 851 } 852 if (!assertEqualWithStringSearch(strsrch, search)) { 853 collator.setStrength(TERTIARY); 854 } 855 search = BREAKITERATOREXACT[count + 1]; 856 breaker = getBreakIterator(search.breaker); 857 if (breaker != null) { 858 breaker.setText(text); 859 } 860 strsrch.setBreakIterator(breaker); 861 if (strsrch.getBreakIterator() != breaker) { 862 errln("Error setting break iterator"); 863 } 864 strsrch.reset(); 865 if (!assertEqualWithStringSearch(strsrch, search)) { 866 errln("Error at test number " + count); 867 } 868 count += 2; 869 } 870 for (count = 0; count < BREAKITERATOREXACT.length; count++) { 871 if (!assertEqual(BREAKITERATOREXACT[count])) { 872 errln("Error at test number " + count); 873 } 874 } 875 } 876 877 @Test 878 public void TestBreakIteratorCanonical() { 879 int count = 0; 880 while (count < 4) { 881 // special purposes for tests numbers 0-3 882 SearchData search = BREAKITERATORCANONICAL[count]; 883 884 String text = search.text; 885 String pattern = search.pattern; 886 RuleBasedCollator collator = getCollator(search.collator); 887 collator.setStrength(search.strength); 888 889 BreakIterator breaker = getBreakIterator(search.breaker); 890 StringSearch strsrch = null; 891 try { 892 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker); 893 } catch (Exception e) { 894 errln("Error creating string search data"); 895 return; 896 } 897 strsrch.setCanonical(true); 898 if (!strsrch.getBreakIterator().equals(breaker)) { 899 errln("Error setting break iterator"); 900 return; 901 } 902 if (!assertEqualWithStringSearch(strsrch, search)) { 903 collator.setStrength(TERTIARY); 904 return; 905 } 906 search = BREAKITERATOREXACT[count + 1]; 907 breaker = getBreakIterator(search.breaker); 908 breaker.setText(strsrch.getTarget()); 909 strsrch.setBreakIterator(breaker); 910 if (!strsrch.getBreakIterator().equals(breaker)) { 911 errln("Error 
setting break iterator"); 912 return; 913 } 914 strsrch.reset(); 915 strsrch.setCanonical(true); 916 if (!assertEqualWithStringSearch(strsrch, search)) { 917 errln("Error at test number " + count); 918 return; 919 } 920 count += 2; 921 } 922 923 for (count = 0; count < BREAKITERATORCANONICAL.length; count++) { 924 if (!assertEqual(BREAKITERATORCANONICAL[count])) { 925 errln("Error at test number " + count); 926 return; 927 } 928 } 929 } 930 931 @Test 932 public void TestCanonical() { 933 for (int count = 0; count < BASICCANONICAL.length; count++) { 934 if (!assertCanonicalEqual(BASICCANONICAL[count])) { 935 errln("Error at test number " + count); 936 } 937 } 938 } 939 940 @Test 941 public void TestCollator() { 942 // test collator that thinks "o" and "p" are the same thing 943 String text = COLLATOR[0].text; 944 String pattern = COLLATOR[0].pattern; 945 StringSearch strsrch = null; 946 try { 947 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 948 } catch (Exception e) { 949 errln("Error opening string search "); 950 return; 951 } 952 if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) { 953 return; 954 } 955 String rules = TESTCOLLATORRULE; 956 RuleBasedCollator tailored = null; 957 try { 958 tailored = new RuleBasedCollator(rules); 959 tailored.setStrength(COLLATOR[1].strength); 960 } catch (Exception e) { 961 errln("Error opening rule based collator "); 962 return; 963 } 964 965 strsrch.setCollator(tailored); 966 if (!strsrch.getCollator().equals(tailored)) { 967 errln("Error setting rule based collator"); 968 } 969 strsrch.reset(); 970 if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) { 971 return; 972 } 973 strsrch.setCollator(m_en_us_); 974 strsrch.reset(); 975 if (!strsrch.getCollator().equals(m_en_us_)) { 976 errln("Error setting rule based collator"); 977 } 978 if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) { 979 errln("Error searching collator test"); 980 } 981 } 982 983 @Test 984 public void 
TestCollatorCanonical() { 985 /* test collator that thinks "o" and "p" are the same thing */ 986 String text = COLLATORCANONICAL[0].text; 987 String pattern = COLLATORCANONICAL[0].pattern; 988 989 StringSearch strsrch = null; 990 try { 991 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 992 strsrch.setCanonical(true); 993 } catch (Exception e) { 994 errln("Error opening string search "); 995 } 996 997 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) { 998 return; 999 } 1000 1001 String rules = TESTCOLLATORRULE; 1002 RuleBasedCollator tailored = null; 1003 try { 1004 tailored = new RuleBasedCollator(rules); 1005 tailored.setStrength(COLLATORCANONICAL[1].strength); 1006 tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1007 } catch (Exception e) { 1008 errln("Error opening rule based collator "); 1009 } 1010 1011 strsrch.setCollator(tailored); 1012 if (!strsrch.getCollator().equals(tailored)) { 1013 errln("Error setting rule based collator"); 1014 } 1015 strsrch.reset(); 1016 strsrch.setCanonical(true); 1017 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) { 1018 logln("COLLATORCANONICAL[1] failed"); // Error should already be reported. 1019 } 1020 strsrch.setCollator(m_en_us_); 1021 strsrch.reset(); 1022 if (!strsrch.getCollator().equals(m_en_us_)) { 1023 errln("Error setting rule based collator"); 1024 } 1025 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) { 1026 logln("COLLATORCANONICAL[0] failed"); // Error should already be reported. 
1027 } 1028 } 1029 1030 @Test 1031 public void TestCompositeBoundaries() { 1032 for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) { 1033 // logln("composite " + count); 1034 if (!assertEqual(COMPOSITEBOUNDARIES[count])) { 1035 errln("Error at test number " + count); 1036 } 1037 } 1038 } 1039 1040 @Test 1041 public void TestCompositeBoundariesCanonical() { 1042 for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) { 1043 // logln("composite " + count); 1044 if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) { 1045 errln("Error at test number " + count); 1046 } 1047 } 1048 } 1049 1050 @Test 1051 public void TestContraction() { 1052 String rules = CONTRACTIONRULE; 1053 RuleBasedCollator collator = null; 1054 try { 1055 collator = new RuleBasedCollator(rules); 1056 collator.setStrength(TERTIARY); 1057 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1058 } catch (Exception e) { 1059 errln("Error opening collator "); 1060 } 1061 String text = "text"; 1062 String pattern = "pattern"; 1063 StringSearch strsrch = null; 1064 try { 1065 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1066 } catch (Exception e) { 1067 errln("Error opening string search "); 1068 } 1069 1070 for (int count = 0; count< CONTRACTION.length; count++) { 1071 text = CONTRACTION[count].text; 1072 pattern = CONTRACTION[count].pattern; 1073 strsrch.setTarget(new StringCharacterIterator(text)); 1074 strsrch.setPattern(pattern); 1075 if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) { 1076 errln("Error at test number " + count); 1077 } 1078 } 1079 } 1080 1081 @Test 1082 public void TestContractionCanonical() { 1083 String rules = CONTRACTIONRULE; 1084 RuleBasedCollator collator = null; 1085 try { 1086 collator = new RuleBasedCollator(rules); 1087 collator.setStrength(TERTIARY); 1088 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1089 } catch (Exception e) { 1090 errln("Error 
opening collator "); 1091 } 1092 String text = "text"; 1093 String pattern = "pattern"; 1094 StringSearch strsrch = null; 1095 try { 1096 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1097 strsrch.setCanonical(true); 1098 } catch (Exception e) { 1099 errln("Error opening string search"); 1100 } 1101 1102 for (int count = 0; count < CONTRACTIONCANONICAL.length; count++) { 1103 text = CONTRACTIONCANONICAL[count].text; 1104 pattern = CONTRACTIONCANONICAL[count].pattern; 1105 strsrch.setTarget(new StringCharacterIterator(text)); 1106 strsrch.setPattern(pattern); 1107 if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) { 1108 errln("Error at test number " + count); 1109 } 1110 } 1111 } 1112 1113 @Test 1114 public void TestGetMatch() { 1115 SearchData search = MATCH[0]; 1116 String text = search.text; 1117 String pattern = search.pattern; 1118 1119 StringSearch strsrch = null; 1120 try { 1121 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1122 } catch (Exception e) { 1123 errln("Error opening string search "); 1124 return; 1125 } 1126 1127 int count = 0; 1128 int matchindex = search.offset[count]; 1129 String matchtext; 1130 while (matchindex >= 0) { 1131 int matchlength = search.size[count]; 1132 strsrch.next(); 1133 if (matchindex != strsrch.getMatchStart() || 1134 matchlength != strsrch.getMatchLength()) { 1135 errln("Text: " + search.text); 1136 errln("Pattern: " + strsrch.getPattern()); 1137 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1138 return; 1139 } 1140 count++; 1141 1142 matchtext = strsrch.getMatchedText(); 1143 if (matchtext.length() != matchlength){ 1144 errln("Error getting match text"); 1145 } 1146 matchindex = search.offset[count]; 1147 } 1148 strsrch.next(); 1149 if (strsrch.getMatchStart() != StringSearch.DONE || 1150 strsrch.getMatchLength() != 0) { 1151 errln("Error end of match not found"); 1152 } 
1153 matchtext = strsrch.getMatchedText(); 1154 if (matchtext != null) { 1155 errln("Error getting null matches"); 1156 } 1157 } 1158 1159 @Test 1160 public void TestGetSetAttribute() { 1161 String pattern = "pattern"; 1162 String text = "text"; 1163 StringSearch strsrch = null; 1164 try { 1165 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1166 } catch (Exception e) { 1167 errln("Error opening search"); 1168 return; 1169 } 1170 1171 if (strsrch.isOverlapping()) { 1172 errln("Error default overlaping should be false"); 1173 } 1174 strsrch.setOverlapping(true); 1175 if (!strsrch.isOverlapping()) { 1176 errln("Error setting overlap true"); 1177 } 1178 strsrch.setOverlapping(false); 1179 if (strsrch.isOverlapping()) { 1180 errln("Error setting overlap false"); 1181 } 1182 1183 strsrch.setCanonical(true); 1184 if (!strsrch.isCanonical()) { 1185 errln("Error setting canonical match true"); 1186 } 1187 strsrch.setCanonical(false); 1188 if (strsrch.isCanonical()) { 1189 errln("Error setting canonical match false"); 1190 } 1191 1192 if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) { 1193 errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON"); 1194 } 1195 strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD); 1196 if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) { 1197 errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD"); 1198 } 1199 } 1200 1201 @Test 1202 public void TestGetSetOffset() { 1203 String pattern = "1234567890123456"; 1204 String text = "12345678901234567890123456789012"; 1205 StringSearch strsrch = null; 1206 try { 1207 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1208 } catch (Exception e) { 1209 errln("Error opening search"); 1210 1211 return; 1212 } 1213 1214 /* testing out of bounds error */ 1215 try { 1216 
strsrch.setIndex(-1); 1217 errln("Error expecting set offset error"); 1218 } catch (IndexOutOfBoundsException e) { 1219 logln("PASS: strsrch.setIndex(-1) failed as expected"); 1220 } 1221 1222 try { 1223 strsrch.setIndex(128); 1224 errln("Error expecting set offset error"); 1225 } catch (IndexOutOfBoundsException e) { 1226 logln("PASS: strsrch.setIndex(128) failed as expected"); 1227 } 1228 1229 for (int index = 0; index < BASIC.length; index++) { 1230 SearchData search = BASIC[index]; 1231 1232 text =search.text; 1233 pattern = search.pattern; 1234 strsrch.setTarget(new StringCharacterIterator(text)); 1235 strsrch.setPattern(pattern); 1236 strsrch.getCollator().setStrength(search.strength); 1237 strsrch.reset(); 1238 1239 int count = 0; 1240 int matchindex = search.offset[count]; 1241 1242 while (matchindex >= 0) { 1243 int matchlength = search.size[count]; 1244 strsrch.next(); 1245 if (matchindex != strsrch.getMatchStart() || 1246 matchlength != strsrch.getMatchLength()) { 1247 errln("Text: " + text); 1248 errln("Pattern: " + strsrch.getPattern()); 1249 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1250 return; 1251 } 1252 matchindex = search.offset[count + 1] == -1 ? 
-1 : 1253 search.offset[count + 2]; 1254 if (search.offset[count + 1] != -1) { 1255 strsrch.setIndex(search.offset[count + 1] + 1); 1256 if (strsrch.getIndex() != search.offset[count + 1] + 1) { 1257 errln("Error setting offset\n"); 1258 return; 1259 } 1260 } 1261 1262 count += 2; 1263 } 1264 strsrch.next(); 1265 if (strsrch.getMatchStart() != StringSearch.DONE) { 1266 errln("Text: " + text); 1267 errln("Pattern: " + strsrch.getPattern()); 1268 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1269 return; 1270 } 1271 } 1272 strsrch.getCollator().setStrength(TERTIARY); 1273 } 1274 1275 @Test 1276 public void TestGetSetOffsetCanonical() { 1277 1278 String text = "text"; 1279 String pattern = "pattern"; 1280 StringSearch strsrch = null; 1281 try { 1282 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1283 } catch (Exception e) { 1284 errln("Fail to open StringSearch!"); 1285 return; 1286 } 1287 strsrch.setCanonical(true); 1288 //TODO: setCanonical is not sufficient for canonical match. 
See #10725 1289 strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1290 /* testing out of bounds error */ 1291 try { 1292 strsrch.setIndex(-1); 1293 errln("Error expecting set offset error"); 1294 } catch (IndexOutOfBoundsException e) { 1295 logln("PASS: strsrch.setIndex(-1) failed as expected"); 1296 } 1297 try { 1298 strsrch.setIndex(128); 1299 errln("Error expecting set offset error"); 1300 } catch (IndexOutOfBoundsException e) { 1301 logln("PASS: strsrch.setIndex(128) failed as expected"); 1302 } 1303 1304 for (int index = 0; index < BASICCANONICAL.length; index++) { 1305 SearchData search = BASICCANONICAL[index]; 1306 text = search.text; 1307 pattern = search.pattern; 1308 strsrch.setTarget(new StringCharacterIterator(text)); 1309 strsrch.setPattern(pattern); 1310 int count = 0; 1311 int matchindex = search.offset[count]; 1312 while (matchindex >= 0) { 1313 int matchlength = search.size[count]; 1314 strsrch.next(); 1315 if (matchindex != strsrch.getMatchStart() || 1316 matchlength != strsrch.getMatchLength()) { 1317 errln("Text: " + text); 1318 errln("Pattern: " + strsrch.getPattern()); 1319 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1320 return; 1321 } 1322 matchindex = search.offset[count + 1] == -1 ? 
-1 : 1323 search.offset[count + 2]; 1324 if (search.offset[count + 1] != -1) { 1325 strsrch.setIndex(search.offset[count + 1] + 1); 1326 if (strsrch.getIndex() != search.offset[count + 1] + 1) { 1327 errln("Error setting offset"); 1328 return; 1329 } 1330 } 1331 1332 count += 2; 1333 } 1334 strsrch.next(); 1335 if (strsrch.getMatchStart() != StringSearch.DONE) { 1336 errln("Text: " + text); 1337 errln("Pattern: %s" + strsrch.getPattern()); 1338 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1339 return; 1340 } 1341 } 1342 strsrch.getCollator().setStrength(TERTIARY); 1343 strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION); 1344 } 1345 1346 @Test 1347 public void TestIgnorable() { 1348 String rules = IGNORABLERULE; 1349 int count = 0; 1350 RuleBasedCollator collator = null; 1351 try { 1352 collator = new RuleBasedCollator(rules); 1353 collator.setStrength(IGNORABLE[count].strength); 1354 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1355 } catch (Exception e) { 1356 errln("Error opening collator "); 1357 return; 1358 } 1359 String pattern = "pattern"; 1360 String text = "text"; 1361 StringSearch strsrch = null; 1362 try { 1363 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1364 } catch (Exception e) { 1365 errln("Error opening string search "); 1366 return; 1367 } 1368 1369 for (; count < IGNORABLE.length; count++) { 1370 text = IGNORABLE[count].text; 1371 pattern = IGNORABLE[count].pattern; 1372 strsrch.setTarget(new StringCharacterIterator(text)); 1373 strsrch.setPattern(pattern); 1374 if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) { 1375 errln("Error at test number " + count); 1376 } 1377 } 1378 } 1379 1380 @Test 1381 public void TestInitialization() { 1382 String pattern; 1383 String text; 1384 String temp = "a"; 1385 StringSearch result; 1386 1387 /* simple test on the pattern ce construction */ 1388 pattern = temp + temp; 1389 text = 
temp + temp + temp; 1390 try { 1391 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1392 } catch (Exception e) { 1393 errln("Error opening search "); 1394 return; 1395 } 1396 1397 /* testing if an extremely large pattern will fail the initialization */ 1398 pattern = ""; 1399 for (int count = 0; count < 512; count ++) { 1400 pattern += temp; 1401 } 1402 try { 1403 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1404 logln("pattern:" + result.getPattern()); 1405 } catch (Exception e) { 1406 errln("Fail: an extremely large pattern will fail the initialization"); 1407 return; 1408 } 1409 } 1410 1411 @Test 1412 public void TestNormCanonical() { 1413 m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1414 for (int count = 0; count < NORMCANONICAL.length; count++) { 1415 if (!assertCanonicalEqual(NORMCANONICAL[count])) { 1416 errln("Error at test number " + count); 1417 } 1418 } 1419 m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION); 1420 } 1421 1422 @Test 1423 public void TestNormExact() { 1424 int count; 1425 1426 m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1427 for (count = 0; count < BASIC.length; count++) { 1428 if (!assertEqual(BASIC[count])) { 1429 errln("Error at test number " + count); 1430 } 1431 } 1432 for (count = 0; count < NORMEXACT.length; count++) { 1433 if (!assertEqual(NORMEXACT[count])) { 1434 errln("Error at test number " + count); 1435 } 1436 } 1437 m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION); 1438 for (count = 0; count < NONNORMEXACT.length; count++) { 1439 if (!assertEqual(NONNORMEXACT[count])) { 1440 errln("Error at test number " + count); 1441 } 1442 } 1443 } 1444 1445 @Test 1446 public void TestOpenClose() { 1447 StringSearch result; 1448 BreakIterator breakiter = m_en_wordbreaker_; 1449 String pattern = ""; 1450 String text = ""; 1451 String temp = "a"; 1452 StringCharacterIterator chariter= new StringCharacterIterator(text); 
1453 1454 /* testing null arguments */ 1455 try { 1456 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1457 errln("Error: null arguments should produce an error"); 1458 } catch (Exception e) { 1459 logln("PASS: null arguments failed as expected"); 1460 } 1461 1462 chariter.setText(text); 1463 try { 1464 result = new StringSearch(pattern, chariter, null, null); 1465 errln("Error: null arguments should produce an error"); 1466 } catch (Exception e) { 1467 logln("PASS: null arguments failed as expected"); 1468 } 1469 1470 text = String.valueOf(0x1); 1471 try { 1472 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1473 errln("Error: Empty pattern should produce an error"); 1474 } catch (Exception e) { 1475 logln("PASS: Empty pattern failed as expected"); 1476 } 1477 1478 chariter.setText(text); 1479 try { 1480 result = new StringSearch(pattern, chariter, null, null); 1481 errln("Error: Empty pattern should produce an error"); 1482 } catch (Exception e) { 1483 logln("PASS: Empty pattern failed as expected"); 1484 } 1485 1486 text = ""; 1487 pattern =temp; 1488 try { 1489 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1490 errln("Error: Empty text should produce an error"); 1491 } catch (Exception e) { 1492 logln("PASS: Empty text failed as expected"); 1493 } 1494 1495 chariter.setText(text); 1496 try { 1497 result = new StringSearch(pattern, chariter, null, null); 1498 errln("Error: Empty text should produce an error"); 1499 } catch (Exception e) { 1500 logln("PASS: Empty text failed as expected"); 1501 } 1502 1503 text += temp; 1504 try { 1505 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1506 errln("Error: null arguments should produce an error"); 1507 } catch (Exception e) { 1508 logln("PASS: null arguments failed as expected"); 1509 } 1510 1511 chariter.setText(text); 1512 try { 1513 result = new StringSearch(pattern, 
chariter, null, null); 1514 errln("Error: null arguments should produce an error"); 1515 } catch (Exception e) { 1516 logln("PASS: null arguments failed as expected"); 1517 } 1518 1519 try { 1520 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1521 } catch (Exception e) { 1522 errln("Error: null break iterator is valid for opening search"); 1523 } 1524 1525 try { 1526 result = new StringSearch(pattern, chariter, m_en_us_, null); 1527 } catch (Exception e) { 1528 errln("Error: null break iterator is valid for opening search"); 1529 } 1530 1531 try { 1532 result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH); 1533 } catch (Exception e) { 1534 errln("Error: null break iterator is valid for opening search"); 1535 } 1536 1537 try { 1538 result = new StringSearch(pattern, chariter, Locale.ENGLISH); 1539 } catch (Exception e) { 1540 errln("Error: null break iterator is valid for opening search"); 1541 } 1542 1543 try { 1544 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter); 1545 } catch (Exception e) { 1546 errln("Error: Break iterator is valid for opening search"); 1547 } 1548 1549 try { 1550 result = new StringSearch(pattern, chariter, m_en_us_, null); 1551 logln("pattern:" + result.getPattern()); 1552 } catch (Exception e) { 1553 errln("Error: Break iterator is valid for opening search"); 1554 } 1555 } 1556 1557 @Test 1558 public void TestOverlap() { 1559 int count; 1560 1561 for (count = 0; count < OVERLAP.length; count++) { 1562 if (!assertEqualWithAttribute(OVERLAP[count], false, true)) { 1563 errln("Error at overlap test number " + count); 1564 } 1565 } 1566 1567 for (count = 0; count < NONOVERLAP.length; count++) { 1568 if (!assertEqual(NONOVERLAP[count])) { 1569 errln("Error at non overlap test number " + count); 1570 } 1571 } 1572 1573 for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) { 1574 SearchData search = 
(OVERLAP[count]); 1575 String text = search.text; 1576 String pattern = search.pattern; 1577 1578 RuleBasedCollator collator = getCollator(search.collator); 1579 StringSearch strsrch = null; 1580 try { 1581 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1582 } catch (Exception e) { 1583 errln("error open StringSearch"); 1584 return; 1585 } 1586 1587 strsrch.setOverlapping(true); 1588 if (!strsrch.isOverlapping()) { 1589 errln("Error setting overlap option"); 1590 } 1591 if (!assertEqualWithStringSearch(strsrch, search)) { 1592 return; 1593 } 1594 1595 search = NONOVERLAP[count]; 1596 strsrch.setOverlapping(false); 1597 if (strsrch.isOverlapping()) { 1598 errln("Error setting overlap option"); 1599 } 1600 strsrch.reset(); 1601 if (!assertEqualWithStringSearch(strsrch, search)) { 1602 errln("Error at test number " + count); 1603 } 1604 } 1605 } 1606 1607 @Test 1608 public void TestOverlapCanonical() { 1609 int count; 1610 1611 for (count = 0; count < OVERLAPCANONICAL.length; count++) { 1612 if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) { 1613 errln("Error at overlap test number %d" + count); 1614 } 1615 } 1616 1617 for (count = 0; count < NONOVERLAP.length; count++) { 1618 if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) { 1619 errln("Error at non overlap test number %d" + count); 1620 } 1621 } 1622 1623 for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) { 1624 SearchData search = OVERLAPCANONICAL[count]; 1625 RuleBasedCollator collator = getCollator(search.collator); 1626 StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null); 1627 strsrch.setCanonical(true); 1628 strsrch.setOverlapping(true); 1629 if (strsrch.isOverlapping() != true) { 1630 errln("Error setting overlap option"); 1631 } 1632 if (!assertEqualWithStringSearch(strsrch, search)) { 1633 strsrch = null; 1634 return; 1635 } 1636 search 
= NONOVERLAPCANONICAL[count]; 1637 strsrch.setOverlapping(false); 1638 if (strsrch.isOverlapping() != false) { 1639 errln("Error setting overlap option"); 1640 } 1641 strsrch.reset(); 1642 if (!assertEqualWithStringSearch(strsrch, search)) { 1643 strsrch = null; 1644 errln("Error at test number %d" + count); 1645 } 1646 } 1647 } 1648 1649 @Test 1650 public void TestPattern() { 1651 m_en_us_.setStrength(PATTERN[0].strength); 1652 StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null); 1653 1654 if (strsrch.getPattern() != PATTERN[0].pattern) { 1655 errln("Error setting pattern"); 1656 } 1657 if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) { 1658 m_en_us_.setStrength(TERTIARY); 1659 if (strsrch != null) { 1660 strsrch = null; 1661 } 1662 return; 1663 } 1664 1665 strsrch.setPattern(PATTERN[1].pattern); 1666 if (PATTERN[1].pattern != strsrch.getPattern()) { 1667 errln("Error setting pattern"); 1668 m_en_us_.setStrength(TERTIARY); 1669 if (strsrch != null) { 1670 strsrch = null; 1671 } 1672 return; 1673 } 1674 strsrch.reset(); 1675 1676 if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) { 1677 m_en_us_.setStrength(TERTIARY); 1678 if (strsrch != null) { 1679 strsrch = null; 1680 } 1681 return; 1682 } 1683 1684 strsrch.setPattern(PATTERN[0].pattern); 1685 if (PATTERN[0].pattern != strsrch.getPattern()) { 1686 errln("Error setting pattern"); 1687 m_en_us_.setStrength(TERTIARY); 1688 if (strsrch != null) { 1689 strsrch = null; 1690 } 1691 return; 1692 } 1693 strsrch.reset(); 1694 1695 if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) { 1696 m_en_us_.setStrength(TERTIARY); 1697 if (strsrch != null) { 1698 strsrch = null; 1699 } 1700 return; 1701 } 1702 /* enormous pattern size to see if this crashes */ 1703 String pattern = ""; 1704 for (int templength = 0; templength != 512; templength ++) { 1705 pattern += 0x61; 1706 } 1707 try{ 1708 strsrch.setPattern(pattern); 1709 }catch(Exception e) { 
1710 errln("Error setting pattern with size 512"); 1711 } 1712 1713 m_en_us_.setStrength(TERTIARY); 1714 if (strsrch != null) { 1715 strsrch = null; 1716 } 1717 } 1718 1719 @Test 1720 public void TestPatternCanonical() { 1721 //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text); 1722 m_en_us_.setStrength(PATTERNCANONICAL[0].strength); 1723 StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text), 1724 m_en_us_, null); 1725 strsrch.setCanonical(true); 1726 1727 if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) { 1728 errln("Error setting pattern"); 1729 } 1730 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) { 1731 m_en_us_.setStrength(TERTIARY); 1732 strsrch = null; 1733 return; 1734 } 1735 1736 strsrch.setPattern(PATTERNCANONICAL[1].pattern); 1737 if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) { 1738 errln("Error setting pattern"); 1739 m_en_us_.setStrength(TERTIARY); 1740 strsrch = null; 1741 return; 1742 } 1743 strsrch.reset(); 1744 strsrch.setCanonical(true); 1745 1746 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) { 1747 m_en_us_.setStrength(TERTIARY); 1748 strsrch = null; 1749 return; 1750 } 1751 1752 strsrch.setPattern(PATTERNCANONICAL[0].pattern); 1753 if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) { 1754 errln("Error setting pattern"); 1755 m_en_us_.setStrength(TERTIARY); 1756 strsrch = null; 1757 return; 1758 } 1759 1760 strsrch.reset(); 1761 strsrch.setCanonical(true); 1762 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) { 1763 m_en_us_.setStrength(TERTIARY); 1764 strsrch = null; 1765 return; 1766 } 1767 } 1768 1769 @Test 1770 public void TestReset() { 1771 StringCharacterIterator text = new StringCharacterIterator("fish fish"); 1772 String pattern = "s"; 1773 1774 StringSearch strsrch = new StringSearch(pattern, text, m_en_us_, null); 1775 strsrch.setOverlapping(true); 1776 
strsrch.setCanonical(true); 1777 strsrch.setIndex(9); 1778 strsrch.reset(); 1779 if (strsrch.isCanonical() || strsrch.isOverlapping() || 1780 strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 || 1781 strsrch.getMatchStart() != SearchIterator.DONE) { 1782 errln("Error resetting string search"); 1783 } 1784 1785 strsrch.previous(); 1786 if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) { 1787 errln("Error resetting string search\n"); 1788 } 1789 } 1790 1791 @Test 1792 public void TestSetMatch() { 1793 for (int count = 0; count < MATCH.length; count++) { 1794 SearchData search = MATCH[count]; 1795 StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), 1796 m_en_us_, null); 1797 1798 int size = 0; 1799 while (search.offset[size] != -1) { 1800 size ++; 1801 } 1802 1803 if (strsrch.first() != search.offset[0]) { 1804 errln("Error getting first match"); 1805 } 1806 if (strsrch.last() != search.offset[size -1]) { 1807 errln("Error getting last match"); 1808 } 1809 1810 int index = 0; 1811 while (index < size) { 1812 if (index + 2 < size) { 1813 if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) { 1814 errln("Error getting following match at index " + (search.offset[index + 2]-1)); 1815 } 1816 } 1817 if (index + 1 < size) { 1818 if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) { 1819 errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1)); 1820 } 1821 } 1822 index += 2; 1823 } 1824 1825 if (strsrch.following(search.text.length()) != SearchIterator.DONE) { 1826 errln("Error expecting out of bounds match"); 1827 } 1828 if (strsrch.preceding(0) != SearchIterator.DONE) { 1829 errln("Error expecting out of bounds match"); 1830 } 1831 } 1832 } 1833 1834 @Test 1835 public void TestStrength() { 1836 for (int count = 0; count < STRENGTH.length; count++) { 1837 if (!assertEqual(STRENGTH[count])) { 1838 
errln("Error at test number " + count); 1839 } 1840 } 1841 } 1842 1843 @Test 1844 public void TestStrengthCanonical() { 1845 for (int count = 0; count < STRENGTHCANONICAL.length; count++) { 1846 if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) { 1847 errln("Error at test number" + count); 1848 } 1849 } 1850 } 1851 1852 @Test 1853 public void TestSupplementary() { 1854 for (int count = 0; count < SUPPLEMENTARY.length; count++) { 1855 if (!assertEqual(SUPPLEMENTARY[count])) { 1856 errln("Error at test number " + count); 1857 } 1858 } 1859 } 1860 1861 @Test 1862 public void TestSupplementaryCanonical() { 1863 for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) { 1864 if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) { 1865 errln("Error at test number" + count); 1866 } 1867 } 1868 } 1869 1870 @Test 1871 public void TestText() { 1872 SearchData TEXT[] = { 1873 SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)), 1874 SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)) 1875 }; 1876 StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text); 1877 StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null); 1878 1879 if (!t.equals(strsrch.getTarget())) { 1880 errln("Error setting text"); 1881 } 1882 if (!assertEqualWithStringSearch(strsrch, TEXT[0])) { 1883 errln("Error at assertEqualWithStringSearch"); 1884 return; 1885 } 1886 1887 t = new StringCharacterIterator(TEXT[1].text); 1888 strsrch.setTarget(t); 1889 if (!t.equals(strsrch.getTarget())) { 1890 errln("Error setting text"); 1891 return; 1892 } 1893 1894 if (!assertEqualWithStringSearch(strsrch, TEXT[1])) { 1895 errln("Error at assertEqualWithStringSearch"); 1896 return; 1897 } 1898 } 1899 1900 @Test 1901 public void TestTextCanonical() { 1902 StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text); 1903 StringSearch strsrch = new 
StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null); 1904 strsrch.setCanonical(true); 1905 1906 if (!t.equals(strsrch.getTarget())) { 1907 errln("Error setting text"); 1908 } 1909 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) { 1910 strsrch = null; 1911 return; 1912 } 1913 1914 t = new StringCharacterIterator(TEXTCANONICAL[1].text); 1915 strsrch.setTarget(t); 1916 if (!t.equals(strsrch.getTarget())) { 1917 errln("Error setting text"); 1918 strsrch = null; 1919 return; 1920 } 1921 1922 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) { 1923 strsrch = null; 1924 return; 1925 } 1926 1927 t = new StringCharacterIterator(TEXTCANONICAL[0].text); 1928 strsrch.setTarget(t); 1929 if (!t.equals(strsrch.getTarget())) { 1930 errln("Error setting text"); 1931 strsrch = null; 1932 return; 1933 } 1934 1935 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) { 1936 errln("Error at assertEqualWithStringSearch"); 1937 strsrch = null; 1938 return; 1939 } 1940 } 1941 1942 @Test 1943 public void TestVariable() { 1944 m_en_us_.setAlternateHandlingShifted(true); 1945 for (int count = 0; count < VARIABLE.length; count++) { 1946 // logln("variable" + count); 1947 if (!assertEqual(VARIABLE[count])) { 1948 errln("Error at test number " + count); 1949 } 1950 } 1951 m_en_us_.setAlternateHandlingShifted(false); 1952 } 1953 1954 @Test 1955 public void TestVariableCanonical() { 1956 m_en_us_.setAlternateHandlingShifted(true); 1957 for (int count = 0; count < VARIABLE.length; count++) { 1958 // logln("variable " + count); 1959 if (!assertCanonicalEqual(VARIABLE[count])) { 1960 errln("Error at test number " + count); 1961 } 1962 } 1963 m_en_us_.setAlternateHandlingShifted(false); 1964 } 1965 1966 @Test 1967 public void TestSubClass() 1968 { 1969 class TestSearch extends SearchIterator 1970 { 1971 String pattern; 1972 String text; 1973 1974 TestSearch(StringCharacterIterator target, BreakIterator breaker, 1975 String pattern) 1976 { 1977 super(target, 
breaker); 1978 this.pattern = pattern; 1979 StringBuffer buffer = new StringBuffer(); 1980 while (targetText.getIndex() != targetText.getEndIndex()) { 1981 buffer.append(targetText.current()); 1982 targetText.next(); 1983 } 1984 text = buffer.toString(); 1985 targetText.setIndex(targetText.getBeginIndex()); 1986 } 1987 @Override 1988 protected int handleNext(int start) 1989 { 1990 int match = text.indexOf(pattern, start); 1991 if (match < 0) { 1992 targetText.last(); 1993 return DONE; 1994 } 1995 targetText.setIndex(match); 1996 setMatchLength(pattern.length()); 1997 return match; 1998 } 1999 @Override 2000 protected int handlePrevious(int start) 2001 { 2002 int match = text.lastIndexOf(pattern, start - 1); 2003 if (match < 0) { 2004 targetText.setIndex(0); 2005 return DONE; 2006 } 2007 targetText.setIndex(match); 2008 setMatchLength(pattern.length()); 2009 return match; 2010 } 2011 2012 @Override 2013 public int getIndex() 2014 { 2015 int result = targetText.getIndex(); 2016 if (result < 0 || result >= text.length()) { 2017 return DONE; 2018 } 2019 return result; 2020 } 2021 } 2022 2023 TestSearch search = new TestSearch( 2024 new StringCharacterIterator("abc abcd abc"), 2025 null, "abc"); 2026 int expected[] = {0, 4, 9}; 2027 for (int i = 0; i < expected.length; i ++) { 2028 if (search.next() != expected[i]) { 2029 errln("Error getting next match"); 2030 } 2031 if (search.getMatchLength() != search.pattern.length()) { 2032 errln("Error getting next match length"); 2033 } 2034 } 2035 if (search.next() != SearchIterator.DONE) { 2036 errln("Error should have reached the end of the iteration"); 2037 } 2038 for (int i = expected.length - 1; i >= 0; i --) { 2039 if (search.previous() != expected[i]) { 2040 errln("Error getting next match"); 2041 } 2042 if (search.getMatchLength() != search.pattern.length()) { 2043 errln("Error getting next match length"); 2044 } 2045 } 2046 if (search.previous() != SearchIterator.DONE) { 2047 errln("Error should have reached the start 
of the iteration"); 2048 } 2049 } 2050 2051 //Test for ticket 5024 2052 @Test 2053 public void TestDiactricMatch() { 2054 String pattern = "pattern"; 2055 String text = "text"; 2056 StringSearch strsrch = null; 2057 try { 2058 strsrch = new StringSearch(pattern, text); 2059 } catch (Exception e) { 2060 errln("Error opening string search "); 2061 return; 2062 } 2063 2064 for (int count = 0; count < DIACTRICMATCH.length; count++) { 2065 strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator)); 2066 strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength); 2067 strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker)); 2068 strsrch.reset(); 2069 text = DIACTRICMATCH[count].text; 2070 pattern = DIACTRICMATCH[count].pattern; 2071 strsrch.setTarget(new StringCharacterIterator(text)); 2072 strsrch.setPattern(pattern); 2073 if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) { 2074 errln("Error at test number " + count); 2075 } 2076 } 2077 } 2078 2079 @Test 2080 public void TestUsingSearchCollator() { 2081 String scKoText = 2082 " " + 2083 /*01*/ "\uAC00 " + // simple LV Hangul 2084 /*03*/ "\uAC01 " + // simple LVT Hangul 2085 /*05*/ "\uAC0F " + // LVTT, last jamo expands for search 2086 /*07*/ "\uAFFF " + // LLVVVTT, every jamo expands for search 2087 /*09*/ "\u1100\u1161\u11A8 " + // 0xAC01 as conjoining jamo 2088 /*13*/ "\u1100\u1161\u1100 " + // 0xAC01 as basic conjoining jamo (per search rules) 2089 /*17*/ "\u3131\u314F\u3131 " + // 0xAC01 as compatibility jamo 2090 /*21*/ "\u1100\u1161\u11B6 " + // 0xAC0F as conjoining jamo; last expands for search 2091 /*25*/ "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search 2092 /*30*/ "\u1101\u1170\u11B6 " + // 0xAFFF as conjoining jamo; all expand for search 2093 /*34*/ "\u00E6 " + // small letter ae, expands 2094 /*36*/ "\u1E4D " + // small letter o with tilde and acute, decomposes 2095 ""; 2096 2097 String scKoPat0 = "\uAC01"; 2098 String 
scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo 2099 String scKoPat2 = "\uAC0F"; 2100 String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo 2101 String scKoPat4 = "\uAFFF"; 2102 String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo 2103 2104 int[] scKoSrchOff01 = { 3, 9, 13 }; 2105 int[] scKoSrchOff23 = { 5, 21, 25 }; 2106 int[] scKoSrchOff45 = { 7, 30 }; 2107 2108 int[] scKoStndOff01 = { 3, 9 }; 2109 int[] scKoStndOff2 = { 5, 21 }; 2110 int[] scKoStndOff3 = { 25 }; 2111 int[] scKoStndOff45 = { 7, 30 }; 2112 2113 class PatternAndOffsets { 2114 private String pattern; 2115 private int[] offsets; 2116 PatternAndOffsets(String pat, int[] offs) { 2117 pattern = pat; 2118 offsets = offs; 2119 } 2120 public String getPattern() { return pattern; } 2121 public int[] getOffsets() { return offsets; } 2122 } 2123 final PatternAndOffsets[] scKoSrchPatternsOffsets = { 2124 new PatternAndOffsets( scKoPat0, scKoSrchOff01 ), 2125 new PatternAndOffsets( scKoPat1, scKoSrchOff01 ), 2126 new PatternAndOffsets( scKoPat2, scKoSrchOff23 ), 2127 new PatternAndOffsets( scKoPat3, scKoSrchOff23 ), 2128 new PatternAndOffsets( scKoPat4, scKoSrchOff45 ), 2129 new PatternAndOffsets( scKoPat5, scKoSrchOff45 ), 2130 }; 2131 final PatternAndOffsets[] scKoStndPatternsOffsets = { 2132 new PatternAndOffsets( scKoPat0, scKoStndOff01 ), 2133 new PatternAndOffsets( scKoPat1, scKoStndOff01 ), 2134 new PatternAndOffsets( scKoPat2, scKoStndOff2 ), 2135 new PatternAndOffsets( scKoPat3, scKoStndOff3 ), 2136 new PatternAndOffsets( scKoPat4, scKoStndOff45 ), 2137 new PatternAndOffsets( scKoPat5, scKoStndOff45 ), 2138 }; 2139 2140 class TUSCItem { 2141 private String localeString; 2142 private String text; 2143 private PatternAndOffsets[] patternsAndOffsets; 2144 TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) { 2145 localeString = locStr; 2146 text = txt; 2147 patternsAndOffsets = patsAndOffs; 2148 } 2149 public String 
getLocaleString() { return localeString; }
            public String getText() { return text; }
            public PatternAndOffsets[] getPatternsAndOffsets() { return patternsAndOffsets; }
        }
        final TUSCItem[] tuscItems = {
            new TUSCItem( "root",                  scKoText, scKoStndPatternsOffsets ),
            new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ),
            new TUSCItem( "ko@collation=search",   scKoText, scKoSrchPatternsOffsets ),
        };

        // Placeholder pattern; the real one is set before every search.
        String dummyPat = "a";

        for (TUSCItem tuscItem: tuscItems) {
            String localeString = tuscItem.getLocaleString();
            RuleBasedCollator col;
            try {
                col = (RuleBasedCollator) Collator.getInstance(new ULocale(localeString));
            } catch (Exception e) {
                errln("Error: in locale " + localeString + ", err in Collator.getInstance");
                continue;
            }
            StringSearch srch = new StringSearch(dummyPat, new StringCharacterIterator(tuscItem.getText()), col);
            for (PatternAndOffsets patternAndOffsets : tuscItem.getPatternsAndOffsets()) {
                srch.setPattern(patternAndOffsets.getPattern());
                int[] offsets = patternAndOffsets.getOffsets();
                int noff = offsets.length;
                int ioff = 0;
                int offset;

                // Forward pass: every match must equal the next expected
                // offset. A mismatch is reported but does not stop the pass.
                srch.reset();
                while ((offset = srch.next()) != SearchIterator.DONE) {
                    if (ioff >= noff) {
                        errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected");
                        continue;
                    }
                    if (offset != offsets[ioff]) {
                        errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset);
                    }
                    ioff++;
                }
                if (ioff < noff) {
                    errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected");
                }

                // Backward pass: walk the expected offsets from the end.
                srch.reset();
                ioff = noff;
                while (true) {
2204 offset = srch.previous(); 2205 if (offset == SearchIterator.DONE) { 2206 break; 2207 } 2208 if ( ioff > 0 ) { 2209 ioff--; 2210 if ( offset != offsets[ioff] ) { 2211 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset); 2212 //ioff = 0; 2213 // break; 2214 } 2215 } else { 2216 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected"); 2217 } 2218 } 2219 if ( ioff > 0 ) { 2220 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected"); 2221 } 2222 } 2223 } 2224 } 2225 2226 @Test 2227 public void TestIndicPrefixMatch() { 2228 for (int count = 0; count < INDICPREFIXMATCH.length; count++) { 2229 if (!assertEqual(INDICPREFIXMATCH[count])) { 2230 errln("Error at test number" + count); 2231 } 2232 } 2233 } 2234 2235 2236 // Test case for ticket#12555 2237 @Test 2238 public void TestLongPattern() { 2239 StringBuilder pattern = new StringBuilder(); 2240 for (int i = 0; i < 255; i++) { 2241 pattern.append('a'); 2242 } 2243 // appends a character producing multiple ce32 at 2244 // index 256. 2245 pattern.append(''); 2246 2247 CharacterIterator target = new StringCharacterIterator("not important"); 2248 try { 2249 StringSearch ss = new StringSearch(pattern.toString(), target, Locale.ENGLISH); 2250 assertNotNull("Non-null StringSearch instance", ss); 2251 } catch (Exception e) { 2252 errln("Error initializing a new StringSearch object"); 2253 } 2254 } 2255 } 2256