/* GENERATED SOURCE. DO NOT MODIFY. */
// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2014-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package android.icu.impl;

import java.text.CharacterIterator;
import java.util.HashSet;
import java.util.Locale;

import android.icu.impl.ICUResourceBundle.OpenType;
import android.icu.text.BreakIterator;
import android.icu.text.FilteredBreakIteratorBuilder;
import android.icu.text.UCharacterIterator;
import android.icu.util.BytesTrie;
import android.icu.util.CharsTrie;
import android.icu.util.CharsTrieBuilder;
import android.icu.util.StringTrieBuilder;
import android.icu.util.ULocale;

/**
 * A sentence break iterator that wraps another {@link BreakIterator} and
 * suppresses the breaks it reports after known exceptions (abbreviations such
 * as "Mr." or "Ph.D."). The exceptions are held in two tries produced by
 * {@link Builder}.
 *
 * @author tomzhang
 * @hide Only a subset of ICU is exposed in Android
 */
public class SimpleFilteredSentenceBreakIterator extends BreakIterator {

    /** The wrapped iterator that proposes the candidate breaks. */
    private BreakIterator delegate;
    /** Private copy of the delegate's text; refreshed by {@link #resetState()} before each scan. */
    private UCharacterIterator text; // TODO(Tom): suffice to move into the local scope in next() ?
    /** Reversed exceptions, i.e. ".srM" for "Mrs."; {@code null} means there are no exceptions. */
    private CharsTrie backwardsTrie;
    /** Full text of the exceptions that have an inner '.', e.g. "a.M."; may be {@code null}. */
    private CharsTrie forwardsPartialTrie;

    /**
     * @param adoptBreakIterator
     *            break iterator to adopt
     * @param forwardsPartialTrie
     *            forward & partial char trie to adopt
     * @param backwardsTrie
     *            backward trie to adopt
     */
    public SimpleFilteredSentenceBreakIterator(BreakIterator adoptBreakIterator, CharsTrie forwardsPartialTrie,
            CharsTrie backwardsTrie) {
        this.delegate = adoptBreakIterator;
        this.forwardsPartialTrie = forwardsPartialTrie;
        this.backwardsTrie = backwardsTrie;
    }

    /**
     * Reset the filter from the delegate: take a fresh private copy of the
     * delegate's current text so that scanning does not disturb the delegate.
     */
    private final void resetState() {
        text = UCharacterIterator.getInstance((CharacterIterator) delegate.getText().clone());
    }

    /**
     * Is there an exception at this point?
     * {@link #resetState()} must have been called and {@code backwardsTrie}
     * must be non-null before calling this.
     *
     * @param n the location of the possible break
     * @return {@code true} if the text before {@code n} matches an exception,
     *         so the break at {@code n} should be suppressed
     */
    private final boolean breakExceptionAt(int n) {
        // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

        int bestPosn = -1;
        int bestValue = -1;

        text.setIndex(n);
        backwardsTrie.reset();
        int uch;

        // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
        uch = text.previousCodePoint();
        if (uch == ' ') { // TODO: skip a class of chars here??
            // TODO only do this the 1st time?
            // Stay in front of the space so that the scan starts at the '.'.
        } else if (uch != UCharacterIterator.DONE) {
            // Not a space: undo the step back. When DONE was returned the
            // iterator did not move, so there is nothing to undo.
            text.nextCodePoint();
        }

        // Walk backwards through the text for as long as the trie can follow,
        // remembering the most recent position at which it holds a value.
        while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE) {
            BytesTrie.Result r = backwardsTrie.nextForCodePoint(uch);
            if (r.hasValue()) { // remember the best match so far
                bestPosn = text.getIndex();
                bestValue = backwardsTrie.getValue();
            }
            if (!r.hasNext()) { // nothing further in the trie
                break;
            }
        }

        if (bestPosn >= 0) {
            if (bestValue == Builder.MATCH) { // exact match!
                return true; // Exception here.
            } else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) {
                // make sure there's a forward trie
                // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
                // to see if it matches something going forward.
                forwardsPartialTrie.reset();

                BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE;
                text.setIndex(bestPosn); // hope that's close ..
                while ((uch = text.nextCodePoint()) != UCharacterIterator.DONE
                        && ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) {
                    // keep feeding the forward trie
                }
                if (rfwd.matches()) {
                    // Exception here
                    return true;
                } // else fall through
            } // else fall through
        } // else fall through
        return false; // No exception here.
    }

    /**
     * Given that the delegate has already given its "initial" answer,
     * find the NEXT actual (non-suppressed) break.
     * @param n initial position from delegate
     * @return new break position or BreakIterator.DONE
     */
    private final int internalNext(int n) {
        if (n == BreakIterator.DONE || // at end or
                backwardsTrie == null) { // .. no backwards table loaded == no exceptions
            return n;
        }
        resetState();

        final int textLen = text.getLength();

        while (n != BreakIterator.DONE && n != textLen) {
            // outer loop runs once per underlying break (from fDelegate).
            // loops while 'n' points to an exception.

            if (breakExceptionAt(n)) {
                // n points to a break exception
                n = delegate.next();
            } else {
                // no exception at this spot
                return n;
            }
        }
        return n; // hit underlying DONE or break at end of text
    }

    /**
     * Given that the delegate has already given its "initial" answer,
     * find the PREV actual (non-suppressed) break.
     * @param n initial position from delegate
     * @return new break position or BreakIterator.DONE
     */
    private final int internalPrev(int n) {
        if (n == 0 || n == BreakIterator.DONE || // at end or
                backwardsTrie == null) { // .. no backwards table loaded == no exceptions
            return n;
        }
        resetState();

        while (n != BreakIterator.DONE && n != 0) {
            // outer loop runs once per underlying break (from fDelegate).
            // loops while 'n' points to an exception.

            if (breakExceptionAt(n)) {
                // n points to a break exception
                n = delegate.previous();
            } else {
                // no exception at this spot
                return n;
            }
        }
        return n; // hit underlying DONE or break at start of text
    }

    /** Null-safe {@code a.equals(b)}; two nulls compare equal. */
    private static boolean nullSafeEquals(Object a, Object b) {
        return (a == null) ? (b == null) : a.equals(b);
    }

    /** Null-safe {@code o.hashCode()}; null hashes to 0. */
    private static int nullSafeHash(Object o) {
        return (o == null) ? 0 : o.hashCode();
    }

    /**
     * Compares delegate, text and both tries. Any of the text and the tries
     * may be null (the text before the first scan, a trie when the builder had
     * nothing to put in it), so every comparison is null-safe.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null)
            return false;
        if (this == obj)
            return true;
        if (getClass() != obj.getClass())
            return false;
        SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) obj;
        return nullSafeEquals(delegate, other.delegate)
                && nullSafeEquals(text, other.text)
                && nullSafeEquals(backwardsTrie, other.backwardsTrie)
                && nullSafeEquals(forwardsPartialTrie, other.forwardsPartialTrie);
    }

    /** Hash over the tries and the delegate; null fields contribute 0. */
    @Override
    public int hashCode() {
        return (nullSafeHash(forwardsPartialTrie) * 39) + (nullSafeHash(backwardsTrie) * 11)
                + nullSafeHash(delegate);
    }

    /**
     * Returns an independent copy. The delegate, the text iterator and the
     * tries all carry iteration state, so each is cloned rather than shared;
     * otherwise moving the clone would also move the original.
     */
    @Override
    public Object clone() {
        SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) super.clone();
        try {
            if (delegate != null) {
                other.delegate = (BreakIterator) delegate.clone();
            }
            if (text != null) {
                other.text = (UCharacterIterator) text.clone();
            }
            if (backwardsTrie != null) {
                other.backwardsTrie = (CharsTrie) backwardsTrie.clone();
            }
            if (forwardsPartialTrie != null) {
                other.forwardsPartialTrie = (CharsTrie) forwardsPartialTrie.clone();
            }
        } catch (CloneNotSupportedException e) {
            // Not expected for these types; surface it unchecked, keeping the cause.
            throw new IllegalStateException(e);
        }
        return other;
    }

    @Override
    public int first() {
        // Don't suppress a break opportunity at the beginning of text.
        return delegate.first();
    }

    @Override
    public int preceding(int offset) {
        return internalPrev(delegate.preceding(offset));
    }

    @Override
    public int previous() {
        return internalPrev(delegate.previous());
    }

    @Override
    public int current() {
        return delegate.current();
    }

    @Override
    public boolean isBoundary(int offset) {
        if (!delegate.isBoundary(offset)) {
            return false; // No underlying break to suppress?
        }

        // delegate thinks there's a break
        if (backwardsTrie == null) {
            return true; // no data
        }

        resetState();
        return !breakExceptionAt(offset); // if there's an exception: no break.
    }

    @Override
    public int next() {
        return internalNext(delegate.next());
    }

    @Override
    public int next(int n) {
        return internalNext(delegate.next(n));
    }

    @Override
    public int following(int offset) {
        return internalNext(delegate.following(offset));
    }

    @Override
    public int last() {
        // Don't suppress a break opportunity at the end of text.
        return delegate.last();
    }

    @Override
    public CharacterIterator getText() {
        return delegate.getText();
    }

    @Override
    public void setText(CharacterIterator newText) {
        delegate.setText(newText);
    }

    /**
     * Collects the exception strings and builds the tries used by
     * {@link SimpleFilteredSentenceBreakIterator}.
     */
    public static class Builder extends FilteredBreakIteratorBuilder {
        /**
         * filter set to store all exceptions. Entries are always stored as
         * {@code String}s so that lookups do not depend on the CharSequence
         * implementation the caller happened to pass in.
         */
        private HashSet<CharSequence> filterSet = new HashSet<CharSequence>();

        static final int PARTIAL = (1 << 0); // < partial - need to run through forward trie
        static final int MATCH = (1 << 1); // < exact match - skip this one.
        static final int SuppressInReverse = (1 << 0);
        static final int AddToForward = (1 << 1);

        public Builder(Locale loc) {
            this(ULocale.forLocale(loc));
        }

        /**
         * Create SimpleFilteredBreakIteratorBuilder using given locale
         * @param loc the locale to get filtered iterators
         */
        public Builder(ULocale loc) {
            ICUResourceBundle rb = ICUResourceBundle.getBundleInstance(
                    ICUData.ICU_BRKITR_BASE_NAME, loc, OpenType.LOCALE_ROOT);

            ICUResourceBundle breaks = rb.findWithFallback("exceptions/SentenceBreak");

            if (breaks != null) {
                for (int index = 0, size = breaks.getSize(); index < size; ++index) {
                    ICUResourceBundle b = (ICUResourceBundle) breaks.get(index);
                    String br = b.getString();
                    filterSet.add(br);
                }
            }
        }

        /**
         * Create SimpleFilteredBreakIteratorBuilder with no exception
         */
        public Builder() {
        }

        /**
         * Adds an exception.
         * @param str the text after which breaks are to be suppressed
         * @return true if the exception was not already present
         */
        @Override
        public boolean suppressBreakAfter(CharSequence str) {
            // Store an immutable String snapshot: CharSequence implementations do not
            // share an equals()/hashCode() contract, and a mutable one could change
            // after it has been inserted.
            return filterSet.add(str.toString());
        }

        /**
         * Removes an exception.
         * @param str the text to stop treating as an exception
         * @return true if the exception was present
         */
        @Override
        public boolean unsuppressBreakAfter(CharSequence str) {
            return filterSet.remove(str.toString());
        }

        /**
         * Wraps the given iterator with the collected exceptions.
         * @param adoptBreakIterator the iterator to wrap
         * @return the iterator itself if there are no exceptions, otherwise a
         *         new filtering iterator around it
         */
        @Override
        public BreakIterator wrapIteratorWithFilter(BreakIterator adoptBreakIterator) {
            if (filterSet.isEmpty()) {
                // Short circuit - nothing to except.
                return adoptBreakIterator;
            }

            CharsTrieBuilder builder = new CharsTrieBuilder(); // reverse table
            CharsTrieBuilder builder2 = new CharsTrieBuilder(); // forward table

            int revCount = 0;
            int fwdCount = 0;

            int subCount = filterSet.size();
            String[] ustrs = new String[subCount];
            int[] partials = new int[subCount]; // all 0 == default: no partial

            CharsTrie backwardsTrie = null; // i.e. ".srM" for Mrs.
            CharsTrie forwardsPartialTrie = null; // Has ".a" for "a.M."

            int i = 0;
            for (CharSequence s : filterSet) {
                ustrs[i++] = s.toString();
            }

            // Pass 1: find the entries with a '.' before their end ("partials") and add
            // each distinct prefix up to and including that '.' to the reverse table once.
            for (i = 0; i < subCount; i++) {
                String thisStr = ustrs[i];
                int nn = thisStr.indexOf('.'); // TODO: non-'.' abbreviations
                if (nn > -1 && (nn + 1) != thisStr.length()) {
                    // is partial.
                    // is it unique?
                    int sameAs = -1;
                    for (int j = 0; j < subCount; j++) {
                        if (j == i)
                            continue;
                        if (thisStr.regionMatches(0, ustrs[j], 0, nn + 1)) {
                            if (partials[j] == 0) { // hasn't been processed yet
                                partials[j] = SuppressInReverse | AddToForward;
                            } else if ((partials[j] & SuppressInReverse) != 0) {
                                sameAs = j; // the other entry is already in the reverse table.
                            }
                        }
                    }

                    if ((sameAs == -1) && (partials[i] == 0)) {
                        StringBuilder prefix = new StringBuilder(thisStr.substring(0, nn + 1));
                        // first one - add the prefix to the reverse table.
                        prefix.reverse();
                        builder.add(prefix, PARTIAL);
                        revCount++;
                        partials[i] = SuppressInReverse | AddToForward;
                    }
                }
            }

            // Pass 2: unmarked entries go reversed into the reverse table as exact
            // matches; marked entries go as-is into the forward table.
            for (i = 0; i < subCount; i++) {
                final String thisStr = ustrs[i];
                if (partials[i] == 0) {
                    StringBuilder reversed = new StringBuilder(thisStr).reverse();
                    builder.add(reversed, MATCH);
                    revCount++;
                } else {
                    // an optimization would be to only add the portion after the '.'
                    // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the
                    // forward,
                    // instead of "Ph.D." since we already know the "Ph." part is a match.
                    // would need the trie to be able to hold 0-length strings, though.
                    builder2.add(thisStr, MATCH); // forward
                    fwdCount++;
                }
            }

            if (revCount > 0) {
                backwardsTrie = builder.build(StringTrieBuilder.Option.FAST);
            }

            if (fwdCount > 0) {
                forwardsPartialTrie = builder2.build(StringTrieBuilder.Option.FAST);
            }
            return new SimpleFilteredSentenceBreakIterator(adoptBreakIterator, forwardsPartialTrie, backwardsTrie);
        }
    }
}