// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2012, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package com.ibm.icu.text;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale;

import com.ibm.icu.util.ULocale;

/**
 * {@icuenhanced java.text.BreakIterator}.{@icu _usage_}
 *
 * <p>A class that locates boundaries in text.  This class defines a protocol for
 * objects that break up a piece of natural-language text according to a set
 * of criteria.  Instances or subclasses of BreakIterator can be provided, for
 * example, to break a piece of text into words, sentences, or logical characters
 * according to the conventions of some language or group of languages.
 *
 * We provide five built-in types of BreakIterator:
 * <ul><li>getTitleInstance() returns a BreakIterator that locates boundaries
 * between title breaks.
 * <li>getSentenceInstance() returns a BreakIterator that locates boundaries
 * between sentences.  This is useful for triple-click selection, for example.
 * <li>getWordInstance() returns a BreakIterator that locates boundaries between
 * words.  This is useful for double-click selection or "find whole words" searches.
 * This type of BreakIterator makes sure there is a boundary position at the
 * beginning and end of each legal word.  (Numbers count as words, too.)  Whitespace
 * and punctuation are kept separate from real words.
 * <li>getLineInstance() returns a BreakIterator that locates positions where it is
 * legal for a text editor to wrap lines.  This is similar to word breaking, but
 * not the same: punctuation and whitespace are generally kept with words (you don't
 * want a line to start with whitespace, for example), and some special characters
 * can force a position to be considered a line-break position or prevent a position
 * from being a line-break position.
 * <li>getCharacterInstance() returns a BreakIterator that locates boundaries between
 * logical characters.  Because of the structure of the Unicode encoding, a logical
 * character may be stored internally as more than one Unicode code point.  (A with an
 * umlaut may be stored as an a followed by a separate combining umlaut character,
 * for example, but the user still thinks of it as one character.)  This iterator allows
 * various processes (especially text editors) to treat as characters the units of text
 * that a user would think of as characters, rather than the units of text that the
 * computer sees as "characters".</ul>
 *
 * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
 * has a concept of a "current position" and methods like first(), last(), next(),
 * and previous() that update the current position.  All BreakIterators uphold the
 * following invariants:
 * <ul><li>The beginning and end of the text are always treated as boundary positions.
 * <li>The current position of the iterator is always a boundary position (random-
 * access methods move the iterator to the nearest boundary position before or
 * after the specified position, not _to_ the specified position).
 * <li>DONE is used as a flag to indicate when iteration has stopped.  DONE is only
 * returned when the current position is the end of the text and the user calls next(),
 * or when the current position is the beginning of the text and the user calls
 * previous().
 * <li>Break positions are numbered by the positions of the characters that follow
 * them.  Thus, under normal circumstances, the position before the first character
 * is 0, the position after the first character is 1, and the position after the
 * last character is 1 plus the length of the string.
 * <li>The client can change the position of an iterator, or the text it analyzes,
 * at will, but cannot change the behavior.  If the user wants different behavior, he
 * must instantiate a new iterator.</ul>
 *
 * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
 * it possible to use BreakIterator to analyze text in any text-storage vehicle that
 * provides a CharacterIterator interface.
 *
 * <b>Note:</b>  Some types of BreakIterator can take a long time to create, and
 * instances of BreakIterator are not currently cached by the system.  For
 * optimal performance, keep instances of BreakIterator around as long as makes
 * sense.  For example, when word-wrapping a document, don't create and destroy a
 * new BreakIterator for each line.  Create one break iterator for the whole document
 * (or whatever stretch of text you're wrapping) and use it to do the whole job of
 * wrapping the text.
 *
 * <P>
 * <strong>Examples</strong>:<P>
 * Creating and using text boundaries
 * <blockquote>
 * <pre>
 * public static void main(String args[]) {
 *      if (args.length == 1) {
 *          String stringToExamine = args[0];
 *          //print each word in order
 *          BreakIterator boundary = BreakIterator.getWordInstance();
 *          boundary.setText(stringToExamine);
 *          printEachForward(boundary, stringToExamine);
 *          //print each sentence in reverse order
 *          boundary = BreakIterator.getSentenceInstance(Locale.US);
 *          boundary.setText(stringToExamine);
 *          printEachBackward(boundary, stringToExamine);
 *          printFirst(boundary, stringToExamine);
 *          printLast(boundary, stringToExamine);
 *      }
 * }
 * </pre>
 * </blockquote>
 *
 * Print each element in order
 * <blockquote>
 * <pre>
 * public static void printEachForward(BreakIterator boundary, String source) {
 *     int start = boundary.first();
 *     for (int end = boundary.next();
 *          end != BreakIterator.DONE;
 *          start = end, end = boundary.next()) {
 *          System.out.println(source.substring(start,end));
 *     }
 * }
 * </pre>
 * </blockquote>
 *
 * Print each element in reverse order
 * <blockquote>
 * <pre>
 * public static void printEachBackward(BreakIterator boundary, String source) {
 *     int end = boundary.last();
 *     for (int start = boundary.previous();
 *          start != BreakIterator.DONE;
 *          end = start, start = boundary.previous()) {
 *         System.out.println(source.substring(start,end));
 *     }
 * }
 * </pre>
 * </blockquote>
 *
 * Print first element
 * <blockquote>
 * <pre>
 * public static void printFirst(BreakIterator boundary, String source) {
 *     int start = boundary.first();
 *     int end = boundary.next();
 *     System.out.println(source.substring(start,end));
 * }
 * </pre>
 * </blockquote>
 *
 * Print last element
 * <blockquote>
 * <pre>
 * public static void printLast(BreakIterator boundary, String source) {
 *     int end = boundary.last();
 *     int start = boundary.previous();
 *     System.out.println(source.substring(start,end));
 * }
 * </pre>
 * </blockquote>
 *
 * Print the element at a specified position
 * <blockquote>
 * <pre>
 * public static void printAt(BreakIterator boundary, int pos, String source) {
 *     int end = boundary.following(pos);
 *     int start = boundary.previous();
 *     System.out.println(source.substring(start,end));
 * }
 * </pre>
 * </blockquote>
 *
 * Find the next word
 * <blockquote>
 * <pre>
 * public static int nextWordStartAfter(int pos, String text) {
 *     BreakIterator wb = BreakIterator.getWordInstance();
 *     wb.setText(text);
 *     int last = wb.following(pos);
 *     int current = wb.next();
 *     while (current != BreakIterator.DONE) {
 *         for (int p = last; p < current; p++) {
 *             if (Character.isLetter(text.charAt(p)))
 *                 return last;
 *         }
 *         last = current;
 *         current = wb.next();
 *     }
 *     return BreakIterator.DONE;
 * }
 * </pre>
 * (The iterator returned by BreakIterator.getWordInstance() is unique in that
 * the break positions it returns don't represent both the start and end of the
 * thing being iterated over.  That is, a sentence-break iterator returns breaks
 * that each represent the end of one sentence and the beginning of the next.
 * With the word-break iterator, the characters between two boundaries might be a
 * word, or they might be the punctuation or whitespace between two words.  The
 * above code uses a simple heuristic to determine which boundary is the beginning
 * of a word: If the characters between this boundary and the next boundary
 * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
 * a Hangul syllable, a Kana character, etc.), then the text between this boundary
 * and the next is a word; otherwise, it's the material between words.)
 * </blockquote>
 *
 * @see CharacterIterator
 * @stable ICU 2.0
 *
 */

public abstract class BreakIterator implements Cloneable
{

    /**
     * Default constructor.  There is no state that is carried by this abstract
     * base class.
     * @stable ICU 2.0
     */
    protected BreakIterator()
    {
    }

    /**
     * Clone method.  Creates another BreakIterator with the same behavior and
     * current state as this one.
     * @return The clone.
     * @stable ICU 2.0
     */
    @Override
    public Object clone()
    {
        try {
            return super.clone();
        }
        catch (CloneNotSupportedException e) {
            ///CLOVER:OFF
            // Cannot happen: this class implements Cloneable.  Keep the cause
            // anyway so that an impossible failure is still diagnosable.
            throw new IllegalStateException(e);
            ///CLOVER:ON
        }
    }

    /**
     * DONE is returned by previous() and next() after all valid
     * boundaries have been returned.
     * @stable ICU 2.0
     */
    public static final int DONE = -1;

    /**
     * Return the first boundary position.  This is always the beginning
     * index of the text this iterator iterates over.  For example, if
     * the iterator iterates over a whole string, this function will
     * always return 0.  This function also updates the iteration position
     * to point to the beginning of the text.
     * @return The character offset of the beginning of the stretch of text
     * being broken.
     * @stable ICU 2.0
     */
    public abstract int first();

    /**
     * Return the last boundary position.  This is always the "past-the-end"
     * index of the text this iterator iterates over.  For example, if the
     * iterator iterates over a whole string (call it "text"), this function
     * will always return text.length().  This function also updates the
     * iteration position to point to the end of the text.
     * @return The character offset of the end of the stretch of text
     * being broken.
     * @stable ICU 2.0
     */
    public abstract int last();

    /**
     * Advances the specified number of steps forward in the text (a negative
     * number, therefore, advances backwards).  If this causes the iterator
     * to advance off either end of the text, this function returns DONE;
     * otherwise, this function returns the position of the appropriate
     * boundary.  Calling this function is equivalent to calling next() or
     * previous() n times.
     * @param n The number of boundaries to advance over (if positive, moves
     * forward; if negative, moves backwards).
     * @return The position of the boundary n boundaries from the current
     * iteration position, or DONE if moving n boundaries causes the iterator
     * to advance off either end of the text.
     * @stable ICU 2.0
     */
    public abstract int next(int n);

    /**
     * Advances the iterator forward one boundary.  The current iteration
     * position is updated to point to the next boundary position after the
     * current position, and this is also the value that is returned.  If
     * the current position is equal to the value returned by last(), or to
     * DONE, this function returns DONE and sets the current position to
     * DONE.
     * @return The position of the first boundary position following the
     * iteration position.
     * @stable ICU 2.0
     */
    public abstract int next();

    /**
     * Advances the iterator backward one boundary.  The current iteration
     * position is updated to point to the last boundary position before
     * the current position, and this is also the value that is returned.  If
     * the current position is equal to the value returned by first(), or to
     * DONE, this function returns DONE and sets the current position to
     * DONE.
     * @return The position of the last boundary position preceding the
     * iteration position.
     * @stable ICU 2.0
     */
    public abstract int previous();

    /**
     * Sets the iterator's current iteration position to be the first
     * boundary position following the specified position.  (Whether the
     * specified position is itself a boundary position or not doesn't
     * matter-- this function always moves the iteration position to the
     * first boundary after the specified position.)  If the specified
     * position is the past-the-end position, returns DONE.
     * @param offset The character position to start searching from.
     * @return The position of the first boundary position following
     * "offset" (whether or not "offset" itself is a boundary position),
     * or DONE if "offset" is the past-the-end offset.
     * @stable ICU 2.0
     */
    public abstract int following(int offset);

    /**
     * Sets the iterator's current iteration position to be the last
     * boundary position preceding the specified position.  (Whether the
     * specified position is itself a boundary position or not doesn't
     * matter-- this function always moves the iteration position to the
     * last boundary before the specified position.)  If the specified
     * position is the starting position, returns DONE.
     * @param offset The character position to start searching from.
     * @return The position of the last boundary position preceding
     * "offset" (whether or not "offset" itself is a boundary position),
     * or DONE if "offset" is the starting offset of the iterator.
     * @stable ICU 2.0
     */
    public int preceding(int offset) {
        // NOTE:  This implementation is here solely because we can't add new
        // abstract methods to an existing class.  There is almost ALWAYS a
        // better, faster way to do this.
        // Jump to the first boundary after "offset", then walk backwards
        // until we are strictly before "offset" or run off the start (DONE).
        int pos = following(offset);
        while (pos >= offset && pos != DONE) {
            pos = previous();
        }
        return pos;
    }

    /**
     * Return true if the specified position is a boundary position.  If the
     * function returns true, the current iteration position is set to the
     * specified position; if the function returns false, the current
     * iteration position is set as though following() had been called.
     * @param offset the offset to check.
     * @return True if "offset" is a boundary position.
     * @stable ICU 2.0
     */
    public boolean isBoundary(int offset) {
        // Again, this is the default implementation, which is provided solely because
        // we couldn't add a new abstract method to an existing class.  The real
        // implementations will usually need to do a little more work.
        if (offset == 0) {
            // Offset 0 is reported as a boundary without consulting (or
            // repositioning) the iterator.
            return true;
        }
        else {
            // "offset" is a boundary exactly when it is the first boundary
            // after the position just before it.
            return following(offset - 1) == offset;
        }
    }

    /**
     * Return the iterator's current position.
     * @return The iterator's current position.
     * @stable ICU 2.0
     */
    public abstract int current();

    /**
     * Returns a CharacterIterator over the text being analyzed.
     * For at least some subclasses of BreakIterator, this is a reference
     * to the <b>actual iterator being used</b> by the BreakIterator,
     * and therefore, this function's return value should be treated as
     * <tt>const</tt>.  No guarantees are made about the current position
     * of this iterator when it is returned.  If you need to move that
     * position to examine the text, clone this function's return value first.
     * @return A CharacterIterator over the text being analyzed.
     * @stable ICU 2.0
     */
    public abstract CharacterIterator getText();

    /**
     * Sets the iterator to analyze a new piece of text.  The new
     * piece of text is passed in as a String, and the current
     * iteration position is reset to the beginning of the string.
     * (The old text is dropped.)
     * @param newText A String containing the text to analyze with
     * this BreakIterator.
     * @stable ICU 2.0
     */
    public void setText(String newText)
    {
        setText(new StringCharacterIterator(newText));
    }

    /**
     * Sets the iterator to analyze a new piece of text.  The
     * BreakIterator is passed a CharacterIterator through which
     * it will access the text itself.  The current iteration
     * position is reset to the CharacterIterator's start index.
     * (The old iterator is dropped.)
     * @param newText A CharacterIterator referring to the text
     * to analyze with this BreakIterator (the iterator's current
     * position is ignored, but its other state is significant).
     * @stable ICU 2.0
     */
    public abstract void setText(CharacterIterator newText);

    /**
     * {@icu} Kind selector for logical-character break iterators.
     * @stable ICU 2.4
     */
    public static final int KIND_CHARACTER = 0;
    /**
     * {@icu} Kind selector for word break iterators.
     * @stable ICU 2.4
     */
    public static final int KIND_WORD = 1;
    /**
     * {@icu} Kind selector for line break iterators.
     * @stable ICU 2.4
     */
    public static final int KIND_LINE = 2;
    /**
     * {@icu} Kind selector for sentence break iterators.
     * @stable ICU 2.4
     */
    public static final int KIND_SENTENCE = 3;
//    /**
//     * {@icu}
//     * @stable ICU 2.4
//     */
//    public static final int KIND_TITLE = 4;

    /**
     * Returns a new instance of BreakIterator that locates word boundaries.
     * This function assumes that the text being analyzed is in the default
     * locale's language.
     * @return An instance of BreakIterator that locates word boundaries.
     * @stable ICU 2.0
     */
    public static BreakIterator getWordInstance()
    {
        return getWordInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of BreakIterator that locates word boundaries.
     * @param where A locale specifying the language of the text to be
     * analyzed.
     * @return An instance of BreakIterator that locates word boundaries.
     * @stable ICU 2.0
     */
    public static BreakIterator getWordInstance(Locale where)
    {
        return getBreakInstance(where, KIND_WORD);
    }

    /**
     * {@icu} Returns a new instance of BreakIterator that locates word boundaries.
     * @param where A locale specifying the language of the text to be
     * analyzed.
     * @return An instance of BreakIterator that locates word boundaries.
     * @stable ICU 3.2
     */
    public static BreakIterator getWordInstance(ULocale where)
    {
        return getBreakInstance(where.toLocale(), KIND_WORD);
    }

    /**
     * Returns a new instance of BreakIterator that locates legal line-
     * wrapping positions.  This function assumes the text being broken
     * is in the default locale's language.
     * @return A new instance of BreakIterator that locates legal
     * line-wrapping positions.
     * @stable ICU 2.0
     */
    public static BreakIterator getLineInstance()
    {
        return getLineInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of BreakIterator that locates legal line-
     * wrapping positions.
     * @param where A Locale specifying the language of the text being broken.
     * @return A new instance of BreakIterator that locates legal
     * line-wrapping positions.
     * @stable ICU 2.0
     */
    public static BreakIterator getLineInstance(Locale where)
    {
        return getBreakInstance(where, KIND_LINE);
    }

    /**
     * {@icu} Returns a new instance of BreakIterator that locates legal line-
     * wrapping positions.
     * @param where A Locale specifying the language of the text being broken.
     * @return A new instance of BreakIterator that locates legal
     * line-wrapping positions.
     * @stable ICU 3.2
     */
    public static BreakIterator getLineInstance(ULocale where)
    {
        return getBreakInstance(where.toLocale(), KIND_LINE);
    }

    /**
     * Returns a new instance of BreakIterator that locates logical-character
     * boundaries.  This function assumes that the text being analyzed is
     * in the default locale's language.
     * @return A new instance of BreakIterator that locates logical-character
     * boundaries.
     * @stable ICU 2.0
     */
    public static BreakIterator getCharacterInstance()
    {
        return getCharacterInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of BreakIterator that locates logical-character
     * boundaries.
     * @param where A Locale specifying the language of the text being analyzed.
     * @return A new instance of BreakIterator that locates logical-character
     * boundaries.
     * @stable ICU 2.0
     */
    public static BreakIterator getCharacterInstance(Locale where)
    {
        return getBreakInstance(where, KIND_CHARACTER);
    }

    /**
     * {@icu} Returns a new instance of BreakIterator that locates logical-character
     * boundaries.
     * @param where A Locale specifying the language of the text being analyzed.
     * @return A new instance of BreakIterator that locates logical-character
     * boundaries.
     * @stable ICU 3.2
     */
    public static BreakIterator getCharacterInstance(ULocale where)
    {
        return getBreakInstance(where.toLocale(), KIND_CHARACTER);
    }

    /**
     * Returns a new instance of BreakIterator that locates sentence boundaries.
     * This function assumes the text being analyzed is in the default locale's
     * language.
     * @return A new instance of BreakIterator that locates sentence boundaries.
     * @stable ICU 2.0
     */
    public static BreakIterator getSentenceInstance()
    {
        return getSentenceInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of BreakIterator that locates sentence boundaries.
     * @param where A Locale specifying the language of the text being analyzed.
     * @return A new instance of BreakIterator that locates sentence boundaries.
     * @stable ICU 2.0
     */
    public static BreakIterator getSentenceInstance(Locale where)
    {
        return getBreakInstance(where, KIND_SENTENCE);
    }

    /**
     * {@icu} Returns a new instance of BreakIterator that locates sentence boundaries.
     * @param where A Locale specifying the language of the text being analyzed.
     * @return A new instance of BreakIterator that locates sentence boundaries.
     * @stable ICU 3.2
     */
    public static BreakIterator getSentenceInstance(ULocale where)
    {
        return getBreakInstance(where.toLocale(), KIND_SENTENCE);
    }

//    /**
//     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
//     * This function assumes the text being analyzed is in the default locale's
//     * language. The iterator returned locates title boundaries as described for
//     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
//     * please use a word boundary iterator. {@link #getWordInstance}
//     * @return A new instance of BreakIterator that locates title boundaries.
//     * @stable ICU 2.0
//     */
//    public static BreakIterator getTitleInstance()
//    {
//        return getTitleInstance(Locale.getDefault());
//    }

//    /**
//     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
//     * The iterator returned locates title boundaries as described for
//     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
//     * please use Word Boundary iterator.{@link #getWordInstance}
//     * @param where A Locale specifying the language of the text being analyzed.
//     * @return A new instance of BreakIterator that locates title boundaries.
//     * @stable ICU 2.0
//     */
//    public static BreakIterator getTitleInstance(Locale where)
//    {
//        return getBreakInstance(where, KIND_TITLE);
//    }

//    /**
//     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
//     * The iterator returned locates title boundaries as described for
//     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
//     * please use Word Boundary iterator.{@link #getWordInstance}
//     * @param where A Locale specifying the language of the text being analyzed.
//     * @return A new instance of BreakIterator that locates title boundaries.
//     * @stable ICU 3.2
//     */
//    public static BreakIterator getTitleInstance(ULocale where)
//    {
//        return getBreakInstance(where.toLocale(), KIND_TITLE);
//    }

//    /**
//     * {@icu} Registers a new break iterator of the indicated kind, to use in the given
//     * locale.  Clones of the iterator will be returned if a request for a break iterator
//     * of the given kind matches or falls back to this locale.
//     * @param iter the BreakIterator instance to adopt.
//     * @param locale the Locale for which this instance is to be registered
//     * @param kind the type of iterator for which this instance is to be registered
//     * @return a registry key that can be used to unregister this instance
//     * @stable ICU 2.4
//     */
//    public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
//        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
//    }

//    /**
//     * {@icu} Registers a new break iterator of the indicated kind, to use in the given
//     * locale.  Clones of the iterator will be returned if a request for a break iterator
//     * of the given kind matches or falls back to this locale.
//     * @param iter the BreakIterator instance to adopt.
//     * @param locale the Locale for which this instance is to be registered
//     * @param kind the type of iterator for which this instance is to be registered
//     * @return a registry key that can be used to unregister this instance
//     * @stable ICU 3.2
//     */
//    public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
//        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
//    }

//    /**
//     * {@icu} Unregisters a previously-registered BreakIterator using the key returned
//     * from the register call.  Key becomes invalid after this call and should not be used
//     * again.
//     * @param key the registry key returned by a previous call to registerInstance
//     * @return true if the iterator for the key was successfully unregistered
//     * @stable ICU 2.4
//     */
//    public static boolean unregister(Object key) {
//        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
//    }

    // end of registration

    /**
     * Returns a particular kind of BreakIterator for a locale.
     * Avoids writing a switch statement with getXYZInstance(where) calls.
     * @param where the locale specifying the language of the text to be analyzed.
     * @param kind the kind of iterator requested: one of {@link #KIND_CHARACTER},
     * {@link #KIND_WORD}, {@link #KIND_LINE} or {@link #KIND_SENTENCE}.
     * @return a new BreakIterator of the requested kind.
     * @throws IllegalArgumentException if <code>kind</code> is not one of the
     * kinds listed above.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    public static BreakIterator getBreakInstance(ULocale where, int kind) {
        // Forward the caller's kind; it must not be replaced by a fixed kind.
        return getBreakInstance(where.toLocale(), kind);
    }

    /**
     * Creates a handle that delegates to the java.text.BreakIterator of the
     * requested kind for the given locale.
     * @param where the locale passed through to java.text.BreakIterator.
     * @param kind one of the KIND_* constants declared in this class.
     * @return a new delegating BreakIterator.
     * @throws IllegalArgumentException if <code>kind</code> is not a supported kind.
     */
    private static BreakIterator getBreakInstance(Locale where, int kind) {
        final java.text.BreakIterator br;
        switch (kind) {
        case KIND_CHARACTER:
            br = java.text.BreakIterator.getCharacterInstance(where);
            break;
        case KIND_WORD:
            br = java.text.BreakIterator.getWordInstance(where);
            break;
        case KIND_LINE:
            br = java.text.BreakIterator.getLineInstance(where);
            break;
        case KIND_SENTENCE:
            br = java.text.BreakIterator.getSentenceInstance(where);
            break;
//        case KIND_TITLE: throw new UnsupportedOperationException("Title break is not supported by com.ibm.icu.base");
        default:
            // Fail here rather than wrap a null delegate that would only
            // surface later as a NullPointerException on first use.
            throw new IllegalArgumentException("Unsupported break iterator kind: " + kind);
        }
        return new BreakIteratorHandle(br);
    }

    /**
     * Returns a list of locales for which BreakIterators can be used.
     * @return An array of Locales.  All of the locales in the array can
     * be used when creating a BreakIterator.
     * @stable ICU 2.6
     */
    public static synchronized Locale[] getAvailableLocales() {
        return java.text.BreakIterator.getAvailableLocales();
    }

    /**
     * {@icu} Returns a list of locales for which BreakIterators can be used.
     * @return An array of Locales.  All of the locales in the array can
     * be used when creating a BreakIterator.
     * @draft ICU 3.2 (retain)
     * @provisional This API might change or be removed in a future release.
     */
    public static synchronized ULocale[] getAvailableULocales() {
        Locale[] locales = java.text.BreakIterator.getAvailableLocales();
        ULocale[] ulocales = new ULocale[locales.length];
        for (int i = 0; i < locales.length; ++i) {
            ulocales[i] = ULocale.forLocale(locales[i]);
        }
        return ulocales;
    }

//    /**
//     * {@icu} Returns the locale that was used to create this object, or null.
//     * This may differ from the locale requested at the time of
//     * this object's creation.  For example, if an object is created
//     * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
//     * drawn from <tt>en</tt> (the <i>actual</i> locale), and
//     * <tt>en_US</tt> may be the most specific locale that exists (the
//     * <i>valid</i> locale).
//     *
//     * <p>Note: The <i>actual</i> locale is returned correctly, but the <i>valid</i>
//     * locale is not, in most cases.
//     * @param type type of information requested, either {@link
//     * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
//     * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
//     * @return the information specified by <i>type</i>, or null if
//     * this object was not constructed from locale data.
//     * @see com.ibm.icu.util.ULocale
//     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
//     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
//     * @draft ICU 2.8 (retain)
//     * @provisional This API might change or be removed in a future release.
//     */
//    public final ULocale getLocale(ULocale.Type type) {
//        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
//    }

    // forwarding implementation class
    static final class BreakIteratorHandle extends BreakIterator {
        /**
         * @internal
         */
        public final java.text.BreakIterator breakIterator;

        /**
         * @internal
         * @param delegate the BreakIterator to which to delegate
         */
        public BreakIteratorHandle(java.text.BreakIterator delegate) {
            this.breakIterator = delegate;
        }

        @Override
        public int first() {
            return breakIterator.first();
        }
        @Override
        public int last() {
            return breakIterator.last();
        }
        @Override
        public int next(int n) {
            return breakIterator.next(n);
        }
        @Override
        public int next() {
            return breakIterator.next();
        }
        @Override
        public int previous() {
            return breakIterator.previous();
        }
        @Override
        public int following(int offset) {
            return breakIterator.following(offset);
        }
        @Override
        public int preceding(int offset) {
            return breakIterator.preceding(offset);
        }
        @Override
        public boolean isBoundary(int offset) {
            return breakIterator.isBoundary(offset);
        }
        @Override
        public int current() {
            return breakIterator.current();
        }
        @Override
        public CharacterIterator getText() {
            return breakIterator.getText();
        }
        @Override
        public void setText(CharacterIterator newText) {
            breakIterator.setText(newText);
        }

        /**
         * Return a string suitable for debugging.
         * @return a string suitable for debugging
         * @stable ICU 3.4.3
         */
        @Override
        public String toString() {
            return breakIterator.toString();
        }

        /**
         * Return a clone of this BreakIterator.
         * @return a clone of this BreakIterator
         * @stable ICU 3.4.3
         */
        @Override
        public Object clone() {
            return new BreakIteratorHandle((java.text.BreakIterator)breakIterator.clone());
        }

        /**
         * Return true if rhs is a BreakIterator with the same break behavior as this.
         * @param rhs the object to compare against; may be null.
         * @return true if rhs is a BreakIteratorHandle whose delegate equals
         * this handle's delegate
         * @stable ICU 3.4.3
         */
        @Override
        public boolean equals(Object rhs) {
            if (this == rhs) {
                return true;
            }
            // Explicit type test (also rejects null) instead of relying on a
            // caught ClassCastException/NullPointerException.
            if (!(rhs instanceof BreakIteratorHandle)) {
                return false;
            }
            java.text.BreakIterator other = ((BreakIteratorHandle)rhs).breakIterator;
            return breakIterator == null ? other == null : breakIterator.equals(other);
        }

        /**
         * Return a hashCode.
         * @return a hashCode
         * @stable ICU 3.4.3
         */
        @Override
        public int hashCode() {
            return breakIterator.hashCode();
        }
    }
}