Home | History | Annotate | Download | only in text
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.text;
     11 
     12 import java.text.CharacterIterator;
     13 import java.text.StringCharacterIterator;
     14 import java.util.Locale;
     15 
     16 import com.ibm.icu.util.ULocale;
     17 
     18 /**
     19  * {@icuenhanced java.text.BreakIterator}.{@icu _usage_}
     20  *
     21  * <p>A class that locates boundaries in text.  This class defines a protocol for
     22  * objects that break up a piece of natural-language text according to a set
     23  * of criteria.  Instances or subclasses of BreakIterator can be provided, for
     24  * example, to break a piece of text into words, sentences, or logical characters
     25  * according to the conventions of some language or group of languages.
     26  *
     27  * We provide five built-in types of BreakIterator:
     28  * <ul><li>getTitleInstance() returns a BreakIterator that locates boundaries
     29  * between title breaks.
     30  * <li>getSentenceInstance() returns a BreakIterator that locates boundaries
     31  * between sentences.  This is useful for triple-click selection, for example.
     32  * <li>getWordInstance() returns a BreakIterator that locates boundaries between
     33  * words.  This is useful for double-click selection or "find whole words" searches.
     34  * This type of BreakIterator makes sure there is a boundary position at the
     35  * beginning and end of each legal word.  (Numbers count as words, too.)  Whitespace
     36  * and punctuation are kept separate from real words.
     37  * <li>getLineInstance() returns a BreakIterator that locates positions where it is
     38  * legal for a text editor to wrap lines.  This is similar to word breaking, but
     39  * not the same: punctuation and whitespace are generally kept with words (you don't
     40  * want a line to start with whitespace, for example), and some special characters
     41  * can force a position to be considered a line-break position or prevent a position
     42  * from being a line-break position.
     43  * <li>getCharacterInstance() returns a BreakIterator that locates boundaries between
     44  * logical characters.  Because of the structure of the Unicode encoding, a logical
     45  * character may be stored internally as more than one Unicode code point.  (A with an
     46  * umlaut may be stored as an a followed by a separate combining umlaut character,
     47  * for example, but the user still thinks of it as one character.)  This iterator allows
     48  * various processes (especially text editors) to treat as characters the units of text
     49  * that a user would think of as characters, rather than the units of text that the
     50  * computer sees as "characters".</ul>
     51  *
     52  * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
     53  * has a concept of a "current position" and methods like first(), last(), next(),
     54  * and previous() that update the current position.  All BreakIterators uphold the
     55  * following invariants:
     56  * <ul><li>The beginning and end of the text are always treated as boundary positions.
     57  * <li>The current position of the iterator is always a boundary position (random-
     58  * access methods move the iterator to the nearest boundary position before or
     59  * after the specified position, not _to_ the specified position).
     60  * <li>DONE is used as a flag to indicate when iteration has stopped.  DONE is only
     61  * returned when the current position is the end of the text and the user calls next(),
     62  * or when the current position is the beginning of the text and the user calls
     63  * previous().
     64  * <li>Break positions are numbered by the positions of the characters that follow
     65  * them.  Thus, under normal circumstances, the position before the first character
     66  * is 0, the position after the first character is 1, and the position after the
     67  * last character is equal to the length of the string.
     68  * <li>The client can change the position of an iterator, or the text it analyzes,
     69  * at will, but cannot change the behavior.  If the user wants different behavior, he
     70  * must instantiate a new iterator.</ul>
     71  *
     72  * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
     73  * it possible to use BreakIterator to analyze text in any text-storage vehicle that
     74  * provides a CharacterIterator interface.
     75  *
     76  * <b>Note:</b>  Some types of BreakIterator can take a long time to create, and
     77  * instances of BreakIterator are not currently cached by the system.  For
     78  * optimal performance, keep instances of BreakIterator around as long as makes
     79  * sense.  For example, when word-wrapping a document, don't create and destroy a
     80  * new BreakIterator for each line.  Create one break iterator for the whole document
     81  * (or whatever stretch of text you're wrapping) and use it to do the whole job of
     82  * wrapping the text.
     83  *
     84   * <P>
     85  * <strong>Examples</strong>:<P>
     86  * Creating and using text boundaries
     87  * <blockquote>
     88  * <pre>
     89  * public static void main(String args[]) {
     90  *      if (args.length == 1) {
     91  *          String stringToExamine = args[0];
     92  *          //print each word in order
     93  *          BreakIterator boundary = BreakIterator.getWordInstance();
     94  *          boundary.setText(stringToExamine);
     95  *          printEachForward(boundary, stringToExamine);
     96  *          //print each sentence in reverse order
     97  *          boundary = BreakIterator.getSentenceInstance(Locale.US);
     98  *          boundary.setText(stringToExamine);
     99  *          printEachBackward(boundary, stringToExamine);
    100  *          printFirst(boundary, stringToExamine);
    101  *          printLast(boundary, stringToExamine);
    102  *      }
    103  * }
    104  * </pre>
    105  * </blockquote>
    106  *
    107  * Print each element in order
    108  * <blockquote>
    109  * <pre>
    110  * public static void printEachForward(BreakIterator boundary, String source) {
    111  *     int start = boundary.first();
    112  *     for (int end = boundary.next();
    113  *          end != BreakIterator.DONE;
    114  *          start = end, end = boundary.next()) {
    115  *          System.out.println(source.substring(start,end));
    116  *     }
    117  * }
    118  * </pre>
    119  * </blockquote>
    120  *
    121  * Print each element in reverse order
    122  * <blockquote>
    123  * <pre>
    124  * public static void printEachBackward(BreakIterator boundary, String source) {
    125  *     int end = boundary.last();
    126  *     for (int start = boundary.previous();
    127  *          start != BreakIterator.DONE;
    128  *          end = start, start = boundary.previous()) {
    129  *         System.out.println(source.substring(start,end));
    130  *     }
    131  * }
    132  * </pre>
    133  * </blockquote>
    134  *
    135  * Print first element
    136  * <blockquote>
    137  * <pre>
    138  * public static void printFirst(BreakIterator boundary, String source) {
    139  *     int start = boundary.first();
    140  *     int end = boundary.next();
    141  *     System.out.println(source.substring(start,end));
    142  * }
    143  * </pre>
    144  * </blockquote>
    145  *
    146  * Print last element
    147  * <blockquote>
    148  * <pre>
    149  * public static void printLast(BreakIterator boundary, String source) {
    150  *     int end = boundary.last();
    151  *     int start = boundary.previous();
    152  *     System.out.println(source.substring(start,end));
    153  * }
    154  * </pre>
    155  * </blockquote>
    156  *
    157  * Print the element at a specified position
    158  * <blockquote>
    159  * <pre>
    160  * public static void printAt(BreakIterator boundary, int pos, String source) {
    161  *     int end = boundary.following(pos);
    162  *     int start = boundary.previous();
    163  *     System.out.println(source.substring(start,end));
    164  * }
    165  * </pre>
    166  * </blockquote>
    167  *
    168  * Find the next word
    169  * <blockquote>
    170  * <pre>
    171  * public static int nextWordStartAfter(int pos, String text) {
    172  *     BreakIterator wb = BreakIterator.getWordInstance();
    173  *     wb.setText(text);
    174  *     int last = wb.following(pos);
    175  *     int current = wb.next();
    176  *     while (current != BreakIterator.DONE) {
    177  *         for (int p = last; p < current; p++) {
    178  *             if (Character.isLetter(text.charAt(p)))
    179  *                 return last;
    180  *         }
    181  *         last = current;
    182  *         current = wb.next();
    183  *     }
    184  *     return BreakIterator.DONE;
    185  * }
    186  * </pre>
    187  * (The iterator returned by BreakIterator.getWordInstance() is unique in that
    188  * the break positions it returns don't represent both the start and end of the
    189  * thing being iterated over.  That is, a sentence-break iterator returns breaks
    190  * that each represent the end of one sentence and the beginning of the next.
    191  * With the word-break iterator, the characters between two boundaries might be a
    192  * word, or they might be the punctuation or whitespace between two words.  The
    193  * above code uses a simple heuristic to determine which boundary is the beginning
    194  * of a word: If the characters between this boundary and the next boundary
    195  * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
    196  * a Hangul syllable, a Kana character, etc.), then the text between this boundary
    197  * and the next is a word; otherwise, it's the material between words.)
    198  * </blockquote>
    199  *
    200  * @see CharacterIterator
    201  * @stable ICU 2.0
    202  *
    203  */
    204 
    205 public abstract class BreakIterator implements Cloneable
    206 {
    207 
    /**
     * No-argument constructor for use by subclasses.  This abstract base
     * class carries no state of its own, so there is nothing to initialize.
     * @stable ICU 2.0
     */
    protected BreakIterator() {
    }
    216 
    217     /**
    218      * Clone method.  Creates another BreakIterator with the same behavior and
    219      * current state as this one.
    220      * @return The clone.
    221      * @stable ICU 2.0
    222      */
    223     public Object clone()
    224     {
    225         try {
    226             return super.clone();
    227         }
    228         catch (CloneNotSupportedException e) {
    229             ///CLOVER:OFF
    230             throw new IllegalStateException();
    231             ///CLOVER:ON
    232         }
    233     }
    234 
    235     /**
    236      * DONE is returned by previous() and next() after all valid
    237      * boundaries have been returned.
    238      * @stable ICU 2.0
    239      */
    240     public static final int DONE = -1;
    241 
    242     /**
    243      * Return the first boundary position.  This is always the beginning
    244      * index of the text this iterator iterates over.  For example, if
    245      * the iterator iterates over a whole string, this function will
    246      * always return 0.  This function also updates the iteration position
    247      * to point to the beginning of the text.
    248      * @return The character offset of the beginning of the stretch of text
    249      * being broken.
    250      * @stable ICU 2.0
    251      */
    252     public abstract int first();
    253 
    254     /**
    255      * Return the last boundary position.  This is always the "past-the-end"
    256      * index of the text this iterator iterates over.  For example, if the
    257      * iterator iterates over a whole string (call it "text"), this function
    258      * will always return text.length().  This function also updated the
    259      * iteration position to point to the end of the text.
    260      * @return The character offset of the end of the stretch of text
    261      * being broken.
    262      * @stable ICU 2.0
    263      */
    264     public abstract int last();
    265 
    266     /**
    267      * Advances the specified number of steps forward in the text (a negative
    268      * number, therefore, advances backwards).  If this causes the iterator
    269      * to advance off either end of the text, this function returns DONE;
    270      * otherwise, this function returns the position of the appropriate
    271      * boundary.  Calling this function is equivalent to calling next() or
    272      * previous() n times.
    273      * @param n The number of boundaries to advance over (if positive, moves
    274      * forward; if negative, moves backwards).
    275      * @return The position of the boundary n boundaries from the current
    276      * iteration position, or DONE if moving n boundaries causes the iterator
    277      * to advance off either end of the text.
    278      * @stable ICU 2.0
    279      */
    280     public abstract int next(int n);
    281 
    282     /**
    283      * Advances the iterator forward one boundary.  The current iteration
    284      * position is updated to point to the next boundary position after the
    285      * current position, and this is also the value that is returned.  If
    286      * the current position is equal to the value returned by last(), or to
    287      * DONE, this function returns DONE and sets the current position to
    288      * DONE.
    289      * @return The position of the first boundary position following the
    290      * iteration position.
    291      * @stable ICU 2.0
    292      */
    293     public abstract int next();
    294 
    295     /**
    296      * Advances the iterator backward one boundary.  The current iteration
    297      * position is updated to point to the last boundary position before
    298      * the current position, and this is also the value that is returned.  If
    299      * the current position is equal to the value returned by first(), or to
    300      * DONE, this function returns DONE and sets the current position to
    301      * DONE.
    302      * @return The position of the last boundary position preceding the
    303      * iteration position.
    304      * @stable ICU 2.0
    305      */
    306     public abstract int previous();
    307 
    308     /**
    309      * Sets the iterator's current iteration position to be the first
    310      * boundary position following the specified position.  (Whether the
    311      * specified position is itself a boundary position or not doesn't
    312      * matter-- this function always moves the iteration position to the
    313      * first boundary after the specified position.)  If the specified
    314      * position is the past-the-end position, returns DONE.
    315      * @param offset The character position to start searching from.
    316      * @return The position of the first boundary position following
    317      * "offset" (whether or not "offset" itself is a boundary position),
    318      * or DONE if "offset" is the past-the-end offset.
    319      * @stable ICU 2.0
    320      */
    321     public abstract int following(int offset);
    322 
    323     /**
    324      * Sets the iterator's current iteration position to be the last
    325      * boundary position preceding the specified position.  (Whether the
    326      * specified position is itself a boundary position or not doesn't
    327      * matter-- this function always moves the iteration position to the
    328      * last boundary before the specified position.)  If the specified
    329      * position is the starting position, returns DONE.
    330      * @param offset The character position to start searching from.
    331      * @return The position of the last boundary position preceding
    332      * "offset" (whether of not "offset" itself is a boundary position),
    333      * or DONE if "offset" is the starting offset of the iterator.
    334      * @stable ICU 2.0
    335      */
    336     public int preceding(int offset) {
    337         // NOTE:  This implementation is here solely because we can't add new
    338         // abstract methods to an existing class.  There is almost ALWAYS a
    339         // better, faster way to do this.
    340         int pos = following(offset);
    341         while (pos >= offset && pos != DONE)
    342             pos = previous();
    343         return pos;
    344     }
    345 
    346     /**
    347      * Return true if the specfied position is a boundary position.  If the
    348      * function returns true, the current iteration position is set to the
    349      * specified position; if the function returns false, the current
    350      * iteration position is set as though following() had been called.
    351      * @param offset the offset to check.
    352      * @return True if "offset" is a boundary position.
    353      * @stable ICU 2.0
    354      */
    355     public boolean isBoundary(int offset) {
    356         // Again, this is the default implementation, which is provided solely because
    357         // we couldn't add a new abstract method to an existing class.  The real
    358         // implementations will usually need to do a little more work.
    359         if (offset == 0) {
    360             return true;
    361         }
    362         else
    363             return following(offset - 1) == offset;
    364     }
    365 
    366     /**
    367      * Return the iterator's current position.
    368      * @return The iterator's current position.
    369      * @stable ICU 2.0
    370      */
    371     public abstract int current();
    372 
    373     /**
    374      * Returns a CharacterIterator over the text being analyzed.
    375      * For at least some subclasses of BreakIterator, this is a reference
    376      * to the <b>actual iterator being used</b> by the BreakIterator,
    377      * and therefore, this function's return value should be treated as
    378      * <tt>const</tt>.  No guarantees are made about the current position
    379      * of this iterator when it is returned.  If you need to move that
    380      * position to examine the text, clone this function's return value first.
    381      * @return A CharacterIterator over the text being analyzed.
    382      * @stable ICU 2.0
    383      */
    384     public abstract CharacterIterator getText();
    385 
    386     /**
    387      * Sets the iterator to analyze a new piece of text.  The new
    388      * piece of text is passed in as a String, and the current
    389      * iteration position is reset to the beginning of the string.
    390      * (The old text is dropped.)
    391      * @param newText A String containing the text to analyze with
    392      * this BreakIterator.
    393      * @stable ICU 2.0
    394      */
    395     public void setText(String newText)
    396     {
    397         setText(new StringCharacterIterator(newText));
    398     }
    399 
    400     /**
    401      * Sets the iterator to analyze a new piece of text.  The
    402      * BreakIterator is passed a CharacterIterator through which
    403      * it will access the text itself.  The current iteration
    404      * position is reset to the CharacterIterator's start index.
    405      * (The old iterator is dropped.)
    406      * @param newText A CharacterIterator referring to the text
    407      * to analyze with this BreakIterator (the iterator's current
    408      * position is ignored, but its other state is significant).
    409      * @stable ICU 2.0
    410      */
    411     public abstract void setText(CharacterIterator newText);
    412 
    413     /**
    414      * {@icu}
    415      * @stable ICU 2.4
    416      */
    417     public static final int KIND_CHARACTER = 0;
    418     /**
    419      * {@icu}
    420      * @stable ICU 2.4
    421      */
    422     public static final int KIND_WORD = 1;
    423     /**
    424      * {@icu}
    425      * @stable ICU 2.4
    426      */
    427     public static final int KIND_LINE = 2;
    428     /**
    429      * {@icu}
    430      * @stable ICU 2.4
    431      */
    432     public static final int KIND_SENTENCE = 3;
    433 //    /**
    434 //     * {@icu}
    435 //     * @stable ICU 2.4
    436 //     */
    437 //    public static final int KIND_TITLE = 4;
    438 
    439     /**
    440      * Returns a new instance of BreakIterator that locates word boundaries.
    441      * This function assumes that the text being analyzed is in the default
    442      * locale's language.
    443      * @return An instance of BreakIterator that locates word boundaries.
    444      * @stable ICU 2.0
    445      */
    446     public static BreakIterator getWordInstance()
    447     {
    448         return getWordInstance(Locale.getDefault());
    449     }
    450 
    451     /**
    452      * Returns a new instance of BreakIterator that locates word boundaries.
    453      * @param where A locale specifying the language of the text to be
    454      * analyzed.
    455      * @return An instance of BreakIterator that locates word boundaries.
    456      * @stable ICU 2.0
    457      */
    458     public static BreakIterator getWordInstance(Locale where)
    459     {
    460         return getBreakInstance(where, KIND_WORD);
    461     }
    462 
    463     /**
    464      * {@icu} Returns a new instance of BreakIterator that locates word boundaries.
    465      * @param where A locale specifying the language of the text to be
    466      * analyzed.
    467      * @return An instance of BreakIterator that locates word boundaries.
    468      * @stable ICU 3.2
    469      */
    470     public static BreakIterator getWordInstance(ULocale where)
    471     {
    472         return getBreakInstance(where.toLocale(), KIND_WORD);
    473     }
    474 
    475     /**
    476      * Returns a new instance of BreakIterator that locates legal line-
    477      * wrapping positions.  This function assumes the text being broken
    478      * is in the default locale's language.
    479      * @return A new instance of BreakIterator that locates legal
    480      * line-wrapping positions.
    481      * @stable ICU 2.0
    482      */
    483     public static BreakIterator getLineInstance()
    484     {
    485         return getLineInstance(Locale.getDefault());
    486     }
    487 
    488     /**
    489      * Returns a new instance of BreakIterator that locates legal line-
    490      * wrapping positions.
    491      * @param where A Locale specifying the language of the text being broken.
    492      * @return A new instance of BreakIterator that locates legal
    493      * line-wrapping positions.
    494      * @stable ICU 2.0
    495      */
    496     public static BreakIterator getLineInstance(Locale where)
    497     {
    498         return getBreakInstance(where, KIND_LINE);
    499     }
    500 
    501     /**
    502      * {@icu} Returns a new instance of BreakIterator that locates legal line-
    503      * wrapping positions.
    504      * @param where A Locale specifying the language of the text being broken.
    505      * @return A new instance of BreakIterator that locates legal
    506      * line-wrapping positions.
    507      * @stable ICU 3.2
    508      */
    509     public static BreakIterator getLineInstance(ULocale where)
    510     {
    511         return getBreakInstance(where.toLocale(), KIND_LINE);
    512     }
    513 
    514     /**
    515      * Returns a new instance of BreakIterator that locates logical-character
    516      * boundaries.  This function assumes that the text being analyzed is
    517      * in the default locale's language.
    518      * @return A new instance of BreakIterator that locates logical-character
    519      * boundaries.
    520      * @stable ICU 2.0
    521      */
    522     public static BreakIterator getCharacterInstance()
    523     {
    524         return getCharacterInstance(Locale.getDefault());
    525     }
    526 
    527     /**
    528      * Returns a new instance of BreakIterator that locates logical-character
    529      * boundaries.
    530      * @param where A Locale specifying the language of the text being analyzed.
    531      * @return A new instance of BreakIterator that locates logical-character
    532      * boundaries.
    533      * @stable ICU 2.0
    534      */
    535     public static BreakIterator getCharacterInstance(Locale where)
    536     {
    537         return getBreakInstance(where, KIND_CHARACTER);
    538     }
    539 
    540     /**
    541      * {@icu} Returns a new instance of BreakIterator that locates logical-character
    542      * boundaries.
    543      * @param where A Locale specifying the language of the text being analyzed.
    544      * @return A new instance of BreakIterator that locates logical-character
    545      * boundaries.
    546      * @stable ICU 3.2
    547      */
    548     public static BreakIterator getCharacterInstance(ULocale where)
    549     {
    550         return getBreakInstance(where.toLocale(), KIND_CHARACTER);
    551     }
    552 
    553     /**
    554      * Returns a new instance of BreakIterator that locates sentence boundaries.
    555      * This function assumes the text being analyzed is in the default locale's
    556      * language.
    557      * @return A new instance of BreakIterator that locates sentence boundaries.
    558      * @stable ICU 2.0
    559      */
    560     public static BreakIterator getSentenceInstance()
    561     {
    562         return getSentenceInstance(Locale.getDefault());
    563     }
    564 
    565     /**
    566      * Returns a new instance of BreakIterator that locates sentence boundaries.
    567      * @param where A Locale specifying the language of the text being analyzed.
    568      * @return A new instance of BreakIterator that locates sentence boundaries.
    569      * @stable ICU 2.0
    570      */
    571     public static BreakIterator getSentenceInstance(Locale where)
    572     {
    573         return getBreakInstance(where, KIND_SENTENCE);
    574     }
    575 
    576     /**
    577      * {@icu} Returns a new instance of BreakIterator that locates sentence boundaries.
    578      * @param where A Locale specifying the language of the text being analyzed.
    579      * @return A new instance of BreakIterator that locates sentence boundaries.
    580      * @stable ICU 3.2
    581      */
    582     public static BreakIterator getSentenceInstance(ULocale where)
    583     {
    584         return getBreakInstance(where.toLocale(), KIND_SENTENCE);
    585     }
    586 
    587 //    /**
    588 //     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
    589 //     * This function assumes the text being analyzed is in the default locale's
    590 //     * language. The iterator returned locates title boundaries as described for
    591 //     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
    592 //     * please use a word boundary iterator. {@link #getWordInstance}
    593 //     * @return A new instance of BreakIterator that locates title boundaries.
    594 //     * @stable ICU 2.0
    595 //     */
    596 //    public static BreakIterator getTitleInstance()
    597 //    {
    598 //        return getTitleInstance(Locale.getDefault());
    599 //    }
    600 
    601 //    /**
    602 //     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
    603 //     * The iterator returned locates title boundaries as described for
    604 //     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
    605 //     * please use Word Boundary iterator.{@link #getWordInstance}
    606 //     * @param where A Locale specifying the language of the text being analyzed.
    607 //     * @return A new instance of BreakIterator that locates title boundaries.
    608 //     * @stable ICU 2.0
    609 //     */
    610 //    public static BreakIterator getTitleInstance(Locale where)
    611 //    {
    612 //        return getBreakInstance(where, KIND_TITLE);
    613 //    }
    614 
    615 //    /**
    616 //     * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
    617 //     * The iterator returned locates title boundaries as described for
    618 //     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
    619 //     * please use Word Boundary iterator.{@link #getWordInstance}
    620 //     * @param where A Locale specifying the language of the text being analyzed.
    621 //     * @return A new instance of BreakIterator that locates title boundaries.
    622 //     * @stable ICU 3.2
    623 //     */
    624 //    public static BreakIterator getTitleInstance(ULocale where)
    625 //    {
    626 //        return getBreakInstance(where.toLocale(), KIND_TITLE);
    627 //    }
    628 
    629 //    /**
    630 //     * {@icu} Registers a new break iterator of the indicated kind, to use in the given
    631 //     * locale.  Clones of the iterator will be returned if a request for a break iterator
    632 //     * of the given kind matches or falls back to this locale.
    633 //     * @param iter the BreakIterator instance to adopt.
    634 //     * @param locale the Locale for which this instance is to be registered
    635 //     * @param kind the type of iterator for which this instance is to be registered
    636 //     * @return a registry key that can be used to unregister this instance
    637 //     * @stable ICU 2.4
    638 //     */
    639 //    public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
    640 //        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
    641 //    }
    642 
    643 //    /**
    644 //     * {@icu} Registers a new break iterator of the indicated kind, to use in the given
    645 //     * locale.  Clones of the iterator will be returned if a request for a break iterator
    646 //     * of the given kind matches or falls back to this locale.
    647 //     * @param iter the BreakIterator instance to adopt.
    648 //     * @param locale the Locale for which this instance is to be registered
    649 //     * @param kind the type of iterator for which this instance is to be registered
    650 //     * @return a registry key that can be used to unregister this instance
    651 //     * @stable ICU 3.2
    652 //     */
    653 //    public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
    654 //        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
    655 //    }
    656 
    657 //    /**
    658 //     * {@icu} Unregisters a previously-registered BreakIterator using the key returned
    659 //     * from the register call.  Key becomes invalid after this call and should not be used
    660 //     * again.
    661 //     * @param key the registry key returned by a previous call to registerInstance
    662 //     * @return true if the iterator for the key was successfully unregistered
    663 //     * @stable ICU 2.4
    664 //     */
    665 //    public static boolean unregister(Object key) {
    666 //        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
    667 //    }
    668 
    669     // end of registration
    670 
    671     /**
    672      * Returns a particular kind of BreakIterator for a locale.
    673      * Avoids writing a switch statement with getXYZInstance(where) calls.
    674      * @internal
    675      * @deprecated This API is ICU internal only.
    676      */
    677     public static BreakIterator getBreakInstance(ULocale where, int kind) {
    678         return getBreakInstance(where.toLocale(), KIND_SENTENCE);
    679     }
    680 
    681     private static BreakIterator getBreakInstance(Locale where, int kind) {
    682         java.text.BreakIterator br = null;
    683         switch(kind) {
    684         case KIND_CHARACTER: br = java.text.BreakIterator.getCharacterInstance(where); break;
    685         case KIND_WORD: br = java.text.BreakIterator.getWordInstance(where); break;
    686         case KIND_LINE: br = java.text.BreakIterator.getLineInstance(where); break;
    687         case KIND_SENTENCE: br = java.text.BreakIterator.getSentenceInstance(where); break;
    688 //        case KIND_TITLE: throw new UnsupportedOperationException("Title break is not supported by com.ibm.icu.base");
    689         }
    690         return new BreakIteratorHandle(br);
    691     }
    692 
    693     /**
    694      * Returns a list of locales for which BreakIterators can be used.
    695      * @return An array of Locales.  All of the locales in the array can
    696      * be used when creating a BreakIterator.
    697      * @stable ICU 2.6
    698      */
    699     public static synchronized Locale[] getAvailableLocales() {
    700         return java.text.BreakIterator.getAvailableLocales();
    701     }
    702 
    703     /**
    704      * {@icu} Returns a list of locales for which BreakIterators can be used.
    705      * @return An array of Locales.  All of the locales in the array can
    706      * be used when creating a BreakIterator.
    707      * @draft ICU 3.2 (retain)
    708      * @provisional This API might change or be removed in a future release.
    709      */
    710     public static synchronized ULocale[] getAvailableULocales() {
    711         Locale[] locales = java.text.BreakIterator.getAvailableLocales();
    712         ULocale[] ulocales = new ULocale[locales.length];
    713         for (int i = 0; i < locales.length; ++i) {
    714             ulocales[i] = ULocale.forLocale(locales[i]);
    715         }
    716         return ulocales;
    717     }
    718 
    719 //    /**
    720 //     * {@icu} Returns the locale that was used to create this object, or null.
    721 //     * This may differ from the locale requested at the time of
    722 //     * this object's creation.  For example, if an object is created
    723 //     * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
    724 //     * drawn from <tt>en</tt> (the <i>actual</i> locale), and
    725 //     * <tt>en_US</tt> may be the most specific locale that exists (the
    726 //     * <i>valid</i> locale).
    727 //     *
    728 //     * <p>Note: The <i>actual</i> locale is returned correctly, but the <i>valid</i>
    729 //     * locale is not, in most cases.
    730 //     * @param type type of information requested, either {@link
    731 //     * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
    732 //     * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
    733 //     * @return the information specified by <i>type</i>, or null if
    734 //     * this object was not constructed from locale data.
    735 //     * @see com.ibm.icu.util.ULocale
    736 //     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
    737 //     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
    738 //     * @draft ICU 2.8 (retain)
    739 //     * @provisional This API might change or be removed in a future release.
    740 //     */
    741 //    public final ULocale getLocale(ULocale.Type type) {
    742 //        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
    743 //    }
    744 
    745     // forwarding implementation class
    746     static final class BreakIteratorHandle extends BreakIterator {
    747         /**
    748          * @internal
    749          */
    750         public final java.text.BreakIterator breakIterator;
    751 
    752         /**
    753          * @internal
    754          * @param delegate the BreakIterator to which to delegate
    755          */
    756         public BreakIteratorHandle(java.text.BreakIterator delegate) {
    757             this.breakIterator = delegate;
    758         }
    759 
    760         public int first() {
    761             return breakIterator.first();
    762         }
    763         public int last() {
    764             return breakIterator.last();
    765         }
    766         public int next(int n) {
    767             return breakIterator.next(n);
    768         }
    769         public int next() {
    770             return breakIterator.next();
    771         }
    772         public int previous() {
    773             return breakIterator.previous();
    774         }
    775         public int following(int offset) {
    776             return breakIterator.following(offset);
    777         }
    778         public int preceding(int offset) {
    779             return breakIterator.preceding(offset);
    780         }
    781         public boolean isBoundary(int offset) {
    782             return breakIterator.isBoundary(offset);
    783         }
    784         public int current() {
    785             return breakIterator.current();
    786         }
    787         public CharacterIterator getText() {
    788             return breakIterator.getText();
    789         }
    790         public void setText(CharacterIterator newText) {
    791             breakIterator.setText(newText);
    792         }
    793 
    794         /**
    795          * Return a string suitable for debugging.
    796          * @return a string suitable for debugging
    797          * @stable ICU 3.4.3
    798          */
    799         public String toString() {
    800             return breakIterator.toString();
    801         }
    802 
    803         /**
    804          * Return a clone of this BreakIterator.
    805          * @return a clone of this BreakIterator
    806          * @stable ICU 3.4.3
    807          */
    808         public Object clone() {
    809             return new BreakIteratorHandle((java.text.BreakIterator)breakIterator.clone());
    810         }
    811 
    812         /**
    813          * Return true if rhs is a BreakIterator with the same break behavior as this.
    814          * @return true if rhs equals this
    815          * @stable ICU 3.4.3
    816          */
    817         public boolean equals(Object rhs) {
    818             try {
    819                 return breakIterator.equals(((BreakIteratorHandle)rhs).breakIterator);
    820             }
    821             catch (Exception e) {
    822                 return false;
    823             }
    824         }
    825 
    826         /**
    827          * Return a hashCode.
    828          * @return a hashCode
    829          * @stable ICU 3.4.3
    830          */
    831         public int hashCode() {
    832             return breakIterator.hashCode();
    833         }
    834     }
    835 }
    836