// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 1996-2015, International Business Machines Corporation and others.
* All Rights Reserved.
******************************************************************************
*/
      9 
     10 #ifndef UBRK_H
     11 #define UBRK_H
     12 
     13 #include "unicode/utypes.h"
     14 #include "unicode/uloc.h"
     15 #include "unicode/utext.h"
     16 #include "unicode/localpointer.h"
     17 
     18 /**
     19  * A text-break iterator.
     20  *  For usage in C programs.
     21  */
     22 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
     23 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
     24     /**
     25      *  Opaque type representing an ICU Break iterator object.
     26      *  @stable ICU 2.0
     27      */
     28     typedef struct UBreakIterator UBreakIterator;
     29 #endif
     30 
     31 #if !UCONFIG_NO_BREAK_ITERATION
     32 
     33 #include "unicode/parseerr.h"
     34 
/**
 * \file
 * \brief C API: BreakIterator
 *
 * <h2> BreakIterator C API </h2>
 *
 * The BreakIterator C API defines methods for finding the location
 * of boundaries in text. A UBreakIterator maintains a current position
 * and scans over text, returning the index of characters
 * where boundaries occur.
 * <p>
 * Line boundary analysis determines where a text string can be broken
 * when line-wrapping. The mechanism correctly handles punctuation and
 * hyphenated words.
 * <p>
 * Note: The locale keyword "lb" can be used to modify line break
 * behavior according to the CSS level 3 line-break options, see
 * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
 * "ja@lb=strict", "zh@lb=loose".
 * <p>
 * Sentence boundary analysis allows selection with correct
 * interpretation of periods within numbers and abbreviations, and
 * trailing punctuation marks such as quotation marks and parentheses.
 * <p>
 * Note: The locale keyword "ss" can be used to enable use of
 * segmentation suppression data (preventing breaks in English after
 * abbreviations such as "Mr." or "Est.", for example), as follows:
 * "en@ss=standard".
 * <p>
 * Word boundary analysis is used by search and replace functions, as
 * well as within text editing applications that allow the user to
 * select words with a double click. Word selection provides correct
 * interpretation of punctuation marks within and following
 * words. Characters that are not part of a word, such as symbols or
 * punctuation marks, have word-breaks on both sides.
 * <p>
 * Character boundary analysis identifies the boundaries of
 * "Extended Grapheme Clusters", which are groupings of codepoints
 * that should be treated as character-like units for many text operations.
 * Please see Unicode Standard Annex #29, Unicode Text Segmentation,
 * http://www.unicode.org/reports/tr29/ for additional information
 * on grapheme clusters and guidelines on their use.
 * <p>
 * Title boundary analysis locates all positions,
 * typically starts of words, that should be set to Title Case
 * when title casing the text.
 * <p>
 * The text boundary positions are found according to the rules
 * described in Unicode Standard Annex #29, Text Boundaries, and
 * Unicode Standard Annex #14, Line Breaking Properties.  These
 * are available at http://www.unicode.org/reports/tr29/ and
 * http://www.unicode.org/reports/tr14/ respectively.
 * <p>
 * In addition to the plain C API defined in this header file, an
 * object oriented C++ API with equivalent functionality is defined in the
 * file brkiter.h.
 * <p>
 * Code snippets illustrating the use of the Break Iterator APIs
 * are available in the ICU User Guide,
 * http://icu-project.org/userguide/boundaryAnalysis.html
 * and in the sample program icu/source/samples/break/break.cpp
 */
     97 
     98 /** The possible types of text boundaries.  @stable ICU 2.0 */
     99 typedef enum UBreakIteratorType {
    100   /** Character breaks  @stable ICU 2.0 */
    101   UBRK_CHARACTER = 0,
    102   /** Word breaks @stable ICU 2.0 */
    103   UBRK_WORD = 1,
    104   /** Line breaks @stable ICU 2.0 */
    105   UBRK_LINE = 2,
    106   /** Sentence breaks @stable ICU 2.0 */
    107   UBRK_SENTENCE = 3,
    108 
    109 #ifndef U_HIDE_DEPRECATED_API
    110   /**
    111    * Title Case breaks
    112    * The iterator created using this type locates title boundaries as described for
    113    * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
    114    * please use Word Boundary iterator.
    115    *
    116    * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
    117    */
    118   UBRK_TITLE = 4,
    119     /**
    120      * One more than the highest normal UBreakIteratorType value.
    121      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    122      */
    123     UBRK_COUNT = 5
    124 #endif  // U_HIDE_DEPRECATED_API
    125 } UBreakIteratorType;
    126 
    127 /** Value indicating all text boundaries have been returned.
    128  *  @stable ICU 2.0
    129  */
    130 #define UBRK_DONE ((int32_t) -1)
    131 
    132 
    133 /**
    134  *  Enum constants for the word break tags returned by
    135  *  getRuleStatus().  A range of values is defined for each category of
    136  *  word, to allow for further subdivisions of a category in future releases.
    137  *  Applications should check for tag values falling within the range, rather
    138  *  than for single individual values.
    139  *
    140  * The numeric values of all of these constants are stable (will not change).
    141  *
    142  * @stable ICU 2.2
    143 */
    144 typedef enum UWordBreak {
    145     /** Tag value for "words" that do not fit into any of other categories.
    146      *  Includes spaces and most punctuation. */
    147     UBRK_WORD_NONE           = 0,
    148     /** Upper bound for tags for uncategorized words. */
    149     UBRK_WORD_NONE_LIMIT     = 100,
    150     /** Tag value for words that appear to be numbers, lower limit.    */
    151     UBRK_WORD_NUMBER         = 100,
    152     /** Tag value for words that appear to be numbers, upper limit.    */
    153     UBRK_WORD_NUMBER_LIMIT   = 200,
    154     /** Tag value for words that contain letters, excluding
    155      *  hiragana, katakana or ideographic characters, lower limit.    */
    156     UBRK_WORD_LETTER         = 200,
    157     /** Tag value for words containing letters, upper limit  */
    158     UBRK_WORD_LETTER_LIMIT   = 300,
    159     /** Tag value for words containing kana characters, lower limit */
    160     UBRK_WORD_KANA           = 300,
    161     /** Tag value for words containing kana characters, upper limit */
    162     UBRK_WORD_KANA_LIMIT     = 400,
    163     /** Tag value for words containing ideographic characters, lower limit */
    164     UBRK_WORD_IDEO           = 400,
    165     /** Tag value for words containing ideographic characters, upper limit */
    166     UBRK_WORD_IDEO_LIMIT     = 500
    167 } UWordBreak;
    168 
    169 /**
    170  *  Enum constants for the line break tags returned by getRuleStatus().
    171  *  A range of values is defined for each category of
    172  *  word, to allow for further subdivisions of a category in future releases.
    173  *  Applications should check for tag values falling within the range, rather
    174  *  than for single individual values.
    175  *
    176  * The numeric values of all of these constants are stable (will not change).
    177  *
    178  * @stable ICU 2.8
    179 */
    180 typedef enum ULineBreakTag {
    181     /** Tag value for soft line breaks, positions at which a line break
    182       *  is acceptable but not required                */
    183     UBRK_LINE_SOFT            = 0,
    184     /** Upper bound for soft line breaks.              */
    185     UBRK_LINE_SOFT_LIMIT      = 100,
    186     /** Tag value for a hard, or mandatory line break  */
    187     UBRK_LINE_HARD            = 100,
    188     /** Upper bound for hard line breaks.              */
    189     UBRK_LINE_HARD_LIMIT      = 200
    190 } ULineBreakTag;
    191 
    192 
    193 
    194 /**
    195  *  Enum constants for the sentence break tags returned by getRuleStatus().
    196  *  A range of values is defined for each category of
    197  *  sentence, to allow for further subdivisions of a category in future releases.
    198  *  Applications should check for tag values falling within the range, rather
    199  *  than for single individual values.
    200  *
    201  * The numeric values of all of these constants are stable (will not change).
    202  *
    203  * @stable ICU 2.8
    204 */
    205 typedef enum USentenceBreakTag {
    206     /** Tag value for for sentences  ending with a sentence terminator
    207       * ('.', '?', '!', etc.) character, possibly followed by a
    208       * hard separator (CR, LF, PS, etc.)
    209       */
    210     UBRK_SENTENCE_TERM       = 0,
    211     /** Upper bound for tags for sentences ended by sentence terminators.    */
    212     UBRK_SENTENCE_TERM_LIMIT = 100,
    213     /** Tag value for for sentences that do not contain an ending
    214       * sentence terminator ('.', '?', '!', etc.) character, but
    215       * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
    216       */
    217     UBRK_SENTENCE_SEP        = 100,
    218     /** Upper bound for tags for sentences ended by a separator.              */
    219     UBRK_SENTENCE_SEP_LIMIT  = 200
    220     /** Tag value for a hard, or mandatory line break  */
    221 } USentenceBreakTag;
    222 
    223 
    224 /**
    225  * Open a new UBreakIterator for locating text boundaries for a specified locale.
    226  * A UBreakIterator may be used for detecting character, line, word,
    227  * and sentence breaks in text.
    228  * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
    229  * UBRK_LINE, UBRK_SENTENCE
    230  * @param locale The locale specifying the text-breaking conventions. Note that
    231  * locale keys such as "lb" and "ss" may be used to modify text break behavior,
    232  * see general discussion of BreakIterator C API.
    233  * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
    234  *        used to specify the text to be iterated.
    235  * @param textLength The number of characters in text, or -1 if null-terminated.
    236  * @param status A UErrorCode to receive any errors.
    237  * @return A UBreakIterator for the specified locale.
    238  * @see ubrk_openRules
    239  * @stable ICU 2.0
    240  */
    241 U_STABLE UBreakIterator* U_EXPORT2
    242 ubrk_open(UBreakIteratorType type,
    243       const char *locale,
    244       const UChar *text,
    245       int32_t textLength,
    246       UErrorCode *status);
    247 
    248 /**
    249  * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
    250  * The rule syntax is ... (TBD)
    251  * @param rules A set of rules specifying the text breaking conventions.
    252  * @param rulesLength The number of characters in rules, or -1 if null-terminated.
    253  * @param text The text to be iterated over.  May be null, in which case ubrk_setText() is
    254  *        used to specify the text to be iterated.
    255  * @param textLength The number of characters in text, or -1 if null-terminated.
    256  * @param parseErr   Receives position and context information for any syntax errors
    257  *                   detected while parsing the rules.
    258  * @param status A UErrorCode to receive any errors.
    259  * @return A UBreakIterator for the specified rules.
    260  * @see ubrk_open
    261  * @stable ICU 2.2
    262  */
    263 U_STABLE UBreakIterator* U_EXPORT2
    264 ubrk_openRules(const UChar     *rules,
    265                int32_t         rulesLength,
    266                const UChar     *text,
    267                int32_t          textLength,
    268                UParseError     *parseErr,
    269                UErrorCode      *status);
    270 
    271 #ifndef U_HIDE_DRAFT_API
    272 /**
    273  * Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
    274  * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
    275  * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
    276  * compatible across different major versions of ICU, nor across platforms of different
    277  * endianness or different base character set family (ASCII vs EBCDIC).
    278  * @param binaryRules A set of compiled binary rules specifying the text breaking
    279  *                    conventions. Ownership of the storage containing the compiled
    280  *                    rules remains with the caller of this function. The compiled
    281  *                    rules must not be modified or deleted during the life of the
    282  *                    break iterator.
    283  * @param rulesLength The length of binaryRules in bytes; must be >= 0.
    284  * @param text        The text to be iterated over.  May be null, in which case
    285  *                    ubrk_setText() is used to specify the text to be iterated.
    286  * @param textLength  The number of characters in text, or -1 if null-terminated.
    287  * @param status      Pointer to UErrorCode to receive any errors.
    288  * @return            UBreakIterator for the specified rules.
    289  * @see ubrk_getBinaryRules
    290  * @draft ICU 59
    291  */
    292 U_DRAFT UBreakIterator* U_EXPORT2
    293 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
    294                      const UChar *  text, int32_t textLength,
    295                      UErrorCode *   status);
    296 
    297 #endif  /* U_HIDE_DRAFT_API */
    298 
    299 /**
    300  * Thread safe cloning operation
    301  * @param bi iterator to be cloned
    302  * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
    303  *  user allocated space for the new clone. If NULL new memory will be allocated.
    304  *  If buffer is not large enough, new memory will be allocated.
    305  *  Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
    306  * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
    307  *  pointer to size of allocated space.
    308  *  If *pBufferSize == 0, a sufficient size for use in cloning will
    309  *  be returned ('pre-flighting')
    310  *  If *pBufferSize is not enough for a stack-based safe clone,
    311  *  new memory will be allocated.
    312  * @param status to indicate whether the operation went on smoothly or there were errors
    313  *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
    314  * @return pointer to the new clone
    315  * @stable ICU 2.0
    316  */
    317 U_STABLE UBreakIterator * U_EXPORT2
    318 ubrk_safeClone(
    319           const UBreakIterator *bi,
    320           void *stackBuffer,
    321           int32_t *pBufferSize,
    322           UErrorCode *status);
    323 
    324 #ifndef U_HIDE_DEPRECATED_API
    325 
    326 /**
    327   * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone().
    328   * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
    329   */
    330 #define U_BRK_SAFECLONE_BUFFERSIZE 1
    331 
    332 #endif /* U_HIDE_DEPRECATED_API */
    333 
    334 /**
    335 * Close a UBreakIterator.
    336 * Once closed, a UBreakIterator may no longer be used.
    337 * @param bi The break iterator to close.
    338  * @stable ICU 2.0
    339 */
    340 U_STABLE void U_EXPORT2
    341 ubrk_close(UBreakIterator *bi);
    342 
    343 #if U_SHOW_CPLUSPLUS_API
    344 
    345 U_NAMESPACE_BEGIN
    346 
    347 /**
    348  * \class LocalUBreakIteratorPointer
    349  * "Smart pointer" class, closes a UBreakIterator via ubrk_close().
    350  * For most methods see the LocalPointerBase base class.
    351  *
    352  * @see LocalPointerBase
    353  * @see LocalPointer
    354  * @stable ICU 4.4
    355  */
    356 U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
    357 
    358 U_NAMESPACE_END
    359 
    360 #endif
    361 
    362 /**
    363  * Sets an existing iterator to point to a new piece of text.
    364  * The break iterator retains a pointer to the supplied text.
    365  * The caller must not modify or delete the text while the BreakIterator
    366  * retains the reference.
    367  *
    368  * @param bi The iterator to use
    369  * @param text The text to be set
    370  * @param textLength The length of the text
    371  * @param status The error code
    372  * @stable ICU 2.0
    373  */
    374 U_STABLE void U_EXPORT2
    375 ubrk_setText(UBreakIterator* bi,
    376              const UChar*    text,
    377              int32_t         textLength,
    378              UErrorCode*     status);
    379 
    380 
    381 /**
    382  * Sets an existing iterator to point to a new piece of text.
    383  *
    384  * All index positions returned by break iterator functions are
    385  * native indices from the UText. For example, when breaking UTF-8
    386  * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
    387  * will be UTF-8 string indices, not UTF-16 positions.
    388  *
    389  * @param bi The iterator to use
    390  * @param text The text to be set.
    391  *             This function makes a shallow clone of the supplied UText.  This means
    392  *             that the caller is free to immediately close or otherwise reuse the
    393  *             UText that was passed as a parameter, but that the underlying text itself
    394  *             must not be altered while being referenced by the break iterator.
    395  * @param status The error code
    396  * @stable ICU 3.4
    397  */
    398 U_STABLE void U_EXPORT2
    399 ubrk_setUText(UBreakIterator* bi,
    400              UText*          text,
    401              UErrorCode*     status);
    402 
    403 
    404 
    405 /**
    406  * Determine the most recently-returned text boundary.
    407  *
    408  * @param bi The break iterator to use.
    409  * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
    410  * \ref ubrk_first, or \ref ubrk_last.
    411  * @stable ICU 2.0
    412  */
    413 U_STABLE int32_t U_EXPORT2
    414 ubrk_current(const UBreakIterator *bi);
    415 
    416 /**
    417  * Advance the iterator to the boundary following the current boundary.
    418  *
    419  * @param bi The break iterator to use.
    420  * @return The character index of the next text boundary, or UBRK_DONE
    421  * if all text boundaries have been returned.
    422  * @see ubrk_previous
    423  * @stable ICU 2.0
    424  */
    425 U_STABLE int32_t U_EXPORT2
    426 ubrk_next(UBreakIterator *bi);
    427 
    428 /**
    429  * Set the iterator position to the boundary preceding the current boundary.
    430  *
    431  * @param bi The break iterator to use.
    432  * @return The character index of the preceding text boundary, or UBRK_DONE
    433  * if all text boundaries have been returned.
    434  * @see ubrk_next
    435  * @stable ICU 2.0
    436  */
    437 U_STABLE int32_t U_EXPORT2
    438 ubrk_previous(UBreakIterator *bi);
    439 
    440 /**
    441  * Set the iterator position to zero, the start of the text being scanned.
    442  * @param bi The break iterator to use.
    443  * @return The new iterator position (zero).
    444  * @see ubrk_last
    445  * @stable ICU 2.0
    446  */
    447 U_STABLE int32_t U_EXPORT2
    448 ubrk_first(UBreakIterator *bi);
    449 
    450 /**
    451  * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
    452  * This is not the same as the last character.
    453  * @param bi The break iterator to use.
    454  * @return The character offset immediately <EM>beyond</EM> the last character in the
    455  * text being scanned.
    456  * @see ubrk_first
    457  * @stable ICU 2.0
    458  */
    459 U_STABLE int32_t U_EXPORT2
    460 ubrk_last(UBreakIterator *bi);
    461 
    462 /**
    463  * Set the iterator position to the first boundary preceding the specified offset.
    464  * The new position is always smaller than offset, or UBRK_DONE.
    465  * @param bi The break iterator to use.
    466  * @param offset The offset to begin scanning.
    467  * @return The text boundary preceding offset, or UBRK_DONE.
    468  * @see ubrk_following
    469  * @stable ICU 2.0
    470  */
    471 U_STABLE int32_t U_EXPORT2
    472 ubrk_preceding(UBreakIterator *bi,
    473            int32_t offset);
    474 
    475 /**
    476  * Advance the iterator to the first boundary following the specified offset.
    477  * The value returned is always greater than offset, or UBRK_DONE.
    478  * @param bi The break iterator to use.
    479  * @param offset The offset to begin scanning.
    480  * @return The text boundary following offset, or UBRK_DONE.
    481  * @see ubrk_preceding
    482  * @stable ICU 2.0
    483  */
    484 U_STABLE int32_t U_EXPORT2
    485 ubrk_following(UBreakIterator *bi,
    486            int32_t offset);
    487 
    488 /**
    489 * Get a locale for which text breaking information is available.
    490 * A UBreakIterator in a locale returned by this function will perform the correct
    491 * text breaking for the locale.
    492 * @param index The index of the desired locale.
    493 * @return A locale for which number text breaking information is available, or 0 if none.
    494 * @see ubrk_countAvailable
    495 * @stable ICU 2.0
    496 */
    497 U_STABLE const char* U_EXPORT2
    498 ubrk_getAvailable(int32_t index);
    499 
    500 /**
    501 * Determine how many locales have text breaking information available.
    502 * This function is most useful as determining the loop ending condition for
    503 * calls to \ref ubrk_getAvailable.
    504 * @return The number of locales for which text breaking information is available.
    505 * @see ubrk_getAvailable
    506 * @stable ICU 2.0
    507 */
    508 U_STABLE int32_t U_EXPORT2
    509 ubrk_countAvailable(void);
    510 
    511 
    512 /**
    513 * Returns true if the specfied position is a boundary position.  As a side
    514 * effect, leaves the iterator pointing to the first boundary position at
    515 * or after "offset".
    516 * @param bi The break iterator to use.
    517 * @param offset the offset to check.
    518 * @return True if "offset" is a boundary position.
    519 * @stable ICU 2.0
    520 */
    521 U_STABLE  UBool U_EXPORT2
    522 ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
    523 
    524 /**
    525  * Return the status from the break rule that determined the most recently
    526  * returned break position.  The values appear in the rule source
    527  * within brackets, {123}, for example.  For rules that do not specify a
    528  * status, a default value of 0 is returned.
    529  * <p>
    530  * For word break iterators, the possible values are defined in enum UWordBreak.
    531  * @stable ICU 2.2
    532  */
    533 U_STABLE  int32_t U_EXPORT2
    534 ubrk_getRuleStatus(UBreakIterator *bi);
    535 
    536 /**
    537  * Get the statuses from the break rules that determined the most recently
    538  * returned break position.  The values appear in the rule source
    539  * within brackets, {123}, for example.  The default status value for rules
    540  * that do not explicitly provide one is zero.
    541  * <p>
    542  * For word break iterators, the possible values are defined in enum UWordBreak.
    543  * @param bi        The break iterator to use
    544  * @param fillInVec an array to be filled in with the status values.
    545  * @param capacity  the length of the supplied vector.  A length of zero causes
    546  *                  the function to return the number of status values, in the
    547  *                  normal way, without attemtping to store any values.
    548  * @param status    receives error codes.
    549  * @return          The number of rule status values from rules that determined
    550  *                  the most recent boundary returned by the break iterator.
    551  * @stable ICU 3.0
    552  */
    553 U_STABLE  int32_t U_EXPORT2
    554 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
    555 
    556 /**
    557  * Return the locale of the break iterator. You can choose between the valid and
    558  * the actual locale.
    559  * @param bi break iterator
    560  * @param type locale type (valid or actual)
    561  * @param status error code
    562  * @return locale string
    563  * @stable ICU 2.8
    564  */
    565 U_STABLE const char* U_EXPORT2
    566 ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
    567 
    568 /**
    569   *  Set the subject text string upon which the break iterator is operating
    570   *  without changing any other aspect of the state.
    571   *  The new and previous text strings must have the same content.
    572   *
    573   *  This function is intended for use in environments where ICU is operating on
    574   *  strings that may move around in memory.  It provides a mechanism for notifying
    575   *  ICU that the string has been relocated, and providing a new UText to access the
    576   *  string in its new position.
    577   *
    578   *  Note that the break iterator never copies the underlying text
    579   *  of a string being processed, but always operates directly on the original text
    580   *  provided by the user. Refreshing simply drops the references to the old text
    581   *  and replaces them with references to the new.
    582   *
    583   *  Caution:  this function is normally used only by very specialized
    584   *            system-level code.   One example use case is with garbage collection
    585   *            that moves the text in memory.
    586   *
    587   * @param bi         The break iterator.
    588   * @param text       The new (moved) text string.
    589   * @param status     Receives errors detected by this function.
    590   *
    591   * @stable ICU 49
    592   */
    593 U_STABLE void U_EXPORT2
    594 ubrk_refreshUText(UBreakIterator *bi,
    595                        UText          *text,
    596                        UErrorCode     *status);
    597 
    598 
    599 #ifndef U_HIDE_DRAFT_API
    600 /**
    601  * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
    602  * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
    603  * more quickly than using ubrk_openRules. The compiled rules are not compatible across
    604  * different major versions of ICU, nor across platforms of different endianness or
    605  * different base character set family (ASCII vs EBCDIC). Supports preflighting (with
    606  * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
    607  * the binaryRules buffer. However, whether preflighting or not, if the actual length
    608  * is greater than INT32_MAX, then the function returns 0 and sets *status to
    609  * U_INDEX_OUTOFBOUNDS_ERROR.
    610 
    611  * @param bi            The break iterator to use.
    612  * @param binaryRules   Buffer to receive the compiled binary rules; set to NULL for
    613  *                      preflighting.
    614  * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
    615  *                      preflighting. Must be >= 0.
    616  * @param status        Pointer to UErrorCode to receive any errors, such as
    617  *                      U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
    618  *                      U_ILLEGAL_ARGUMENT_ERROR.
    619  * @return              The actual byte length of the binary rules, if <= INT32_MAX;
    620  *                      otherwise 0. If not preflighting and this is larger than
    621  *                      rulesCapacity, *status will be set to an error.
    622  * @see ubrk_openBinaryRules
    623  * @draft ICU 59
    624  */
    625 U_DRAFT int32_t U_EXPORT2
    626 ubrk_getBinaryRules(UBreakIterator *bi,
    627                     uint8_t *       binaryRules, int32_t rulesCapacity,
    628                     UErrorCode *    status);
    629 
    630 #endif  /* U_HIDE_DRAFT_API */
    631 
    632 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    633 
    634 #endif
    635