Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2001-2008 IBM and others. All rights reserved.
      4 **********************************************************************
      5 *   Date        Name        Description
      6 *  06/28/2001   synwee      Creation.
      7 **********************************************************************
      8 */
      9 #ifndef USEARCH_H
     10 #define USEARCH_H
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     15 
     16 #include "unicode/ucol.h"
     17 #include "unicode/ucoleitr.h"
     18 #include "unicode/ubrk.h"
     19 
     20 /**
     21  * \file
     22  * \brief C API: StringSearch
     23  *
     24  * C Apis for an engine that provides language-sensitive text searching based
     25  * on the comparison rules defined in a <tt>UCollator</tt> data struct,
     26  * see <tt>ucol.h</tt>. This ensures that language eccentricity can be
     27  * handled, e.g. for the German collator, characters &szlig; and SS will be matched
     28  * if case is chosen to be ignored.
     29  * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     30  * "ICU Collation Design Document"</a> for more information.
     31  * <p>
     32  * The algorithm implemented is a modified form of the Boyer-Moore search.
     33  * See
     34  * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
     35  * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i>
     36  * in February, 1999, for further information on the algorithm.
     37  * <p>
     38  * There are 2 match options for selection:<br>
     39  * Let S' be the sub-string of a text string S between the offsets start and
     40  * end [start, end].
     41  * <br>
     42  * A pattern string P matches a text string S at the offsets [start, end]
     43  * if
     44  * <pre>
     45  * option 1. Some canonical equivalent of P matches some canonical equivalent
     46  *           of S'
     47  * option 2. P matches S' and if P starts or ends with a combining mark,
     48  *           there exists no non-ignorable combining mark before or after S'
     49  *           in S respectively.
     50  * </pre>
     51  * Option 2. will be the default.
     52  * <p>
     53  * This search has APIs similar to that of other text iteration mechanisms
     54  * such as the break iterators in <tt>ubrk.h</tt>. Using these
     55  * APIs, it is easy to scan through text looking for all occurrences of
     56  * a given pattern. This search iterator allows changing of direction by
     57  * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
     58  * Though a direction change can occur without calling <tt>reset</tt> first,
     59  * this operation comes with some speed penalty.
     60  * Generally, the matches found in the forward direction are the same as the
     61  * matches found in the backward direction, in reverse order.
     62  * <p>
     63  * <tt>usearch.h</tt> provides APIs to specify the starting position
     64  * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
     65  * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the
     66  * starting position will be set as it is specified, please take note that
     67  * there are some dangerous positions at which the search may produce incorrect
     68  * results:
     69  * <ul>
     70  * <li> The midst of a substring that requires normalization.
     71  * <li> If the following match is to be found, the position should not be the
     72  *      second character which requires to be swapped with the preceding
     73  *      character. Vice versa, if the preceding match is to be found,
     74  *      position to search from should not be the first character which
     75  *      requires to be swapped with the next character. E.g. certain Thai and
     76  *      Lao characters require swapping.
     77  * <li> If a following pattern match is to be found, any position within a
     78  *      contracting sequence except the first will fail. Vice versa if a
     79  *      preceding pattern match is to be found, an invalid starting point
     80  *      would be any character within a contracting sequence except the last.
     81  * </ul>
     82  * <p>
     83  * A breakiterator can be used if only matches at logical breaks are desired.
     84  * Using a breakiterator will only give you results that exactly match the
     85  * boundaries given by the breakiterator. For instance the pattern "e" will
     86  * not be found in the string "\u00e9" if a character break iterator is used.
     87  * <p>
     88  * Options are provided to handle overlapping matches.
     89  * E.g. In English, overlapping matches produce the results 0 and 2
     90  * for the pattern "abab" in the text "ababab", whereas mutually
     91  * exclusive matches only produce the result 0.
     92  * <p>
     93  * Though collator attributes will be taken into consideration while
     94  * performing matches, there are no APIs here for setting and getting the
     95  * attributes. These attributes can be set by getting the collator
     96  * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
     97  * Lastly to update String Search to the new collator attributes,
     98  * usearch_reset() has to be called.
     99  * <p>
    100  * Restriction: <br>
    101  * Currently there are no composite characters that consist of a
    102  * character with combining class > 0 before a character with combining
    103  * class == 0. However, if such a character exists in the future, the
    104  * search mechanism does not guarantee the results for option 1.
    105  *
    106  * <p>
    107  * Example of use:<br>
    108  * <pre><code>
    109  * char *tgtstr = "The quick brown fox jumped over the lazy fox";
    110  * char *patstr = "fox";
    111  * UChar target[64];
    112  * UChar pattern[16];
    113  * UErrorCode status = U_ZERO_ERROR;
    114  * u_uastrcpy(target, tgtstr);
    115  * u_uastrcpy(pattern, patstr);
    116  *
    117  * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US",
    118  *                                  NULL, &status);
    119  * if (U_SUCCESS(status)) {
    120  *     for (int pos = usearch_first(search, &status);
    121  *          pos != USEARCH_DONE;
    122  *          pos = usearch_next(search, &status))
    123  *     {
    124  *         printf("Found match at %d pos, length is %d\n", pos,
    125  *                                        usearch_getMatchedLength(search));
    126  *     }
    127  * }
    128  *
    129  * usearch_close(search);
    130  * </code></pre>
    131  * @stable ICU 2.4
    132  */
    133 
    134 /**
    135 * Returned by usearch_previous and usearch_next once all valid matches have
    136 * been returned, and by usearch_first and usearch_last if there is no match.
    137 * @stable ICU 2.4
    138 */
    139 #define USEARCH_DONE -1
    140 
    141 /**
    142 * Data structure for searching; only forward-declared at this point.
    143 * @stable ICU 2.4
    144 */
    145 struct UStringSearch;
    146 /**
    147 * Typedef for struct UStringSearch, the data structure for searching.
    148 * @stable ICU 2.4
    149 */
    150 typedef struct UStringSearch UStringSearch;
    151 
    152 /** Search attributes accepted by usearch_setAttribute and usearch_getAttribute.
    153 * @stable ICU 2.4
    154 */
    155 typedef enum {
    156     /** Option for overlapping matches; see the header documentation. */
    157     USEARCH_OVERLAP,
    158     /**
    159     Option for canonical matches: option 1 in the header documentation.
    160     The default value is USEARCH_OFF.
    161     */
    162     USEARCH_CANONICAL_MATCH,
    163     USEARCH_ATTRIBUTE_COUNT /**< one past the last attribute listed above */
    164 } USearchAttribute;
    165 
    166 /** Values that a USearchAttribute can be set to.
    167 * @stable ICU 2.4
    168 */
    169 typedef enum {
    170     /** Default value for any USearchAttribute */
    171     USEARCH_DEFAULT = -1,
    172     /** "Off" value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
    173     USEARCH_OFF,
    174     /** "On" value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
    175     USEARCH_ON,
    176     USEARCH_ATTRIBUTE_VALUE_COUNT /**< one past the last value listed above */
    177 } USearchAttributeValue;
    178 
    179 /* open and close ------------------------------------------------------ */
    180 
    181 /**
    182 * Creates a search iterator data struct using the language rule set of the
    183 * argument locale. A collator will be created in the process; it is owned by
    184 * this search and will be deleted in <tt>usearch_close</tt>.
    185 * @param pattern for matching
    186 * @param patternlength length of the pattern, -1 for null-termination
    187 * @param text text string
    188 * @param textlength length of the text string, -1 for null-termination
    189 * @param locale name of locale for the rules to be used
    190 * @param breakiter A BreakIterator that will be used to restrict the points
    191 *                  at which matches are detected. If a match is found, but
    192 *                  the match's start or end index is not a boundary as
    193 *                  determined by the <tt>BreakIterator</tt>, the match will
    194 *                  be rejected and another will be searched for.
    195 *                  If this parameter is <tt>NULL</tt>, no break detection is
    196 *                  attempted.
    197 * @param status for errors if any occur. If pattern or text is NULL, or if
    198 *               patternlength or textlength is 0, then a
    199 *               U_ILLEGAL_ARGUMENT_ERROR is returned.
    200 * @return search iterator data structure, or NULL if there is an error.
    201 * @stable ICU 2.4
    202 */
    203 U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar          *pattern,
    204                                               int32_t         patternlength,
    205                                         const UChar          *text,
    206                                               int32_t         textlength,
    207                                         const char           *locale,
    208                                               UBreakIterator *breakiter,
    209                                               UErrorCode     *status);
    210 
    211 /**
    212 * Creates a search iterator data struct using the language rule set of the
    213 * argument collator. Note, user retains the ownership of this collator, thus
    214 * the responsibility of deletion lies with the user.
    215 * NOTE: string search cannot be instantiated from a collator that has
    216 * collate digits as numbers (CODAN) turned on.
    217 * @param pattern for matching
    218 * @param patternlength length of the pattern, -1 for null-termination
    219 * @param text text string
    220 * @param textlength length of the text string, -1 for null-termination
    221 * @param collator used for the language rules
    222 * @param breakiter A BreakIterator that will be used to restrict the points
    223 *                  at which matches are detected. If a match is found, but
    224 *                  the match's start or end index is not a boundary as
    225 *                  determined by the <tt>BreakIterator</tt>, the match will
    226 *                  be rejected and another will be searched for.
    227 *                  If this parameter is <tt>NULL</tt>, no break detection is
    228 *                  attempted.
    229 * @param status for errors if any occur. If collator, pattern or text is NULL,
    230 *               or if patternlength or textlength is 0, then a
    231 *               U_ILLEGAL_ARGUMENT_ERROR is returned.
    232 * @return search iterator data structure, or NULL if there is an error.
    233 * @stable ICU 2.4
    234 */
    235 U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator(
    236                                          const UChar *pattern,
    237                                                int32_t         patternlength,
    238                                          const UChar          *text,
    239                                                int32_t         textlength,
    240                                          const UCollator      *collator,
    241                                                UBreakIterator *breakiter,
    242                                                UErrorCode     *status);
    243 
    244 /**
    245 * Destroys and cleans up the search iterator data struct.
    246 * If a collator was created in <tt>usearch_open</tt>, it will be destroyed here.
    247 * @param searchiter data struct to clean up
    248 * @stable ICU 2.4
    249 */
    250 U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter);
    251 
    252 /* get and set methods -------------------------------------------------- */
    253 
    254 /**
    255 * Sets the current position in the text string from which the next search
    256 * will start. Clears previous states.
    257 * This method takes the argument index and sets the position in the text
    258 * string accordingly without checking if the index is pointing to a
    259 * valid starting point to begin searching.
    260 * Search positions that may render incorrect results are highlighted in the
    261 * header comments.
    262 * @param strsrch search iterator data struct
    263 * @param position position to start next search from. If position lies
    264 *          outside the text range for searching,
    265 *          a U_INDEX_OUTOFBOUNDS_ERROR will be returned.
    266 * @param status error status if any.
    267 * @stable ICU 2.4
    268 */
    269 U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
    270                                         int32_t    position,
    271                                         UErrorCode    *status);
    272 
    273 /**
    274 * Returns the current index in the string text being searched.
    275 * @param strsrch search iterator data struct
    276 * @return current index, or <tt>USEARCH_DONE</tt> if the iteration has gone past
    277 *         the end of the text (or past the beginning for a backwards search)
    278 * @see #USEARCH_DONE
    279 * @stable ICU 2.4
    280 */
    281 U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
    282 
    283 /**
    284 * Sets a text searching attribute from the enum USearchAttribute
    285 * to a value from the enum USearchAttributeValue.
    286 * <tt>USEARCH_DEFAULT</tt> can be used with any attribute to reset it.
    287 * @param strsrch search iterator data struct
    288 * @param attribute text attribute to be set
    289 * @param value text attribute value
    290 * @param status for errors if any occur
    291 * @see #usearch_getAttribute
    292 * @stable ICU 2.4
    293 */
    294 U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch         *strsrch,
    295                                            USearchAttribute       attribute,
    296                                            USearchAttributeValue  value,
    297                                            UErrorCode            *status);
    298 
    299 /**
    300 * Gets the value of a text searching attribute.
    301 * @param strsrch search iterator data struct
    302 * @param attribute text attribute to be retrieved
    303 * @return text attribute value
    304 * @see #usearch_setAttribute
    305 * @stable ICU 2.4
    306 */
    307 U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute(
    308                                          const UStringSearch    *strsrch,
    309                                                USearchAttribute  attribute);
    310 
    311 /**
    312 * Returns the index of the match in the text string that was searched.
    313 * This call returns a valid result only after a successful call to
    314 * <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
    315 * or <tt>usearch_last</tt>.
    316 * Just after construction, or after a searching method returns
    317 * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
    318 * <p>
    319 * Use <tt>usearch_getMatchedLength</tt> to get the matched string length.
    320 * @param strsrch search iterator data struct
    321 * @return index of the matched substring within the text string being
    322 *         searched, or <tt>USEARCH_DONE</tt> if there is no current match.
    323 * @see #usearch_first
    324 * @see #usearch_next
    325 * @see #usearch_previous
    326 * @see #usearch_last
    327 * @see #USEARCH_DONE
    328 * @stable ICU 2.4
    329 */
    330 U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
    331                                                const UStringSearch *strsrch);
    332 
    333 /**
    334 * Returns the length of the text in the string that matches the search pattern.
    335 * This call returns a valid result only after a successful call to
    336 * <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
    337 * or <tt>usearch_last</tt>.
    338 * Just after construction, or after a searching method returns
    339 * <tt>USEARCH_DONE</tt>, this method will return 0.
    340 * @param strsrch search iterator data struct
    341 * @return The length of the match in the string text, or 0 if there is
    342 *         currently no match.
    343 * @see #usearch_first
    344 * @see #usearch_next
    345 * @see #usearch_previous
    346 * @see #usearch_last
    347 * @see #USEARCH_DONE
    348 * @stable ICU 2.4
    349 */
    350 U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
    351                                                const UStringSearch *strsrch);
    352 
    353 /**
    354 * Returns the text that was matched by the most recent call to
    355 * <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
    356 * or <tt>usearch_last</tt>.
    357 * If the iterator is not pointing at a valid match (e.g. just after
    358 * construction or after <tt>USEARCH_DONE</tt> has been returned), returns
    359 * an empty string. If result is not large enough to store the matched text,
    360 * result will be filled with the partial text and a U_BUFFER_OVERFLOW_ERROR
    361 * will be returned in status. result will be null-terminated whenever
    362 * possible. If the buffer fits the matched text exactly, a null-termination
    363 * is not possible, and a U_STRING_NOT_TERMINATED_ERROR is set in status.
    364 * Pre-flighting can be done either with resultCapacity = 0 or with the API
    365 * <tt>usearch_getMatchedLength</tt>.
    366 * @param strsrch search iterator data struct
    367 * @param result UChar buffer to store the matched string
    368 * @param resultCapacity length of the result buffer
    369 * @param status error returned if result is not large enough
    370 * @return exact length of the matched text, not counting the null-termination
    371 * @see #usearch_first
    372 * @see #usearch_next
    373 * @see #usearch_previous
    374 * @see #usearch_last
    375 * @see #USEARCH_DONE
    376 * @stable ICU 2.4
    377 */
    378 U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
    379                                             UChar         *result,
    380                                             int32_t        resultCapacity,
    381                                             UErrorCode    *status);
    382 
    383 #if !UCONFIG_NO_BREAK_ITERATION
    384 
    385 /**
    386 * Sets the BreakIterator that will be used to restrict the points at which
    387 * matches are detected.
    388 * @param strsrch search iterator data struct
    389 * @param breakiter A BreakIterator that will be used to restrict the points
    390 *                  at which matches are detected. If a match is found, but
    391 *                  the match's start or end index is not a boundary as
    392 *                  determined by the <tt>BreakIterator</tt>, the match will
    393 *                  be rejected and another will be searched for.
    394 *                  If this parameter is <tt>NULL</tt>, no break detection is
    395 *                  attempted.
    396 * @param status for errors if any occur
    397 * @see #usearch_getBreakIterator
    398 * @stable ICU 2.4
    399 */
    400 U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch  *strsrch,
    401                                                UBreakIterator *breakiter,
    402                                                UErrorCode     *status);
    403 
    404 /**
    405 * Returns the BreakIterator that is used to restrict the points at which
    406 * matches are detected. This will be the same object that was passed to
    407 * <tt>usearch_open</tt>, <tt>usearch_openFromCollator</tt> or
    408 * <tt>usearch_setBreakIterator</tt>. Note that <tt>NULL</tt>
    409 * is a legal value; it means that break detection should not be attempted.
    410 * @param strsrch search iterator data struct
    411 * @return break iterator used
    412 * @see #usearch_setBreakIterator
    413 * @stable ICU 2.4
    414 */
    415 U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
    416                                               const UStringSearch *strsrch);
    417 
    418 #endif
    419 
    420 /**
    421 * Sets the string text to be searched. Text iteration will hence begin at the
    422 * start of the text string. This method is useful if you want to re-use an
    423 * iterator to search for the same pattern within a different body of text.
    424 * @param strsrch search iterator data struct
    425 * @param text new string in which to look for matches
    426 * @param textlength length of the new string, -1 for null-termination
    427 * @param status for errors if any occur. If text is NULL, or textlength is 0,
    428 *               then a U_ILLEGAL_ARGUMENT_ERROR is returned with no change
    429 *               done to strsrch.
    430 * @see #usearch_getText
    431 * @stable ICU 2.4
    432 */
    433 U_STABLE void U_EXPORT2 usearch_setText(      UStringSearch *strsrch,
    434                                       const UChar         *text,
    435                                             int32_t        textlength,
    436                                             UErrorCode    *status);
    437 
    438 /**
    439 * Returns the string text to be searched.
    440 * @param strsrch search iterator data struct
    441 * @param length output parameter for the length of the string text
    442 * @return string text
    443 * @see #usearch_setText
    444 * @stable ICU 2.4
    445 */
    446 U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
    447                                                int32_t       *length);
    448 
    449 /**
    450 * Gets the collator used for the language rules.
    451 * <p>
    452 * Deleting the returned <tt>UCollator</tt> before calling
    453 * <tt>usearch_close</tt> would cause the string search to fail.
    454 * <tt>usearch_close</tt> will delete the collator if this search owns it.
    455 * @param strsrch search iterator data struct
    456 * @return collator used by this search
    457 * @stable ICU 2.4
    458 */
    459 U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
    460                                                const UStringSearch *strsrch);
    461 
    462 /**
    463 * Sets the collator used for the language rules. User retains the ownership
    464 * of this collator, thus the responsibility of deletion lies with the user.
    465 * This method causes internal data such as Boyer-Moore shift tables to
    466 * be recalculated, but the iterator's position is unchanged.
    467 * @param strsrch search iterator data struct
    468 * @param collator collator to be used
    469 * @param status for errors if any occur
    470 * @stable ICU 2.4
    471 */
    472 U_STABLE void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch,
    473                                           const UCollator     *collator,
    474                                                 UErrorCode    *status);
    475 
    476 /**
    477 * Sets the pattern used for matching.
    478 * Internal data like the Boyer-Moore table will be recalculated, but the
    479 * iterator's position is unchanged.
    480 * @param strsrch search iterator data struct
    481 * @param pattern string
    482 * @param patternlength pattern length, -1 for null-terminated string
    483 * @param status for errors if any occur. If pattern is NULL, or patternlength
    484 *               is 0, then a U_ILLEGAL_ARGUMENT_ERROR is returned with no
    485 *               change done to strsrch.
    486 * @stable ICU 2.4
    487 */
    488 U_STABLE void U_EXPORT2 usearch_setPattern(      UStringSearch *strsrch,
    489                                          const UChar         *pattern,
    490                                                int32_t        patternlength,
    491                                                UErrorCode    *status);
    492 
    493 /**
    494 * Gets the search pattern.
    495 * @param strsrch search iterator data struct
    496 * @param length returns the length of the pattern, -1 indicates that the
    497 *               pattern is null-terminated
    498 * @return pattern string
    499 * @stable ICU 2.4
    500 */
    501 U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
    502                                                const UStringSearch *strsrch,
    503                                                      int32_t       *length);
    504 
    505 /* methods ------------------------------------------------------------- */
    506 
    507 /**
    508 * Returns the first index at which the string text matches the search
    509 * pattern.
    510 * The iterator is adjusted so that its current index (as returned by
    511 * <tt>usearch_getOffset</tt>) is the match position if one was found.
    512 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
    513 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
    514 * @param strsrch search iterator data struct
    515 * @param status for errors if any occur
    516 * @return The character index of the first match, or
    517 * <tt>USEARCH_DONE</tt> if there are no matches.
    518 * @see #usearch_getOffset
    519 * @see #USEARCH_DONE
    520 * @stable ICU 2.4
    521 */
    522 U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
    523                                            UErrorCode    *status);
    524 
    525 /**
    526 * Returns the first index greater than <tt>position</tt> at which the string
    527 * text matches the search pattern.
    528 * The iterator is adjusted so that its current
    529 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
    530 * one was found.
    531 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
    532 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
    533 * <p>
    534 * Search positions that may render incorrect results are highlighted in the
    535 * header comments. If position lies outside the text range
    536 * for searching, a U_INDEX_OUTOFBOUNDS_ERROR will be returned.
    537 * @param strsrch search iterator data struct
    538 * @param position to start the search at
    539 * @param status for errors if any occur
    540 * @return The character index of the first match following <tt>position</tt>,
    541 *         or <tt>USEARCH_DONE</tt> if there are no matches.
    542 * @see #usearch_getOffset
    543 * @see #USEARCH_DONE
    544 * @stable ICU 2.4
    545 */
    546 U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
    547                                                int32_t    position,
    548                                                UErrorCode    *status);
    549 
    550 /**
    551 * Returns the last index in the target text at which it matches the search
    552 * pattern. The iterator is adjusted so that its current
    553 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
    554 * one was found.
    555 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
    556 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
    557 * @param strsrch search iterator data struct
    558 * @param status for errors if any occur
    559 * @return The index of the last match, or <tt>USEARCH_DONE</tt> if there
    560 *         are no matches.
    561 * @see #usearch_getOffset
    562 * @see #USEARCH_DONE
    563 * @stable ICU 2.4
    564 */
    565 U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
    566                                           UErrorCode    *status);
    567 
    568 /**
    569 * Returns the first index less than <tt>position</tt> at which the string text
    570 * matches the search pattern. The iterator is adjusted so that its current
    571 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
    572 * one was found.
    573 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
    574 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
    575 * <p>
    576 * Search positions that may render incorrect results are highlighted in the
    577 * header comments. If position lies outside the text range
    578 * for searching, a U_INDEX_OUTOFBOUNDS_ERROR will be returned.
    579 * @param strsrch search iterator data struct
    580 * @param position index position the search is to begin at
    581 * @param status for errors if any occur
    582 * @return The character index of the first match preceding <tt>position</tt>,
    583 *         or <tt>USEARCH_DONE</tt> if there are no matches.
    584 * @see #usearch_getOffset
    585 * @see #USEARCH_DONE
    586 * @stable ICU 2.4
    587 */
    588 U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
    589                                                int32_t    position,
    590                                                UErrorCode    *status);
    591 
    592 /**
    593 * Returns the index of the next point at which the string text matches the
    594 * search pattern, starting from the current position.
    595 * The iterator is adjusted so that its current
    596 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
    597 * one was found.
    598 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
    599 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
    600 * @param strsrch search iterator data struct
    601 * @param status for errors if any occur
    602 * @return The index of the next match after the current position, or
    603 *         <tt>USEARCH_DONE</tt> if there are no more matches.
    604 * @see #usearch_first
    605 * @see #usearch_getOffset
    606 * @see #USEARCH_DONE
    607 * @stable ICU 2.4
    608 */
    609 U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
    610                                           UErrorCode    *status);
    611 
    612 /**
    613 * Returns the index of the previous point at which the string text matches
    614 * the search pattern, starting at the current position.
    615 * The iterator is adjusted so that its current
    616 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
    617 * one was found.
    618 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
    619 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
    620 * @param strsrch search iterator data struct
    621 * @param status for errors if any occur
    622 * @return The index of the previous match before the current position,
    623 *         or <tt>USEARCH_DONE</tt> if there are no more matches.
    624 * @see #usearch_last
    625 * @see #usearch_getOffset
    626 * @see #USEARCH_DONE
    627 * @stable ICU 2.4
    628 */
    629 U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
    630                                               UErrorCode    *status);
    631 
    632 /**
    633 * Reset the iteration.
    634 * After a reset, the search will begin at the start of the text string if a
    635 * forward iteration is initiated first. Otherwise, if a backward iteration
    636 * is initiated first, the search will begin at the end of the text
    637 * string.
    638 * @param strsrch search iterator data struct
    639 * @see #usearch_first
    640 * @stable ICU 2.4
    641 */
    642 U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
    643 
    644 /**
    645   *  Simple forward search for the pattern, starting at a specified index,
    646   *     and using a default set of search options.
    647   *
    648   *  This is an experimental function, and is not an official part of the
    649   *      ICU API.
    650   *
    651   *  The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
    652   *
    653   *  The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
    654   *  any Break Iterator are ignored.
    655   *
    656   *  Matches obey the following constraints:
    657   *
    658   *      Characters at the start or end positions of a match that are ignorable
    659   *      for collation are not included as part of the match, unless they
    660   *      are part of a combining sequence, as described below.
    661   *
    662   *      A match will not include a partial combining sequence.  Combining
    663   *      character sequences are considered to be inseparable units,
    664   *      and either match the pattern completely, or are considered to not match
    665   *      at all.  Thus, for example, an A followed by a combining accent mark will
    666   *      not be found when searching for a plain (unaccented) A (unless
    667   *      the collation strength has been set to ignore all accents).
    668   *
    669   *      When beginning a search, the initial starting position, startIdx,
    670   *      is assumed to be an acceptable match boundary with respect to
    671   *      combining characters.  A combining sequence that spans across the
    672   *      starting point will not suppress a match beginning at startIdx.
    673   *
    674   *      Characters that expand to multiple collation elements
    675   *      (German sharp-S becoming 'ss', or the composed forms of accented
    676   *      characters, for example) also must match completely.
    677   *      Searching for a single 's' in a string containing only a sharp-s will
    678   *      find no match.
    679   *
    680   *
    681   *  @param strsrch    the UStringSearch struct, which references both
    682   *                    the text to be searched and the pattern being sought.
    683   *  @param startIdx   The index into the text to begin the search.
    684   *  @param matchStart An out parameter, the starting index of the matched text.
    685   *                    This parameter may be NULL.
    686   *                    A value of -1 will be returned if no match was found.
    687   *  @param matchLimit Out parameter, the index of the first position following the matched text.
    688   *                    The matchLimit will be at a suitable position for beginning a subsequent search
    689   *                    in the input text.
    690   *                    This parameter may be NULL.
    691   *                    A value of -1 will be returned if no match was found.
    692   *
    693   *  @param status     Report any errors.  Note that no match found is not an error.
    694   *  @return           TRUE if a match was found, FALSE otherwise.
    695   *
    696   *  @internal
    697   */
    698 U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
    699                                           int32_t        startIdx,
    700                                           int32_t        *matchStart,
    701                                           int32_t        *matchLimit,
    702                                           UErrorCode     *status);
    703 
    704 /**
    705   *  Simple backwards search for the pattern, starting at a specified index,
    706   *     and using a default set of search options.
    707   *
    708   *  This is an experimental function, and is not an official part of the
    709   *      ICU API.
    710   *
    711   *  The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
    712   *
    713   *  The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
    714   *  any Break Iterator are ignored.
    715   *
    716   *  Matches obey the following constraints:
    717   *
    718   *      Characters at the start or end positions of a match that are ignorable
    719   *      for collation are not included as part of the match, unless they
    720   *      are part of a combining sequence, as described below.
    721   *
    722   *      A match will not include a partial combining sequence.  Combining
    723   *      character sequences are considered to be inseparable units,
    724   *      and either match the pattern completely, or are considered to not match
    725   *      at all.  Thus, for example, an A followed by a combining accent mark will
    726   *      not be found when searching for a plain (unaccented) A (unless
    727   *      the collation strength has been set to ignore all accents).
    728   *
    729   *      When beginning a search, the initial starting position, startIdx,
    730   *      is assumed to be an acceptable match boundary with respect to
    731   *      combining characters.  A combining sequence that spans across the
    732   *      starting point will not suppress a match beginning at startIdx.
    733   *
    734   *      Characters that expand to multiple collation elements
    735   *      (German sharp-S becoming 'ss', or the composed forms of accented
    736   *      characters, for example) also must match completely.
    737   *      Searching for a single 's' in a string containing only a sharp-s will
    738   *      find no match.
    739   *
    740   *
    741   *  @param strsrch    the UStringSearch struct, which references both
    742   *                    the text to be searched and the pattern being sought.
    743   *  @param startIdx   The index into the text to begin the search.
    744   *  @param matchStart An out parameter, the starting index of the matched text.
    745   *                    This parameter may be NULL.
    746   *                    A value of -1 will be returned if no match was found.
    747   *  @param matchLimit Out parameter, the index of the first position following the matched text.
    748   *                    The matchLimit will be at a suitable position for beginning a subsequent search
    749   *                    in the input text.
    750   *                    This parameter may be NULL.
    751   *                    A value of -1 will be returned if no match was found.
    752   *
    753   *  @param status     Report any errors.  Note that no match found is not an error.
    754   *  @return           TRUE if a match was found, FALSE otherwise.
    755   *
    756   *  @internal
    757   */
    758 U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
    759                                                    int32_t        startIdx,
    760                                                    int32_t        *matchStart,
    761                                                    int32_t        *matchLimit,
    762                                                    UErrorCode     *status);
    763 
    764 #endif /* #if !UCONFIG_NO_COLLATION  && !UCONFIG_NO_BREAK_ITERATION */
    765 
    766 #endif
    767