Home | History | Annotate | Download | only in unicode
      1 /*
      2 ********************************************************************************
      3 *   Copyright (C) 1997-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 ********************************************************************************
      6 */
      7 
      8 #ifndef FILTEREDBRK_H
      9 #define FILTEREDBRK_H
     10 
     11 #include "unicode/brkiter.h"
     12 
     13 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
     14 
     15 U_NAMESPACE_BEGIN
     16 
     17 /**
     18  * \file
     19  * \brief C++ API: FilteredBreakIteratorBuilder
     20  */
     21 
     22 /**
     23  * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator
     24  *  by constructing a new BreakIterator which suppresses certain segment boundaries.
     25  *  See  http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions .
     26  *  For example, a typical English Sentence Break Iterator would break on the space
     27  *  in the string "Mr. Smith" (resulting in two segments),
     28  *  but with "Mr." as an exception, a filtered break iterator
     29  *  would consider the string "Mr. Smith" to be a single segment.
     30  *
     31  * @internal technology preview
     32  */
     33 class U_I18N_API FilteredBreakIteratorBuilder : public UObject {
     34  public:
     35   /**
     36    * Destructor.
     37    * @internal technology preview
     38    */
     39   virtual ~FilteredBreakIteratorBuilder();
     40 
     41   /**
     42    * Construct a FilteredBreakIteratorBuilder based on rules in a locale.
     43    * The rules are taken from CLDR exception data for the locale,
     44    *  see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions
     45    *  This is the equivalent of calling createInstance(UErrorCode&)
     46    *    and then repeatedly calling suppressBreakAfter(...) with the contents
     47    *    of the CLDR exception data.
     48    * @param where the locale.
     49    * @param status The error code.
     50    * @return the new builder
     51    * @internal technology preview
     52    */
     53   static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
     54 
     55   /**
     56    * Construct an empty FilteredBreakIteratorBuilder.
     57    * In this state, it will not suppress any segment boundaries.
     58    * @param status The error code.
     59    * @return the new builder
     60    * @internal technology preview
     61    */
     62   static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
     63 
     64   /**
     65    * Suppress a certain string from being the end of a segment.
     66    * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned
     67    * by the iterator.
     68    * @param string the string to suppress, such as "Mr."
     69    * @param status error code
     70    * @return TRUE if the string was not present and now added,
     71    * FALSE if the call was a no-op because the string was already being suppressed.
     72    * @internal technology preview
     73    */
     74   virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
     75 
     76   /**
     77    * Stop suppressing a certain string from being the end of the segment.
     78    * This function does not create any new segment boundaries, but only serves to un-do
     79    * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
     80    * locale data which may be suppressing certain strings.
     81    * @param string the string to stop suppressing, such as "Mr."
     82    * @param status error code
     83    * @return TRUE if the string was present and now removed,
     84    * FALSE if the call was a no-op because the string was not being suppressed.
     85    * @internal technology preview
     86    */
     87   virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
     88 
     89   /**
     90    * Wrap (adopt) an existing break iterator in a new filtered instance.
     91    * The resulting BreakIterator is owned by the caller.
     92    * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed.
     93    * Note that the adoptBreakIterator is adopted by the new BreakIterator
     94    * and should no longer be used by the caller.
     95    * The FilteredBreakIteratorBuilder may be reused.
     96    * @param adoptBreakIterator the break iterator to adopt
     97    * @param status error code
     98    * @return the new BreakIterator, owned by the caller.
     99    * @internal technology preview
    100    */
    101   virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
    102 
    103  protected:
    104   /**
    105    * For subclass use; clients obtain a builder through createInstance().
    106    * @internal technology preview
    107    */
    108   FilteredBreakIteratorBuilder();
    109 };
    110 
    111 
    112 U_NAMESPACE_END
    113 
    114 #endif // #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
    115 
    116 #endif // #ifndef FILTEREDBRK_H
    117