1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.text; 10 11 import java.util.Locale; 12 13 import com.ibm.icu.impl.SimpleFilteredSentenceBreakIterator; 14 import com.ibm.icu.util.ULocale; 15 16 /** 17 * The BreakIteratorFilter is used to modify the behavior of a BreakIterator 18 * by constructing a new BreakIterator which suppresses certain segment boundaries. 19 * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . 20 * For example, a typical English Sentence Break Iterator would break on the space 21 * in the string "Mr. Smith" (resulting in two segments), 22 * but with "Mr." as an exception, a filtered break iterator 23 * would consider the string "Mr. Smith" to be a single segment. 24 * 25 * <p>This class is not intended for public subclassing. 26 * 27 * @draft ICU 60 28 * @provisional This API might change or be removed in a future release. 29 */ 30 public abstract class FilteredBreakIteratorBuilder { 31 32 /** 33 * Construct a FilteredBreakIteratorBuilder based on sentence break exception rules in a locale. 34 * The rules are taken from CLDR exception data for the locale, 35 * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions 36 * This is the equivalent of calling createInstance(UErrorCode&) 37 * and then repeatedly calling addNoBreakAfter(...) with the contents 38 * of the CLDR exception data. 39 * @param where the locale. 40 * @return the new builder 41 * @draft ICU 60 42 * @provisional This API might change or be removed in a future release. 43 */ 44 public static final FilteredBreakIteratorBuilder getInstance(Locale where) { 45 return new SimpleFilteredSentenceBreakIterator.Builder(where); 46 } 47 48 /** 49 * Construct a FilteredBreakIteratorBuilder based on sentence break exception rules in a locale. 50 * The rules are taken from CLDR exception data for the locale, 51 * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions 52 * This is the equivalent of calling createInstance(UErrorCode&) 53 * and then repeatedly calling addNoBreakAfter(...) with the contents 54 * of the CLDR exception data. 55 * @param where the locale. 56 * @return the new builder 57 * @draft ICU 60 58 * @provisional This API might change or be removed in a future release. 59 */ 60 public static final FilteredBreakIteratorBuilder getInstance(ULocale where) { 61 return new SimpleFilteredSentenceBreakIterator.Builder(where); 62 } 63 64 /** 65 * Construct an empty FilteredBreakIteratorBuilder. 66 * In this state, it will not suppress any segment boundaries. 67 * @return the new builder 68 * @draft ICU 60 69 * @provisional This API might change or be removed in a future release. 70 */ 71 public static final FilteredBreakIteratorBuilder getEmptyInstance() { 72 return new SimpleFilteredSentenceBreakIterator.Builder(); 73 } 74 75 /** 76 * Suppress a certain string from being the end of a segment. 77 * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned 78 * by the iterator. 79 * @param str the string to suppress, such as "Mr." 80 * @return true if the string was not present and now added, 81 * false if the call was a no-op because the string was already being suppressed. 82 * @draft ICU 60 83 * @provisional This API might change or be removed in a future release. 84 */ 85 public abstract boolean suppressBreakAfter(CharSequence str); 86 87 /** 88 * Stop suppressing a certain string from being the end of the segment. 89 * This function does not create any new segment boundaries, but only serves to un-do 90 * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of 91 * locale data which may be suppressing certain strings. 92 * @param str the str the string to unsuppress, such as "Mr." 93 * @return true if the string was present and now removed, 94 * false if the call was a no-op because the string was not being suppressed. 95 * @draft ICU 60 96 * @provisional This API might change or be removed in a future release. 97 */ 98 public abstract boolean unsuppressBreakAfter(CharSequence str); 99 100 /** 101 * Wrap (adopt) an existing break iterator in a new filtered instance. 102 * Note that the wrappedBreakIterator is adopted by the new BreakIterator 103 * and should no longer be used by the caller. 104 * The FilteredBreakIteratorBuilder may be reused. 105 * @param wrappedBreakIterator the break iterator to wrap 106 * @return the new BreakIterator 107 * @draft ICU 60 108 * @provisional This API might change or be removed in a future release. 109 */ 110 public abstract BreakIterator wrapIteratorWithFilter(BreakIterator wrappedBreakIterator); 111 112 /** 113 * For subclass use 114 * @internal 115 * @deprecated internal to ICU 116 */ 117 @Deprecated 118 protected FilteredBreakIteratorBuilder() { 119 } 120 }