Home | History | Annotate | Download | only in text
// © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 package com.ibm.icu.text;
      4 
      5 import java.util.Locale;
      6 
      7 import com.ibm.icu.impl.CaseMapImpl;
      8 import com.ibm.icu.impl.UCaseProps;
      9 import com.ibm.icu.lang.UCharacter;
     10 import com.ibm.icu.util.ULocale;
     11 
     12 /**
     13  * Low-level case mapping options and methods. Immutable.
     14  * "Setters" return instances with the union of the current and new options set.
     15  *
     16  * This class is not intended for public subclassing.
     17  *
     18  * @draft ICU 59
     19  * @provisional This API might change or be removed in a future release.
     20  */
     21 public abstract class CaseMap {
     22     /**
     23      * @internal
     24      * @deprecated This API is ICU internal only.
     25      */
     26     @Deprecated
     27     protected int internalOptions;
     28 
     29     private CaseMap(int opt) { internalOptions = opt; }
     30 
     31     private static int getCaseLocale(Locale locale) {
     32         if (locale == null) {
     33             locale = Locale.getDefault();
     34         }
     35         return UCaseProps.getCaseLocale(locale);
     36     }
     37 
     38     /**
     39      * @return Lowercasing object with default options.
     40      * @draft ICU 59
     41      * @provisional This API might change or be removed in a future release.
     42      */
     43     public static Lower toLower() { return Lower.DEFAULT; }
     44     /**
     45      * @return Uppercasing object with default options.
     46      * @draft ICU 59
     47      * @provisional This API might change or be removed in a future release.
     48      */
     49     public static Upper toUpper() { return Upper.DEFAULT; }
     50     /**
     51      * @return Titlecasing object with default options.
     52      * @draft ICU 59
     53      * @provisional This API might change or be removed in a future release.
     54      */
     55     public static Title toTitle() { return Title.DEFAULT; }
     56     /**
     57      * @return Case folding object with default options.
     58      * @draft ICU 59
     59      * @provisional This API might change or be removed in a future release.
     60      */
     61     public static Fold fold() { return Fold.DEFAULT; }
     62 
    /**
     * Returns an instance that behaves like this one but
     * omits unchanged text when case-mapping with {@link Edits}.
     *
     * <p>The nested option classes override this method with a covariant return type,
     * so option calls can be chained without casting.
     *
     * @return an options object with this option.
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    public abstract CaseMap omitUnchangedText();
     72 
     73     /**
     74      * Lowercasing options and methods. Immutable.
     75      *
     76      * @see #toLower()
     77      * @draft ICU 59
     78      * @provisional This API might change or be removed in a future release.
     79      */
     80     public static final class Lower extends CaseMap {
     81         private static final Lower DEFAULT = new Lower(0);
     82         private static final Lower OMIT_UNCHANGED = new Lower(CaseMapImpl.OMIT_UNCHANGED_TEXT);
     83         private Lower(int opt) { super(opt); }
     84 
     85         /**
     86          * {@inheritDoc}
     87          * @draft ICU 59
     88          * @provisional This API might change or be removed in a future release.
     89          */
     90         @Override
     91         public Lower omitUnchangedText() {
     92             return OMIT_UNCHANGED;
     93         }
     94 
     95         /**
     96          * Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
     97          * Casing is locale-dependent and context-sensitive.
     98          * The result may be longer or shorter than the original.
     99          *
    100          * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
    101          *                  (See {@link ULocale#toLocale}.)
    102          * @param src       The original string.
    103          * @param dest      A buffer for the result string. Must not be null.
    104          * @param edits     Records edits for index mapping, working with styled text,
    105          *                  and getting only changes (if any).
    106          *                  This function calls edits.reset() first. edits can be null.
    107          * @return dest with the result string (or only changes) appended.
    108          *
    109          * @see UCharacter#toLowerCase(Locale, String)
    110          * @draft ICU 59
    111          * @provisional This API might change or be removed in a future release.
    112          */
    113          public <A extends Appendable> A apply(
    114                  Locale locale, CharSequence src, A dest, Edits edits) {
    115              return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src, dest, edits);
    116          }
    117     }
    118 
    119     /**
    120      * Uppercasing options and methods. Immutable.
    121      *
    122      * @see #toUpper()
    123      * @draft ICU 59
    124      * @provisional This API might change or be removed in a future release.
    125      */
    126     public static final class Upper extends CaseMap {
    127         private static final Upper DEFAULT = new Upper(0);
    128         private static final Upper OMIT_UNCHANGED = new Upper(CaseMapImpl.OMIT_UNCHANGED_TEXT);
    129         private Upper(int opt) { super(opt); }
    130 
    131         /**
    132          * {@inheritDoc}
    133          * @draft ICU 59
    134          * @provisional This API might change or be removed in a future release.
    135          */
    136         @Override
    137         public Upper omitUnchangedText() {
    138             return OMIT_UNCHANGED;
    139         }
    140 
    141         /**
    142          * Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
    143          * Casing is locale-dependent and context-sensitive.
    144          * The result may be longer or shorter than the original.
    145          *
    146          * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
    147          *                  (See {@link ULocale#toLocale}.)
    148          * @param src       The original string.
    149          * @param dest      A buffer for the result string. Must not be null.
    150          * @param edits     Records edits for index mapping, working with styled text,
    151          *                  and getting only changes (if any).
    152          *                  This function calls edits.reset() first. edits can be null.
    153          * @return dest with the result string (or only changes) appended.
    154          *
    155          * @see UCharacter#toUpperCase(Locale, String)
    156          * @draft ICU 59
    157          * @provisional This API might change or be removed in a future release.
    158          */
    159          public <A extends Appendable> A apply(
    160                  Locale locale, CharSequence src, A dest, Edits edits) {
    161              return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src, dest, edits);
    162          }
    163     }
    164 
    165     /**
    166      * Titlecasing options and methods. Immutable.
    167      *
    168      * @see #toTitle()
    169      * @draft ICU 59
    170      * @provisional This API might change or be removed in a future release.
    171      */
    172     public static final class Title extends CaseMap {
    173         private static final Title DEFAULT = new Title(0);
    174         private static final Title OMIT_UNCHANGED = new Title(CaseMapImpl.OMIT_UNCHANGED_TEXT);
    175         private Title(int opt) { super(opt); }
    176 
    177         /**
    178          * {@inheritDoc}
    179          * @draft ICU 59
    180          * @provisional This API might change or be removed in a future release.
    181          */
    182         @Override
    183         public Title omitUnchangedText() {
    184             if (internalOptions == 0 || internalOptions == CaseMapImpl.OMIT_UNCHANGED_TEXT) {
    185                 return OMIT_UNCHANGED;
    186             }
    187             return new Title(internalOptions | CaseMapImpl.OMIT_UNCHANGED_TEXT);
    188         }
    189 
    190         /**
    191          * Returns an instance that behaves like this one but
    192          * does not lowercase non-initial parts of words when titlecasing.
    193          *
    194          * <p>By default, titlecasing will titlecase the first cased character
    195          * of a word and lowercase all other characters.
    196          * With this option, the other characters will not be modified.
    197          *
    198          * @return an options object with this option.
    199          * @see UCharacter#TITLECASE_NO_LOWERCASE
    200          * @draft ICU 59
    201          * @provisional This API might change or be removed in a future release.
    202          */
    203         public Title noLowercase() {
    204             return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
    205         }
    206 
    207         // TODO: update references to the Unicode Standard for recent version
    208         /**
    209          * Returns an instance that behaves like this one but
    210          * does not adjust the titlecasing indexes from BreakIterator::next() indexes;
    211          * titlecases exactly the characters at breaks from the iterator.
    212          *
    213          * <p>By default, titlecasing will take each break iterator index,
    214          * adjust it by looking for the next cased character, and titlecase that one.
    215          * Other characters are lowercased.
    216          *
    217          * <p>This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
    218          *
    219          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
    220          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
    221          * cased character F. If F exists, map F to default_title(F); then map each
    222          * subsequent character C to default_lower(C).
    223          *
    224          * @return an options object with this option.
    225          * @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
    226          * @draft ICU 59
    227          * @provisional This API might change or be removed in a future release.
    228          */
    229         public Title noBreakAdjustment() {
    230             return new Title(internalOptions | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
    231         }
    232 
    233         /**
    234          * Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
    235          * Casing is locale-dependent and context-sensitive.
    236          * The result may be longer or shorter than the original.
    237          *
    238          * <p>Titlecasing uses a break iterator to find the first characters of words
    239          * that are to be titlecased. It titlecases those characters and lowercases
    240          * all others. (This can be modified with options bits.)
    241          *
    242          * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
    243          *                  (See {@link ULocale#toLocale}.)
    244          * @param iter      A break iterator to find the first characters of words that are to be titlecased.
    245          *                  It is set to the source string (setText())
    246          *                  and used one or more times for iteration (first() and next()).
    247          *                  If null, then a word break iterator for the locale is used
    248          *                  (or something equivalent).
    249          * @param src       The original string.
    250          * @param dest      A buffer for the result string. Must not be null.
    251          * @param edits     Records edits for index mapping, working with styled text,
    252          *                  and getting only changes (if any).
    253          *                  This function calls edits.reset() first. edits can be null.
    254          * @return dest with the result string (or only changes) appended.
    255          *
    256          * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
    257          * @draft ICU 59
    258          * @provisional This API might change or be removed in a future release.
    259          */
    260          public <A extends Appendable> A apply(
    261                  Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
    262              if (iter == null) {
    263                  iter = BreakIterator.getWordInstance(locale);
    264              }
    265              iter.setText(src.toString());
    266              return CaseMapImpl.toTitle(
    267                      getCaseLocale(locale), internalOptions, iter, src, dest, edits);
    268          }
    269     }
    270 
    271     /**
    272      * Case folding options and methods. Immutable.
    273      *
    274      * @see #fold()
    275      * @draft ICU 59
    276      * @provisional This API might change or be removed in a future release.
    277      */
    278     public static final class Fold extends CaseMap {
    279         private static final Fold DEFAULT = new Fold(0);
    280         private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
    281         private static final Fold OMIT_UNCHANGED = new Fold(CaseMapImpl.OMIT_UNCHANGED_TEXT);
    282         private static final Fold TURKIC_OMIT_UNCHANGED = new Fold(
    283                 UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | CaseMapImpl.OMIT_UNCHANGED_TEXT);
    284         private Fold(int opt) { super(opt); }
    285 
    286         /**
    287          * {@inheritDoc}
    288          * @draft ICU 59
    289          * @provisional This API might change or be removed in a future release.
    290          */
    291         @Override
    292         public Fold omitUnchangedText() {
    293             return (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0 ?
    294                     OMIT_UNCHANGED : TURKIC_OMIT_UNCHANGED;
    295         }
    296 
    297         /**
    298          * Returns an instance that behaves like this one but
    299          * handles dotted I and dotless i appropriately for Turkic languages (tr, az).
    300          *
    301          * <p>Uses the Unicode CaseFolding.txt mappings marked with 'T' that
    302          * are to be excluded for default mappings and
    303          * included for the Turkic-specific mappings.
    304          *
    305          * @return an options object with this option.
    306          * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
    307          * @draft ICU 59
    308          * @provisional This API might change or be removed in a future release.
    309          */
    310         public Fold turkic() {
    311             return (internalOptions & CaseMapImpl.OMIT_UNCHANGED_TEXT) == 0 ?
    312                     TURKIC : TURKIC_OMIT_UNCHANGED;
    313         }
    314 
    315         /**
    316          * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
    317          *
    318          * <p>Case-folding is locale-independent and not context-sensitive,
    319          * but there is an option for whether to include or exclude mappings for dotted I
    320          * and dotless i that are marked with 'T' in CaseFolding.txt.
    321          *
    322          * <p>The result may be longer or shorter than the original.
    323          *
    324          * @param src       The original string.
    325          * @param dest      A buffer for the result string. Must not be null.
    326          * @param edits     Records edits for index mapping, working with styled text,
    327          *                  and getting only changes (if any).
    328          *                  This function calls edits.reset() first. edits can be null.
    329          * @return dest with the result string (or only changes) appended.
    330          *
    331          * @see UCharacter#foldCase(String, int)
    332          * @draft ICU 59
    333          * @provisional This API might change or be removed in a future release.
    334          */
    335          public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
    336              return CaseMapImpl.fold(internalOptions, src, dest, edits);
    337          }
    338     }
    339 }
    340