Home | History | Annotate | Download | only in text
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  * Copyright (C) 1996-2011, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  *
      7  */
      8 package com.ibm.icu.text;
      9 
     10 import com.ibm.icu.impl.UCaseProps;
     11 import com.ibm.icu.lang.UCharacter;
     12 import com.ibm.icu.util.ULocale;
     13 
     14 /**
     15  * A transliterator that converts all letters (as defined by
     16  * <code>UCharacter.isLetter()</code>) to lower case, except for those
     17  * letters preceded by non-letters.  The latter are converted to title
     18  * case using <code>UCharacter.toTitleCase()</code>.
     19  * @author Alan Liu
     20  */
     21 class TitlecaseTransliterator extends Transliterator {
     22 
     23     static final String _ID = "Any-Title";
     24     // TODO: Add variants for tr/az, lt, default = default locale: ICU ticket #12720
     25 
     26     /**
     27      * System registration hook.
     28      */
     29     static void register() {
     30         Transliterator.registerFactory(_ID, new Transliterator.Factory() {
     31             @Override
     32             public Transliterator getInstance(String ID) {
     33                 return new TitlecaseTransliterator(ULocale.US);
     34             }
     35         });
     36 
     37         registerSpecialInverse("Title", "Lower", false);
     38     }
     39 
     40     private final ULocale locale;
     41 
     42     private final UCaseProps csp;
     43     private ReplaceableContextIterator iter;
     44     private StringBuilder result;
     45     private int caseLocale;
     46 
     47    /**
     48      * Constructs a transliterator.
     49      */
     50     public TitlecaseTransliterator(ULocale loc) {
     51         super(_ID, null);
     52         locale = loc;
     53         // Need to look back 2 characters in the case of "can't"
     54         setMaximumContextLength(2);
     55         csp=UCaseProps.INSTANCE;
     56         iter=new ReplaceableContextIterator();
     57         result = new StringBuilder();
     58         caseLocale = UCaseProps.getCaseLocale(locale);
     59     }
     60 
     61     /**
     62      * Implements {@link Transliterator#handleTransliterate}.
     63      */
     64     @Override
     65     protected synchronized void handleTransliterate(Replaceable text,
     66                                        Position offsets, boolean isIncremental) {
     67         // TODO reimplement, see ustrcase.c
     68         // using a real word break iterator
     69         //   instead of just looking for a transition between cased and uncased characters
     70         // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
     71         // needs to take isIncremental into account because case mappings are context-sensitive
     72         //   also detect when lowercasing function did not finish because of context
     73 
     74         if (offsets.start >= offsets.limit) {
     75             return;
     76         }
     77 
     78         // case type: >0 cased (UCaseProps.LOWER etc.)  ==0 uncased  <0 case-ignorable
     79         int type;
     80 
     81         // Our mode; we are either converting letter toTitle or
     82         // toLower.
     83         boolean doTitle = true;
     84 
     85         // Determine if there is a preceding context of cased case-ignorable*,
     86         // in which case we want to start in toLower mode.  If the
     87         // prior context is anything else (including empty) then start
     88         // in toTitle mode.
     89         int c, start;
     90         for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) {
     91             c = text.char32At(start);
     92             type=csp.getTypeOrIgnorable(c);
     93             if(type>0) { // cased
     94                 doTitle=false;
     95                 break;
     96             } else if(type==0) { // uncased but not ignorable
     97                 break;
     98             }
     99             // else (type<0) case-ignorable: continue
    100         }
    101 
    102         // Convert things after a cased character toLower; things
    103         // after a uncased, non-case-ignorable character toTitle.  Case-ignorable
    104         // characters are copied directly and do not change the mode.
    105 
    106         iter.setText(text);
    107         iter.setIndex(offsets.start);
    108         iter.setLimit(offsets.limit);
    109         iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
    110 
    111         result.setLength(0);
    112 
    113         // Walk through original string
    114         // If there is a case change, modify corresponding position in replaceable
    115         int delta;
    116 
    117         while((c=iter.nextCaseMapCP())>=0) {
    118             type=csp.getTypeOrIgnorable(c);
    119             if(type>=0) { // not case-ignorable
    120                 if(doTitle) {
    121                     c=csp.toFullTitle(c, iter, result, caseLocale);
    122                 } else {
    123                     c=csp.toFullLower(c, iter, result, caseLocale);
    124                 }
    125                 doTitle = type==0; // doTitle=isUncased
    126 
    127                 if(iter.didReachLimit() && isIncremental) {
    128                     // the case mapping function tried to look beyond the context limit
    129                     // wait for more input
    130                     offsets.start=iter.getCaseMapCPStart();
    131                     return;
    132                 }
    133 
    134                 /* decode the result */
    135                 if(c<0) {
    136                     /* c mapped to itself, no change */
    137                     continue;
    138                 } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
    139                     /* replace by the mapping string */
    140                     delta=iter.replace(result.toString());
    141                     result.setLength(0);
    142                 } else {
    143                     /* replace by single-code point mapping */
    144                     delta=iter.replace(UTF16.valueOf(c));
    145                 }
    146 
    147                 if(delta!=0) {
    148                     offsets.limit += delta;
    149                     offsets.contextLimit += delta;
    150                 }
    151             }
    152         }
    153         offsets.start = offsets.limit;
    154     }
    155 
    156     // NOTE: normally this would be static, but because the results vary by locale....
    157     SourceTargetUtility sourceTargetUtility = null;
    158 
    159     /* (non-Javadoc)
    160      * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
    161      */
    162     @Override
    163     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    164         synchronized (this) {
    165             if (sourceTargetUtility == null) {
    166                 sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() {
    167                     @Override
    168                     public String transform(String source) {
    169                         return UCharacter.toTitleCase(locale, source, null);
    170                     }
    171                 });
    172             }
    173         }
    174         sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);
    175     }
    176 }
    177