Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1996-2011, International Business Machines Corporation and
      3  * others. All Rights Reserved.
      4  *
      5  */
      6 package com.ibm.icu.text;
      7 
      8 import com.ibm.icu.impl.UCaseProps;
      9 import com.ibm.icu.lang.UCharacter;
     10 import com.ibm.icu.util.ULocale;
     11 
     12 /**
     13  * A transliterator that converts all letters (as defined by
     14  * <code>UCharacter.isLetter()</code>) to lower case, except for those
     15  * letters preceded by non-letters.  The latter are converted to title
     16  * case using <code>UCharacter.toTitleCase()</code>.
     17  * @author Alan Liu
     18  */
     19 class TitlecaseTransliterator extends Transliterator {
     20 
     21     static final String _ID = "Any-Title";
     22 
     23     /**
     24      * System registration hook.
     25      */
     26     static void register() {
     27         Transliterator.registerFactory(_ID, new Transliterator.Factory() {
     28             public Transliterator getInstance(String ID) {
     29                 return new TitlecaseTransliterator(ULocale.US);
     30             }
     31         });
     32 
     33         registerSpecialInverse("Title", "Lower", false);
     34     }
     35 
     36     private ULocale locale;
     37 
     38     private UCaseProps csp;
     39     private ReplaceableContextIterator iter;
     40     private StringBuilder result;
     41     private int[] locCache;
     42 
     43    /**
     44      * Constructs a transliterator.
     45      */
     46     public TitlecaseTransliterator(ULocale loc) {
     47         super(_ID, null);
     48         locale = loc;
     49         // Need to look back 2 characters in the case of "can't"
     50         setMaximumContextLength(2);
     51         csp=UCaseProps.INSTANCE;
     52         iter=new ReplaceableContextIterator();
     53         result = new StringBuilder();
     54         locCache = new int[1];
     55         locCache[0]=0;
     56     }
     57 
     58     /**
     59      * Implements {@link Transliterator#handleTransliterate}.
     60      */
     61     protected synchronized void handleTransliterate(Replaceable text,
     62                                        Position offsets, boolean isIncremental) {
     63         // TODO reimplement, see ustrcase.c
     64         // using a real word break iterator
     65         //   instead of just looking for a transition between cased and uncased characters
     66         // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
     67         // needs to take isIncremental into account because case mappings are context-sensitive
     68         //   also detect when lowercasing function did not finish because of context
     69 
     70         if (offsets.start >= offsets.limit) {
     71             return;
     72         }
     73 
     74         // case type: >0 cased (UCaseProps.LOWER etc.)  ==0 uncased  <0 case-ignorable
     75         int type;
     76 
     77         // Our mode; we are either converting letter toTitle or
     78         // toLower.
     79         boolean doTitle = true;
     80 
     81         // Determine if there is a preceding context of cased case-ignorable*,
     82         // in which case we want to start in toLower mode.  If the
     83         // prior context is anything else (including empty) then start
     84         // in toTitle mode.
     85         int c, start;
     86         for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) {
     87             c = text.char32At(start);
     88             type=csp.getTypeOrIgnorable(c);
     89             if(type>0) { // cased
     90                 doTitle=false;
     91                 break;
     92             } else if(type==0) { // uncased but not ignorable
     93                 break;
     94             }
     95             // else (type<0) case-ignorable: continue
     96         }
     97 
     98         // Convert things after a cased character toLower; things
     99         // after a uncased, non-case-ignorable character toTitle.  Case-ignorable
    100         // characters are copied directly and do not change the mode.
    101 
    102         iter.setText(text);
    103         iter.setIndex(offsets.start);
    104         iter.setLimit(offsets.limit);
    105         iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
    106 
    107         result.setLength(0);
    108 
    109         // Walk through original string
    110         // If there is a case change, modify corresponding position in replaceable
    111         int delta;
    112 
    113         while((c=iter.nextCaseMapCP())>=0) {
    114             type=csp.getTypeOrIgnorable(c);
    115             if(type>=0) { // not case-ignorable
    116                 if(doTitle) {
    117                     c=csp.toFullTitle(c, iter, result, locale, locCache);
    118                 } else {
    119                     c=csp.toFullLower(c, iter, result, locale, locCache);
    120                 }
    121                 doTitle = type==0; // doTitle=isUncased
    122 
    123                 if(iter.didReachLimit() && isIncremental) {
    124                     // the case mapping function tried to look beyond the context limit
    125                     // wait for more input
    126                     offsets.start=iter.getCaseMapCPStart();
    127                     return;
    128                 }
    129 
    130                 /* decode the result */
    131                 if(c<0) {
    132                     /* c mapped to itself, no change */
    133                     continue;
    134                 } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
    135                     /* replace by the mapping string */
    136                     delta=iter.replace(result.toString());
    137                     result.setLength(0);
    138                 } else {
    139                     /* replace by single-code point mapping */
    140                     delta=iter.replace(UTF16.valueOf(c));
    141                 }
    142 
    143                 if(delta!=0) {
    144                     offsets.limit += delta;
    145                     offsets.contextLimit += delta;
    146                 }
    147             }
    148         }
    149         offsets.start = offsets.limit;
    150     }
    151 
    152     // NOTE: normally this would be static, but because the results vary by locale....
    153     SourceTargetUtility sourceTargetUtility = null;
    154 
    155     /* (non-Javadoc)
    156      * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
    157      */
    158     @Override
    159     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    160         synchronized (this) {
    161             if (sourceTargetUtility == null) {
    162                 sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() {
    163                     public String transform(String source) {
    164                         return UCharacter.toTitleCase(locale, source, null);
    165                     }
    166                 });
    167             }
    168         }
    169         sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);
    170     }
    171 }
    172