1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 * Copyright (C) 1996-2011, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 * 7 */ 8 package com.ibm.icu.text; 9 10 import com.ibm.icu.impl.UCaseProps; 11 import com.ibm.icu.lang.UCharacter; 12 import com.ibm.icu.util.ULocale; 13 14 /** 15 * A transliterator that converts all letters (as defined by 16 * <code>UCharacter.isLetter()</code>) to lower case, except for those 17 * letters preceded by non-letters. The latter are converted to title 18 * case using <code>UCharacter.toTitleCase()</code>. 19 * @author Alan Liu 20 */ 21 class TitlecaseTransliterator extends Transliterator { 22 23 static final String _ID = "Any-Title"; 24 // TODO: Add variants for tr/az, lt, default = default locale: ICU ticket #12720 25 26 /** 27 * System registration hook. 28 */ 29 static void register() { 30 Transliterator.registerFactory(_ID, new Transliterator.Factory() { 31 @Override 32 public Transliterator getInstance(String ID) { 33 return new TitlecaseTransliterator(ULocale.US); 34 } 35 }); 36 37 registerSpecialInverse("Title", "Lower", false); 38 } 39 40 private final ULocale locale; 41 42 private final UCaseProps csp; 43 private ReplaceableContextIterator iter; 44 private StringBuilder result; 45 private int caseLocale; 46 47 /** 48 * Constructs a transliterator. 49 */ 50 public TitlecaseTransliterator(ULocale loc) { 51 super(_ID, null); 52 locale = loc; 53 // Need to look back 2 characters in the case of "can't" 54 setMaximumContextLength(2); 55 csp=UCaseProps.INSTANCE; 56 iter=new ReplaceableContextIterator(); 57 result = new StringBuilder(); 58 caseLocale = UCaseProps.getCaseLocale(locale); 59 } 60 61 /** 62 * Implements {@link Transliterator#handleTransliterate}. 63 */ 64 @Override 65 protected synchronized void handleTransliterate(Replaceable text, 66 Position offsets, boolean isIncremental) { 67 // TODO reimplement, see ustrcase.c 68 // using a real word break iterator 69 // instead of just looking for a transition between cased and uncased characters 70 // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap) 71 // needs to take isIncremental into account because case mappings are context-sensitive 72 // also detect when lowercasing function did not finish because of context 73 74 if (offsets.start >= offsets.limit) { 75 return; 76 } 77 78 // case type: >0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable 79 int type; 80 81 // Our mode; we are either converting letter toTitle or 82 // toLower. 83 boolean doTitle = true; 84 85 // Determine if there is a preceding context of cased case-ignorable*, 86 // in which case we want to start in toLower mode. If the 87 // prior context is anything else (including empty) then start 88 // in toTitle mode. 89 int c, start; 90 for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) { 91 c = text.char32At(start); 92 type=csp.getTypeOrIgnorable(c); 93 if(type>0) { // cased 94 doTitle=false; 95 break; 96 } else if(type==0) { // uncased but not ignorable 97 break; 98 } 99 // else (type<0) case-ignorable: continue 100 } 101 102 // Convert things after a cased character toLower; things 103 // after a uncased, non-case-ignorable character toTitle. Case-ignorable 104 // characters are copied directly and do not change the mode. 105 106 iter.setText(text); 107 iter.setIndex(offsets.start); 108 iter.setLimit(offsets.limit); 109 iter.setContextLimits(offsets.contextStart, offsets.contextLimit); 110 111 result.setLength(0); 112 113 // Walk through original string 114 // If there is a case change, modify corresponding position in replaceable 115 int delta; 116 117 while((c=iter.nextCaseMapCP())>=0) { 118 type=csp.getTypeOrIgnorable(c); 119 if(type>=0) { // not case-ignorable 120 if(doTitle) { 121 c=csp.toFullTitle(c, iter, result, caseLocale); 122 } else { 123 c=csp.toFullLower(c, iter, result, caseLocale); 124 } 125 doTitle = type==0; // doTitle=isUncased 126 127 if(iter.didReachLimit() && isIncremental) { 128 // the case mapping function tried to look beyond the context limit 129 // wait for more input 130 offsets.start=iter.getCaseMapCPStart(); 131 return; 132 } 133 134 /* decode the result */ 135 if(c<0) { 136 /* c mapped to itself, no change */ 137 continue; 138 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 139 /* replace by the mapping string */ 140 delta=iter.replace(result.toString()); 141 result.setLength(0); 142 } else { 143 /* replace by single-code point mapping */ 144 delta=iter.replace(UTF16.valueOf(c)); 145 } 146 147 if(delta!=0) { 148 offsets.limit += delta; 149 offsets.contextLimit += delta; 150 } 151 } 152 } 153 offsets.start = offsets.limit; 154 } 155 156 // NOTE: normally this would be static, but because the results vary by locale.... 157 SourceTargetUtility sourceTargetUtility = null; 158 159 /* (non-Javadoc) 160 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 161 */ 162 @Override 163 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 164 synchronized (this) { 165 if (sourceTargetUtility == null) { 166 sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() { 167 @Override 168 public String transform(String source) { 169 return UCharacter.toTitleCase(locale, source, null); 170 } 171 }); 172 } 173 } 174 sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet); 175 } 176 } 177