1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ***************************************************************** 6 * Copyright (c) 2002-2014, International Business Machines Corporation 7 * and others. All Rights Reserved. 8 ***************************************************************** 9 * Date Name Description 10 * 06/06/2002 aliu Creation. 11 ***************************************************************** 12 */ 13 package android.icu.text; 14 15 import java.util.ArrayList; 16 import java.util.Enumeration; 17 import java.util.HashMap; 18 import java.util.HashSet; 19 import java.util.List; 20 import java.util.MissingResourceException; 21 import java.util.Set; 22 import java.util.concurrent.ConcurrentHashMap; 23 24 import android.icu.lang.UScript; 25 /** 26 * A transliterator that translates multiple input scripts to a single 27 * output script. It is named Any-T or Any-T/V, where T is the target 28 * and V is the optional variant. The target T is a script. 29 * 30 * <p>An AnyTransliterator partitions text into runs of the same 31 * script, together with adjacent COMMON or INHERITED characters. 32 * After determining the script of each run, it transliterates from 33 * that script to the given target/variant. It does so by 34 * instantiating a transliterator from the source script to the 35 * target/variant. If a run consists only of the target script, 36 * COMMON, or INHERITED characters, then the run is not changed. 37 * 38 * <p>At startup, all possible AnyTransliterators are registered with 39 * the system, as determined by examining the registered script 40 * transliterators. 41 * 42 * @author Alan Liu 43 */ 44 class AnyTransliterator extends Transliterator { 45 46 //------------------------------------------------------------ 47 // Constants 48 49 static final char TARGET_SEP = '-'; 50 static final char VARIANT_SEP = '/'; 51 static final String ANY = "Any"; 52 static final String NULL_ID = "Null"; 53 static final String LATIN_PIVOT = "-Latin;Latin-"; 54 55 /** 56 * Cache mapping UScriptCode values to Transliterator*. 57 */ 58 private ConcurrentHashMap<Integer, Transliterator> cache; 59 60 /** 61 * The target or target/variant string. 62 */ 63 private String target; 64 65 /** 66 * The target script code. Never USCRIPT_INVALID_CODE. 67 */ 68 private int targetScript; 69 70 /** 71 * Special code for handling width characters 72 */ 73 private Transliterator widthFix = Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd"); 74 75 /** 76 * Implements {@link Transliterator#handleTransliterate}. 77 */ 78 @Override 79 protected void handleTransliterate(Replaceable text, 80 Position pos, boolean isIncremental) { 81 int allStart = pos.start; 82 int allLimit = pos.limit; 83 84 ScriptRunIterator it = 85 new ScriptRunIterator(text, pos.contextStart, pos.contextLimit); 86 87 while (it.next()) { 88 // Ignore runs in the ante context 89 if (it.limit <= allStart) continue; 90 91 // Try to instantiate transliterator from it.scriptCode to 92 // our target or target/variant 93 Transliterator t = getTransliterator(it.scriptCode); 94 95 if (t == null) { 96 // We have no transliterator. Do nothing, but keep 97 // pos.start up to date. 98 pos.start = it.limit; 99 continue; 100 } 101 102 // If the run end is before the transliteration limit, do 103 // a non-incremental transliteration. Otherwise do an 104 // incremental one. 105 boolean incremental = isIncremental && (it.limit >= allLimit); 106 107 pos.start = Math.max(allStart, it.start); 108 pos.limit = Math.min(allLimit, it.limit); 109 int limit = pos.limit; 110 t.filteredTransliterate(text, pos, incremental); 111 int delta = pos.limit - limit; 112 allLimit += delta; 113 it.adjustLimit(delta); 114 115 // We're done if we enter the post context 116 if (it.limit >= allLimit) break; 117 } 118 119 // Restore limit. pos.start is fine where the last transliterator 120 // left it, or at the end of the last run. 121 pos.limit = allLimit; 122 } 123 124 /** 125 * Private constructor 126 * @param id the ID of the form S-T or S-T/V, where T is theTarget 127 * and V is theVariant. Must not be empty. 128 * @param theTarget the target name. Must not be empty, and must 129 * name a script corresponding to theTargetScript. 130 * @param theVariant the variant name, or the empty string if 131 * there is no variant 132 * @param theTargetScript the script code corresponding to 133 * theTarget. 134 */ 135 private AnyTransliterator(String id, 136 String theTarget, 137 String theVariant, 138 int theTargetScript) { 139 super(id, null); 140 targetScript = theTargetScript; 141 cache = new ConcurrentHashMap<Integer, Transliterator>(); 142 143 target = theTarget; 144 if (theVariant.length() > 0) { 145 target = theTarget + VARIANT_SEP + theVariant; 146 } 147 } 148 149 /** 150 * @param id the ID of the form S-T or S-T/V, where T is theTarget 151 * and V is theVariant. Must not be empty. 152 * @param filter The Unicode filter. 153 * @param target2 the target name. 154 * @param targetScript2 the script code corresponding to theTarget. 155 * @param widthFix2 The Transliterator width fix. 156 * @param cache2 The Map object for cache. 157 */ 158 public AnyTransliterator(String id, UnicodeFilter filter, String target2, 159 int targetScript2, Transliterator widthFix2, ConcurrentHashMap<Integer, Transliterator> cache2) { 160 super(id, filter); 161 targetScript = targetScript2; 162 cache = cache2; 163 target = target2; 164 } 165 166 /** 167 * Returns a transliterator from the given source to our target or 168 * target/variant. Returns NULL if the source is the same as our 169 * target script, or if the source is USCRIPT_INVALID_CODE. 170 * Caches the result and returns the same transliterator the next 171 * time. The caller does NOT own the result and must not delete 172 * it. 173 */ 174 private Transliterator getTransliterator(int source) { 175 if (source == targetScript || source == UScript.INVALID_CODE) { 176 if (isWide(targetScript)) { 177 return null; 178 } else { 179 return widthFix; 180 } 181 } 182 183 Integer key = Integer.valueOf(source); 184 Transliterator t = cache.get(key); 185 if (t == null) { 186 String sourceName = UScript.getName(source); 187 String id = sourceName + TARGET_SEP + target; 188 189 try { 190 t = Transliterator.getInstance(id, FORWARD); 191 } catch (RuntimeException e) { } 192 if (t == null) { 193 194 // Try to pivot around Latin, our most common script 195 id = sourceName + LATIN_PIVOT + target; 196 try { 197 t = Transliterator.getInstance(id, FORWARD); 198 } catch (RuntimeException e) { } 199 } 200 201 if (t != null) { 202 if (!isWide(targetScript)) { 203 List<Transliterator> v = new ArrayList<Transliterator>(); 204 v.add(widthFix); 205 v.add(t); 206 t = new CompoundTransliterator(v); 207 } 208 Transliterator prevCachedT = cache.putIfAbsent(key, t); 209 if (prevCachedT != null) { 210 t = prevCachedT; 211 } 212 } else if (!isWide(targetScript)) { 213 return widthFix; 214 } 215 } 216 217 return t; 218 } 219 220 /** 221 * @param targetScript2 222 * @return 223 */ 224 private boolean isWide(int script) { 225 return script == UScript.BOPOMOFO || script == UScript.HAN || script == UScript.HANGUL || script == UScript.HIRAGANA || script == UScript.KATAKANA; 226 } 227 228 /** 229 * Registers standard transliterators with the system. Called by 230 * Transliterator during initialization. Scan all current targets 231 * and register those that are scripts T as Any-T/V. 232 */ 233 static void register() { 234 235 HashMap<String, Set<String>> seen = new HashMap<String, Set<String>>(); // old code used set, but was dependent on order 236 237 for (Enumeration<String> s = Transliterator.getAvailableSources(); s.hasMoreElements(); ) { 238 String source = s.nextElement(); 239 240 // Ignore the "Any" source 241 if (source.equalsIgnoreCase(ANY)) continue; 242 243 for (Enumeration<String> t = Transliterator.getAvailableTargets(source); 244 t.hasMoreElements(); ) { 245 String target = t.nextElement(); 246 247 // Get the script code for the target. If not a script, ignore. 248 int targetScript = scriptNameToCode(target); 249 if (targetScript == UScript.INVALID_CODE) { 250 continue; 251 } 252 253 Set<String> seenVariants = seen.get(target); 254 if (seenVariants == null) { 255 seen.put(target, seenVariants = new HashSet<String>()); 256 } 257 258 for (Enumeration<String> v = Transliterator.getAvailableVariants(source, target); 259 v.hasMoreElements(); ) { 260 String variant = v.nextElement(); 261 262 // Only process each target/variant pair once 263 if (seenVariants.contains(variant)) { 264 continue; 265 } 266 seenVariants.add(variant); 267 268 String id; 269 id = TransliteratorIDParser.STVtoID(ANY, target, variant); 270 AnyTransliterator trans = new AnyTransliterator(id, target, variant, 271 targetScript); 272 Transliterator.registerInstance(trans); 273 Transliterator.registerSpecialInverse(target, NULL_ID, false); 274 } 275 } 276 } 277 } 278 279 /** 280 * Return the script code for a given name, or 281 * UScript.INVALID_CODE if not found. 282 */ 283 private static int scriptNameToCode(String name) { 284 try{ 285 int[] codes = UScript.getCode(name); 286 return codes != null ? codes[0] : UScript.INVALID_CODE; 287 }catch( MissingResourceException e){ 288 ///CLOVER:OFF 289 return UScript.INVALID_CODE; 290 ///CLOVER:ON 291 } 292 } 293 294 //------------------------------------------------------------ 295 // ScriptRunIterator 296 297 /** 298 * Returns a series of ranges corresponding to scripts. They will be 299 * of the form: 300 * 301 * ccccSScSSccccTTcTcccc - c = common, S = first script, T = second 302 * | | - first run (start, limit) 303 * | | - second run (start, limit) 304 * 305 * That is, the runs will overlap. The reason for this is so that a 306 * transliterator can consider common characters both before and after 307 * the scripts. 308 */ 309 private static class ScriptRunIterator { 310 311 private Replaceable text; 312 private int textStart; 313 private int textLimit; 314 315 /** 316 * The code of the current run, valid after next() returns. May 317 * be UScript.INVALID_CODE if and only if the entire text is 318 * COMMON/INHERITED. 319 */ 320 public int scriptCode; 321 322 /** 323 * The start of the run, inclusive, valid after next() returns. 324 */ 325 public int start; 326 327 /** 328 * The end of the run, exclusive, valid after next() returns. 329 */ 330 public int limit; 331 332 /** 333 * Constructs a run iterator over the given text from start 334 * (inclusive) to limit (exclusive). 335 */ 336 public ScriptRunIterator(Replaceable text, int start, int limit) { 337 this.text = text; 338 this.textStart = start; 339 this.textLimit = limit; 340 this.limit = start; 341 } 342 343 344 /** 345 * Returns TRUE if there are any more runs. TRUE is always 346 * returned at least once. Upon return, the caller should 347 * examine scriptCode, start, and limit. 348 */ 349 public boolean next() { 350 int ch; 351 int s; 352 353 scriptCode = UScript.INVALID_CODE; // don't know script yet 354 start = limit; 355 356 // Are we done? 357 if (start == textLimit) { 358 return false; 359 } 360 361 // Move start back to include adjacent COMMON or INHERITED 362 // characters 363 while (start > textStart) { 364 ch = text.char32At(start - 1); // look back 365 s = UScript.getScript(ch); 366 if (s == UScript.COMMON || s == UScript.INHERITED) { 367 --start; 368 } else { 369 break; 370 } 371 } 372 373 // Move limit ahead to include COMMON, INHERITED, and characters 374 // of the current script. 375 while (limit < textLimit) { 376 ch = text.char32At(limit); // look ahead 377 s = UScript.getScript(ch); 378 if (s != UScript.COMMON && s != UScript.INHERITED) { 379 if (scriptCode == UScript.INVALID_CODE) { 380 scriptCode = s; 381 } else if (s != scriptCode) { 382 break; 383 } 384 } 385 ++limit; 386 } 387 388 // Return TRUE even if the entire text is COMMON / INHERITED, in 389 // which case scriptCode will be UScript.INVALID_CODE. 390 return true; 391 } 392 393 /** 394 * Adjusts internal indices for a change in the limit index of the 395 * given delta. A positive delta means the limit has increased. 396 */ 397 public void adjustLimit(int delta) { 398 limit += delta; 399 textLimit += delta; 400 } 401 } 402 403 /** 404 * Temporary hack for registry problem. Needs to be replaced by better architecture. 405 */ 406 public Transliterator safeClone() { 407 UnicodeFilter filter = getFilter(); 408 if (filter != null && filter instanceof UnicodeSet) { 409 filter = new UnicodeSet((UnicodeSet)filter); 410 } 411 return new AnyTransliterator(getID(), filter, target, targetScript, widthFix, cache); 412 } 413 414 /* (non-Javadoc) 415 * @see android.icu.text.Transliterator#addSourceTargetSet(android.icu.text.UnicodeSet, android.icu.text.UnicodeSet, android.icu.text.UnicodeSet) 416 */ 417 @Override 418 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 419 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 420 // Assume that it can modify any character to any other character 421 sourceSet.addAll(myFilter); 422 if (myFilter.size() != 0) { 423 targetSet.addAll(0, 0x10FFFF); 424 } 425 } 426 } 427 428