1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package android.icu.text; 11 12 import java.text.MessageFormat; 13 import java.util.ArrayList; 14 import java.util.Collections; 15 import java.util.Enumeration; 16 import java.util.HashMap; 17 import java.util.List; 18 import java.util.Locale; 19 import java.util.Map; 20 import java.util.MissingResourceException; 21 22 import android.icu.impl.ICUData; 23 import android.icu.impl.ICUResourceBundle; 24 import android.icu.impl.Utility; 25 import android.icu.impl.UtilityExtensions; 26 import android.icu.text.RuleBasedTransliterator.Data; 27 import android.icu.text.TransliteratorIDParser.SingleID; 28 import android.icu.util.CaseInsensitiveString; 29 import android.icu.util.ULocale; 30 import android.icu.util.ULocale.Category; 31 import android.icu.util.UResourceBundle; 32 33 /** 34 * <code>Transliterator</code> is an abstract class that transliterates text from one format to another. The most common 35 * kind of transliterator is a script, or alphabet, transliterator. For example, a Russian to Latin transliterator 36 * changes Russian text written in Cyrillic characters to phonetically equivalent Latin characters. It does not 37 * <em>translate</em> Russian to English! Transliteration, unlike translation, operates on characters, without reference 38 * to the meanings of words and sentences. 39 * 40 * <p> 41 * Although script conversion is its most common use, a transliterator can actually perform a more general class of 42 * tasks. 
In fact, <code>Transliterator</code> defines a very general API which specifies only that a segment of the 43 * input text is replaced by new text. The particulars of this conversion are determined entirely by subclasses of 44 * <code>Transliterator</code>. 45 * 46 * <p> 47 * <b>Transliterators are stateless</b> 48 * 49 * <p> 50 * <code>Transliterator</code> objects are <em>stateless</em>; they retain no information between calls to 51 * <code>transliterate()</code>. As a result, threads may share transliterators without synchronizing them. This might 52 * seem to limit the complexity of the transliteration operation. In practice, subclasses perform complex 53 * transliterations by delaying the replacement of text until it is known that no other replacements are possible. In 54 * other words, although the <code>Transliterator</code> objects are stateless, the source text itself embodies all the 55 * needed information, and delayed operation allows arbitrary complexity. 56 * 57 * <p> 58 * <b>Batch transliteration</b> 59 * 60 * <p> 61 * The simplest way to perform transliteration is all at once, on a string of existing text. This is referred to as 62 * <em>batch</em> transliteration. For example, given a string <code>input</code> and a transliterator <code>t</code>, 63 * the call 64 * 65 * <blockquote><code>String result = t.transliterate(input); 66 * </code></blockquote> 67 * 68 * will transliterate it and return the result. Other methods allow the client to specify a substring to be 69 * transliterated and to use {@link Replaceable} objects instead of strings, in order to preserve out-of-band 70 * information (such as text styles). 71 * 72 * <p> 73 * <b>Keyboard transliteration</b> 74 * 75 * <p> 76 * Somewhat more involved is <em>keyboard</em>, or incremental transliteration. This is the transliteration of text that 77 * is arriving from some source (typically the user's keyboard) one character at a time, or in some other piecemeal 78 * fashion. 
79 * 80 * <p> 81 * In keyboard transliteration, a <code>Replaceable</code> buffer stores the text. As text is inserted, as much as 82 * possible is transliterated on the fly. This means a GUI that displays the contents of the buffer may show text being 83 * modified as each new character arrives. 84 * 85 * <p> 86 * Consider the simple <code>RuleBasedTransliterator</code>: 87 * 88 * <blockquote><code> 89 * th>{theta}<br> 90 * t>{tau} 91 * </code></blockquote> 92 * 93 * When the user types 't', nothing will happen, since the transliterator is waiting to see if the next character is 94 * 'h'. To remedy this, we introduce the notion of a cursor, marked by a '|' in the output string: 95 * 96 * <blockquote><code> 97 * t>|{tau}<br> 98 * {tau}h>{theta} 99 * </code></blockquote> 100 * 101 * Now when the user types 't', tau appears, and if the next character is 'h', the tau changes to a theta. This is 102 * accomplished by maintaining a cursor position (independent of the insertion point, and invisible in the GUI) across 103 * calls to <code>transliterate()</code>. Typically, the cursor will be coincident with the insertion point, but in a 104 * case like the one above, it will precede the insertion point. 105 * 106 * <p> 107 * Keyboard transliteration methods maintain a set of three indices that are updated with each call to 108 * <code>transliterate()</code>, including the cursor, start, and limit. These indices are changed by the method, and 109 * they are passed in and out via a Position object. The <code>start</code> index marks the beginning of the substring 110 * that the transliterator will look at. It is advanced as text becomes committed (but it is not the committed index; 111 * that's the <code>cursor</code>). The <code>cursor</code> index, described above, marks the point at which the 112 * transliterator last stopped, either because it reached the end, or because it required more characters to 113 * disambiguate between possible inputs. 
The <code>cursor</code> can also be explicitly set by rules in a 114 * <code>RuleBasedTransliterator</code>. Any characters before the <code>cursor</code> index are frozen; future keyboard 115 * transliteration calls within this input sequence will not change them. New text is inserted at the <code>limit</code> 116 * index, which marks the end of the substring that the transliterator looks at. 117 * 118 * <p> 119 * Because keyboard transliteration assumes that more characters are to arrive, it is conservative in its operation. It 120 * only transliterates when it can do so unambiguously. Otherwise it waits for more characters to arrive. When the 121 * client code knows that no more characters are forthcoming, perhaps because the user has performed some input 122 * termination operation, then it should call <code>finishTransliteration()</code> to complete any pending 123 * transliterations. 124 * 125 * <p> 126 * <b>Inverses</b> 127 * 128 * <p> 129 * Pairs of transliterators may be inverses of one another. For example, if transliterator <b>A</b> transliterates 130 * characters by incrementing their Unicode value (so "abc" -> "def"), and transliterator <b>B</b> decrements character 131 * values, then <b>A</b> is an inverse of <b>B</b> and vice versa. If we compose <b>A</b> with <b>B</b> in a compound 132 * transliterator, the result is the identity transliterator, that is, a transliterator that does not change its input 133 * text. 134 * 135 * The <code>Transliterator</code> method <code>getInverse()</code> returns a transliterator's inverse, if one exists, 136 * or <code>null</code> otherwise. However, the result of <code>getInverse()</code> usually will <em>not</em> be a true 137 * mathematical inverse. This is because true inverse transliterators are difficult to formulate. For example, consider 138 * two transliterators: <b>AB</b>, which transliterates the character 'A' to 'B', and <b>BA</b>, which transliterates 139 * 'B' to 'A'. 
It might seem that these are exact inverses, since 140 * 141 * <blockquote>"A" x <b>AB</b> -> "B"<br> 142 * "B" x <b>BA</b> -> "A"</blockquote> 143 * 144 * where 'x' represents transliteration. However, 145 * 146 * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br> 147 * "BBCD" x <b>BA</b> -> "AACD"</blockquote> 148 * 149 * so <b>AB</b> composed with <b>BA</b> is not the identity. Nonetheless, <b>BA</b> may be usefully considered to be 150 * <b>AB</b>'s inverse, and it is on this basis that <b>AB</b><code>.getInverse()</code> could legitimately return 151 * <b>BA</b>. 152 * 153 * <p> 154 * <b>Filtering</b> 155 * <p>Each transliterator has a filter, which restricts changes to those characters selected by the filter. The 156 * filter affects just the characters that are changed -- the characters outside of the filter are still part of the 157 * context for the filter. For example, in the following even though 'x' is filtered out, and doesn't convert to y, it does affect the conversion of 'a'. 158 * 159 * <pre> 160 * String rules = "x > y; x{a} > b; "; 161 * Transliterator tempTrans = Transliterator.createFromRules("temp", rules, Transliterator.FORWARD); 162 * tempTrans.setFilter(new UnicodeSet("[a]")); 163 * String tempResult = tempTrans.transform("xa"); 164 * // results in "xb" 165 *</pre> 166 * <p> 167 * <b>IDs and display names</b> 168 * 169 * <p> 170 * A transliterator is designated by a short identifier string or <em>ID</em>. IDs follow the format 171 * <em>source-destination</em>, where <em>source</em> describes the entity being replaced, and <em>destination</em> 172 * describes the entity replacing <em>source</em>. The entities may be the names of scripts, particular sequences of 173 * characters, or whatever else it is that the transliterator converts to or from. For example, a transliterator from 174 * Russian to Latin might be named "Russian-Latin". A transliterator from keyboard escape sequences to Latin-1 175 * characters might be named "KeyboardEscape-Latin1". 
By convention, system entity names are in English, with the 176 * initial letters of words capitalized; user entity names may follow any format so long as they do not contain dashes. 177 * 178 * <p> 179 * In addition to programmatic IDs, transliterator objects have display names for presentation in user interfaces, 180 * returned by {@link #getDisplayName}. 181 * 182 * <p> 183 * <b>Factory methods and registration</b> 184 * 185 * <p> 186 * In general, client code should use the factory method <code>getInstance()</code> to obtain an instance of a 187 * transliterator given its ID. Valid IDs may be enumerated using <code>getAvailableIDs()</code>. Since transliterators 188 * are stateless, multiple calls to <code>getInstance()</code> with the same ID will return the same object. 189 * 190 * <p> 191 * In addition to the system transliterators registered at startup, user transliterators may be registered by calling 192 * <code>registerInstance()</code> at run time. To register a transliterator subclass without instantiating it (until it 193 * is needed), users may call <code>registerClass()</code>. 194 * 195 * <p> 196 * <b>Composed transliterators</b> 197 * 198 * <p> 199 * In addition to built-in system transliterators like "Latin-Greek", there are also built-in <em>composed</em> 200 * transliterators. These are implemented by composing two or more component transliterators. For example, if we have 201 * scripts "A", "B", "C", and "D", and we want to transliterate between all pairs of them, then we need to write 12 202 * transliterators: "A-B", "A-C", "A-D", "B-A",..., "D-A", "D-B", "D-C". If it is possible to convert all scripts to an 203 * intermediate script "M", then instead of writing 12 rule sets, we only need to write 8: "A~M", "B~M", "C~M", "D~M", 204 * "M~A", "M~B", "M~C", "M~D". (This might not seem like a big win, but it's really 2<em>n</em> vs. <em>n</em> 205 * <sup>2</sup> - <em>n</em>, so as <em>n</em> gets larger the gain becomes significant. 
With 9 scripts, it's 18 vs. 72 206 * rule sets, a big difference.) Note the use of "~" rather than "-" for the script separator here; this indicates that 207 * the given transliterator is intended to be composed with others, rather than be used as is. 208 * 209 * <p> 210 * Composed transliterators can be instantiated as usual. For example, the system transliterator "Devanagari-Gujarati" 211 * is a composed transliterator built internally as "Devanagari~InterIndic;InterIndic~Gujarati". When this 212 * transliterator is instantiated, it appears externally to be a standard transliterator (e.g., getID() returns 213 * "Devanagari-Gujarati"). 214 * 215 * <p> 216 * <b>Subclassing</b> 217 * 218 * <p> 219 * Subclasses must implement the abstract method <code>handleTransliterate()</code>. 220 * <p> 221 * Subclasses should override the <code>transliterate()</code> method taking a <code>Replaceable</code> and the 222 * <code>transliterate()</code> method taking a <code>String</code> and <code>StringBuffer</code> if the performance of 223 * these methods can be improved over the performance obtained by the default implementations in this class. 224 * 225 * @author Alan Liu 226 * @hide Only a subset of ICU is exposed in Android 227 */ 228 public abstract class Transliterator implements StringTransform { 229 /** 230 * Direction constant indicating the forward direction in a transliterator, 231 * e.g., the forward rules of a RuleBasedTransliterator. An "A-B" 232 * transliterator transliterates A to B when operating in the forward 233 * direction, and B to A when operating in the reverse direction. 234 */ 235 public static final int FORWARD = 0; 236 237 /** 238 * Direction constant indicating the reverse direction in a transliterator, 239 * e.g., the reverse rules of a RuleBasedTransliterator. An "A-B" 240 * transliterator transliterates A to B when operating in the forward 241 * direction, and B to A when operating in the reverse direction. 
242 */ 243 public static final int REVERSE = 1; 244 245 /** 246 * Position structure for incremental transliteration. This data 247 * structure defines two substrings of the text being 248 * transliterated. The first region, [contextStart, 249 * contextLimit), defines what characters the transliterator will 250 * read as context. The second region, [start, limit), defines 251 * what characters will actually be transliterated. The second 252 * region should be a subset of the first. 253 * 254 * <p>After a transliteration operation, some of the indices in this 255 * structure will be modified. See the field descriptions for 256 * details. 257 * 258 * <p>contextStart <= start <= limit <= contextLimit 259 * 260 * <p>Note: All index values in this structure must be at code point 261 * boundaries. That is, none of them may occur between two code units 262 * of a surrogate pair. If any index does split a surrogate pair, 263 * results are unspecified. 264 */ 265 public static class Position { 266 267 /** 268 * Beginning index, inclusive, of the context to be considered for 269 * a transliteration operation. The transliterator will ignore 270 * anything before this index. INPUT/OUTPUT parameter: This parameter 271 * is updated by a transliteration operation to reflect the maximum 272 * amount of antecontext needed by a transliterator. 273 */ 274 public int contextStart; 275 276 /** 277 * Ending index, exclusive, of the context to be considered for a 278 * transliteration operation. The transliterator will ignore 279 * anything at or after this index. INPUT/OUTPUT parameter: This 280 * parameter is updated to reflect changes in the length of the 281 * text, but points to the same logical position in the text. 282 */ 283 public int contextLimit; 284 285 /** 286 * Beginning index, inclusive, of the text to be transliteratd. 287 * INPUT/OUTPUT parameter: This parameter is advanced past 288 * characters that have already been transliterated by a 289 * transliteration operation. 
290 */ 291 public int start; 292 293 /** 294 * Ending index, exclusive, of the text to be transliteratd. 295 * INPUT/OUTPUT parameter: This parameter is updated to reflect 296 * changes in the length of the text, but points to the same 297 * logical position in the text. 298 */ 299 public int limit; 300 301 /** 302 * Constructs a Position object with start, limit, 303 * contextStart, and contextLimit all equal to zero. 304 */ 305 public Position() { 306 this(0, 0, 0, 0); 307 } 308 309 /** 310 * Constructs a Position object with the given start, 311 * contextStart, and contextLimit. The limit is set to the 312 * contextLimit. 313 */ 314 public Position(int contextStart, int contextLimit, int start) { 315 this(contextStart, contextLimit, start, contextLimit); 316 } 317 318 /** 319 * Constructs a Position object with the given start, limit, 320 * contextStart, and contextLimit. 321 */ 322 public Position(int contextStart, int contextLimit, 323 int start, int limit) { 324 this.contextStart = contextStart; 325 this.contextLimit = contextLimit; 326 this.start = start; 327 this.limit = limit; 328 } 329 330 /** 331 * Constructs a Position object that is a copy of another. 332 */ 333 public Position(Position pos) { 334 set(pos); 335 } 336 337 /** 338 * Copies the indices of this position from another. 339 */ 340 public void set(Position pos) { 341 contextStart = pos.contextStart; 342 contextLimit = pos.contextLimit; 343 start = pos.start; 344 limit = pos.limit; 345 } 346 347 /** 348 * Returns true if this Position is equal to the given object. 349 */ 350 @Override 351 public boolean equals(Object obj) { 352 if (obj instanceof Position) { 353 Position pos = (Position) obj; 354 return contextStart == pos.contextStart && 355 contextLimit == pos.contextLimit && 356 start == pos.start && 357 limit == pos.limit; 358 } 359 return false; 360 } 361 362 /** 363 * Mock implementation of hashCode(). This implementation always returns a constant 364 * value. 
When Java assertion is enabled, this method triggers an assertion failure. 365 * @deprecated This API is ICU internal only. 366 * @hide draft / provisional / internal are hidden on Android 367 */ 368 @Override 369 @Deprecated 370 public int hashCode() { 371 assert false : "hashCode not designed"; 372 return 42; 373 } 374 375 /** 376 * Returns a string representation of this Position. 377 */ 378 @Override 379 public String toString() { 380 return "[cs=" + contextStart 381 + ", s=" + start 382 + ", l=" + limit 383 + ", cl=" + contextLimit 384 + "]"; 385 } 386 387 /** 388 * Check all bounds. If they are invalid, throw an exception. 389 * @param length the length of the string this object applies to 390 * @exception IllegalArgumentException if any indices are out 391 * of bounds 392 */ 393 public final void validate(int length) { 394 if (contextStart < 0 || 395 start < contextStart || 396 limit < start || 397 contextLimit < limit || 398 length < contextLimit) { 399 throw new IllegalArgumentException("Invalid Position {cs=" + 400 contextStart + ", s=" + 401 start + ", l=" + 402 limit + ", cl=" + 403 contextLimit + "}, len=" + 404 length); 405 } 406 } 407 } 408 409 /** 410 * Programmatic name, e.g., "Latin-Arabic". 411 */ 412 private String ID; 413 414 /** 415 * This transliterator's filter. Any character for which 416 * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 417 * altered by this transliterator. If <tt>filter</tt> is 418 * <tt>null</tt> then no filtering is applied. 419 */ 420 private UnicodeSet filter; 421 422 private int maximumContextLength = 0; 423 424 /** 425 * System transliterator registry. 426 */ 427 private static TransliteratorRegistry registry; 428 429 private static Map<CaseInsensitiveString, String> displayNameCache; 430 431 /** 432 * Prefix for resource bundle key for the display name for a 433 * transliterator. The ID is appended to this to form the key. 434 * The resource bundle value should be a String. 
435 */ 436 private static final String RB_DISPLAY_NAME_PREFIX = "%Translit%%"; 437 438 /** 439 * Prefix for resource bundle key for the display name for a 440 * transliterator SCRIPT. The ID is appended to this to form the key. 441 * The resource bundle value should be a String. 442 */ 443 private static final String RB_SCRIPT_DISPLAY_NAME_PREFIX = "%Translit%"; 444 445 /** 446 * Resource bundle key for display name pattern. 447 * The resource bundle value should be a String forming a 448 * MessageFormat pattern, e.g.: 449 * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}". 450 */ 451 private static final String RB_DISPLAY_NAME_PATTERN = "TransliteratorNamePattern"; 452 453 /** 454 * Delimiter between elements in a compound ID. 455 */ 456 static final char ID_DELIM = ';'; 457 458 /** 459 * Delimiter before target in an ID. 460 */ 461 static final char ID_SEP = '-'; 462 463 /** 464 * Delimiter before variant in an ID. 465 */ 466 static final char VARIANT_SEP = '/'; 467 468 /** 469 * To enable debugging output in the Transliterator component, set 470 * DEBUG to true. 471 * 472 * N.B. Make sure to recompile all of the android.icu.text package 473 * after changing this. Easiest way to do this is 'ant clean 474 * core' ('ant' will NOT pick up the dependency automatically). 475 * 476 * <<This generates a lot of output.>> 477 */ 478 static final boolean DEBUG = false; 479 480 /** 481 * Default constructor. 482 * @param ID the string identifier for this transliterator 483 * @param filter the filter. Any character for which 484 * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 485 * altered by this transliterator. If <tt>filter</tt> is 486 * <tt>null</tt> then no filtering is applied. 487 */ 488 protected Transliterator(String ID, UnicodeFilter filter) { 489 if (ID == null) { 490 throw new NullPointerException(); 491 } 492 this.ID = ID; 493 setFilter(filter); 494 } 495 496 /** 497 * Transliterates a segment of a string, with optional filtering. 
498 * 499 * @param text the string to be transliterated 500 * @param start the beginning index, inclusive; <code>0 <= start 501 * <= limit</code>. 502 * @param limit the ending index, exclusive; <code>start <= limit 503 * <= text.length()</code>. 504 * @return The new limit index. The text previously occupying <code>[start, 505 * limit)</code> has been transliterated, possibly to a string of a different 506 * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where 507 * <em>new-limit</em> is the return value. If the input offsets are out of bounds, 508 * the returned value is -1 and the input string remains unchanged. 509 */ 510 public final int transliterate(Replaceable text, int start, int limit) { 511 if (start < 0 || 512 limit < start || 513 text.length() < limit) { 514 return -1; 515 } 516 517 Position pos = new Position(start, limit, start); 518 filteredTransliterate(text, pos, false, true); 519 return pos.limit; 520 } 521 522 /** 523 * Transliterates an entire string in place. Convenience method. 524 * @param text the string to be transliterated 525 */ 526 public final void transliterate(Replaceable text) { 527 transliterate(text, 0, text.length()); 528 } 529 530 /** 531 * Transliterate an entire string and returns the result. Convenience method. 532 * 533 * @param text the string to be transliterated 534 * @return The transliterated text 535 */ 536 public final String transliterate(String text) { 537 ReplaceableString result = new ReplaceableString(text); 538 transliterate(result); 539 return result.toString(); 540 } 541 542 /** 543 * Transliterates the portion of the text buffer that can be 544 * transliterated unambiguosly after new text has been inserted, 545 * typically as a result of a keyboard event. The new text in 546 * <code>insertion</code> will be inserted into <code>text</code> 547 * at <code>index.contextLimit</code>, advancing 548 * <code>index.contextLimit</code> by <code>insertion.length()</code>. 
549 * Then the transliterator will try to transliterate characters of 550 * <code>text</code> between <code>index.start</code> and 551 * <code>index.contextLimit</code>. Characters before 552 * <code>index.start</code> will not be changed. 553 * 554 * <p>Upon return, values in <code>index</code> will be updated. 555 * <code>index.contextStart</code> will be advanced to the first 556 * character that future calls to this method will read. 557 * <code>index.start</code> and <code>index.contextLimit</code> will 558 * be adjusted to delimit the range of text that future calls to 559 * this method may change. 560 * 561 * <p>Typical usage of this method begins with an initial call 562 * with <code>index.contextStart</code> and <code>index.contextLimit</code> 563 * set to indicate the portion of <code>text</code> to be 564 * transliterated, and <code>index.start == index.contextStart</code>. 565 * Thereafter, <code>index</code> can be used without 566 * modification in future calls, provided that all changes to 567 * <code>text</code> are made via this method. 568 * 569 * <p>This method assumes that future calls may be made that will 570 * insert new text into the buffer. As a result, it only performs 571 * unambiguous transliterations. After the last call to this 572 * method, there may be untransliterated text that is waiting for 573 * more input to resolve an ambiguity. In order to perform these 574 * pending transliterations, clients should call {@link 575 * #finishTransliteration} after the last call to this 576 * method has been made. 577 * 578 * @param text the buffer holding transliterated and untransliterated text 579 * @param index the start and limit of the text, the position 580 * of the cursor, and the start and limit of transliteration. 581 * @param insertion text to be inserted and possibly 582 * transliterated into the translation buffer at 583 * <code>index.contextLimit</code>. If <code>null</code> then no text 584 * is inserted. 
585 * @see #handleTransliterate 586 * @exception IllegalArgumentException if <code>index</code> 587 * is invalid 588 */ 589 public final void transliterate(Replaceable text, Position index, 590 String insertion) { 591 index.validate(text.length()); 592 593 // int originalStart = index.contextStart; 594 if (insertion != null) { 595 text.replace(index.limit, index.limit, insertion); 596 index.limit += insertion.length(); 597 index.contextLimit += insertion.length(); 598 } 599 600 if (index.limit > 0 && 601 UTF16.isLeadSurrogate(text.charAt(index.limit - 1))) { 602 // Oops, there is a dangling lead surrogate in the buffer. 603 // This will break most transliterators, since they will 604 // assume it is part of a pair. Don't transliterate until 605 // more text comes in. 606 return; 607 } 608 609 filteredTransliterate(text, index, true, true); 610 611 // TODO 612 // This doesn't work once we add quantifier support. Need to rewrite 613 // this code to support quantifiers and 'use maximum backup <n>;'. 614 // 615 // index.contextStart = Math.max(index.start - getMaximumContextLength(), 616 // originalStart); 617 } 618 619 /** 620 * Transliterates the portion of the text buffer that can be 621 * transliterated unambiguosly after a new character has been 622 * inserted, typically as a result of a keyboard event. This is a 623 * convenience method; see {@link #transliterate(Replaceable, 624 * Transliterator.Position, String)} for details. 625 * @param text the buffer holding transliterated and 626 * untransliterated text 627 * @param index the start and limit of the text, the position 628 * of the cursor, and the start and limit of transliteration. 629 * @param insertion text to be inserted and possibly 630 * transliterated into the translation buffer at 631 * <code>index.contextLimit</code>. 
632 * @see #transliterate(Replaceable, Transliterator.Position, String) 633 */ 634 public final void transliterate(Replaceable text, Position index, 635 int insertion) { 636 transliterate(text, index, UTF16.valueOf(insertion)); 637 } 638 639 /** 640 * Transliterates the portion of the text buffer that can be 641 * transliterated unambiguosly. This is a convenience method; see 642 * {@link #transliterate(Replaceable, Transliterator.Position, 643 * String)} for details. 644 * @param text the buffer holding transliterated and 645 * untransliterated text 646 * @param index the start and limit of the text, the position 647 * of the cursor, and the start and limit of transliteration. 648 * @see #transliterate(Replaceable, Transliterator.Position, String) 649 */ 650 public final void transliterate(Replaceable text, Position index) { 651 transliterate(text, index, null); 652 } 653 654 /** 655 * Finishes any pending transliterations that were waiting for 656 * more characters. Clients should call this method as the last 657 * call after a sequence of one or more calls to 658 * <code>transliterate()</code>. 659 * @param text the buffer holding transliterated and 660 * untransliterated text. 661 * @param index the array of indices previously passed to {@link 662 * #transliterate} 663 */ 664 public final void finishTransliteration(Replaceable text, 665 Position index) { 666 index.validate(text.length()); 667 filteredTransliterate(text, index, false, true); 668 } 669 670 /** 671 * Abstract method that concrete subclasses define to implement 672 * their transliteration algorithm. This method handles both 673 * incremental and non-incremental transliteration. Let 674 * <code>originalStart</code> refer to the value of 675 * <code>pos.start</code> upon entry. 676 * 677 * <ul> 678 * <li>If <code>incremental</code> is false, then this method 679 * should transliterate all characters between 680 * <code>pos.start</code> and <code>pos.limit</code>. 
Upon return 681 * <code>pos.start</code> must == <code> pos.limit</code>.</li> 682 * 683 * <li>If <code>incremental</code> is true, then this method 684 * should transliterate all characters between 685 * <code>pos.start</code> and <code>pos.limit</code> that can be 686 * unambiguously transliterated, regardless of future insertions 687 * of text at <code>pos.limit</code>. Upon return, 688 * <code>pos.start</code> should be in the range 689 * [<code>originalStart</code>, <code>pos.limit</code>). 690 * <code>pos.start</code> should be positioned such that 691 * characters [<code>originalStart</code>, <code> 692 * pos.start</code>) will not be changed in the future by this 693 * transliterator and characters [<code>pos.start</code>, 694 * <code>pos.limit</code>) are unchanged.</li> 695 * </ul> 696 * 697 * <p>Implementations of this method should also obey the 698 * following invariants:</p> 699 * 700 * <ul> 701 * <li> <code>pos.limit</code> and <code>pos.contextLimit</code> 702 * should be updated to reflect changes in length of the text 703 * between <code>pos.start</code> and <code>pos.limit</code>. The 704 * difference <code> pos.contextLimit - pos.limit</code> should 705 * not change.</li> 706 * 707 * <li><code>pos.contextStart</code> should not change.</li> 708 * 709 * <li>Upon return, neither <code>pos.start</code> nor 710 * <code>pos.limit</code> should be less than 711 * <code>originalStart</code>.</li> 712 * 713 * <li>Text before <code>originalStart</code> and text after 714 * <code>pos.limit</code> should not change.</li> 715 * 716 * <li>Text before <code>pos.contextStart</code> and text after 717 * <code> pos.contextLimit</code> should be ignored.</li> 718 * </ul> 719 * 720 * <p>Subclasses may safely assume that all characters in 721 * [<code>pos.start</code>, <code>pos.limit</code>) are filtered. 722 * In other words, the filter has already been applied by the time 723 * this method is called. See 724 * <code>filteredTransliterate()</code>. 
     *
     * <p>This method is <b>not</b> for public consumption.  Calling
     * this method directly will transliterate
     * [<code>pos.start</code>, <code>pos.limit</code>) without
     * applying the filter. End user code should call <code>
     * transliterate()</code> instead of this method. Subclass code
     * should call <code>filteredTransliterate()</code> instead of
     * this method.
     *
     * @param text the buffer holding transliterated and
     * untransliterated text
     *
     * @param pos the indices indicating the start, limit, context
     * start, and context limit of the text.
     *
     * @param incremental if true, assume more text may be inserted at
     * <code>pos.limit</code> and act accordingly.  Otherwise,
     * transliterate all text between <code>pos.start</code> and
     * <code>pos.limit</code> and move <code>pos.start</code> up to
     * <code>pos.limit</code>.
     *
     * @see #transliterate
     */
    protected abstract void handleTransliterate(Replaceable text,
                                                Position pos, boolean incremental);

    /**
     * Top-level transliteration method, handling filtering, incremental and
     * non-incremental transliteration, and rollback.  All transliteration
     * public API methods eventually call this method with a rollback argument
     * of TRUE.  Other entities may call this method but rollback should be
     * FALSE.
     *
     * <p>If this transliterator has a filter, break up the input text into runs
     * of unfiltered characters.  Pass each run to the subclass's
     * <code>handleTransliterate()</code>.
     *
     * <p>In incremental mode, if rollback is TRUE, perform a special
     * incremental procedure in which several passes are made over the input
     * text, adding one character at a time, and committing successful
     * transliterations as they occur.  Unsuccessful transliterations are rolled
     * back and retried with additional characters to give correct results.
     *
     * <p>While a rollback run is in progress, a copy of the run is temporarily
     * appended at the end of <code>text</code> (at <code>text.length()</code>)
     * and deleted again before the run finishes.
     *
     * @param text the text to be transliterated
     * @param index the position indices
     * @param incremental if TRUE, then assume more characters may be inserted
     * at index.limit, and postpone processing to accommodate future incoming
     * characters
     * @param rollback if TRUE and if incremental is TRUE, then perform special
     * incremental processing, as described above, and undo partial
     * transliterations where necessary.  If incremental is FALSE then this
     * parameter is ignored.
     * @throws RuntimeException if <code>handleTransliterate()</code> returns
     * with <code>index.start != index.limit</code> for a run that was
     * processed non-incrementally
     */
    private void filteredTransliterate(Replaceable text,
                                       Position index,
                                       boolean incremental,
                                       boolean rollback) {
        // Short circuit path: with no filter and no rollback requested there
        // is nothing to split into runs or passes, so hand the whole range
        // straight to the subclass (in whichever mode the caller asked for).
        if (filter == null && !rollback) {
            handleTransliterate(text, index, incremental);
            return;
        }

        //----------------------------------------------------------------------
        // This method processes text in two groupings:
        //
        // RUNS -- A run is a contiguous group of characters which are contained
        // in the filter for this transliterator (filter.contains(ch) == true).
        // Text outside of runs may appear as context but it is not modified.
        // The start and limit Position values are narrowed to each run.
        //
        // PASSES (incremental only) -- To make incremental mode work correctly,
        // each run is broken up into n passes, where n is the length (in code
        // points) of the run.  Each pass contains the first n characters.  If a
        // pass is completely transliterated, it is committed, and further passes
        // include characters after the committed text.  If a pass is blocked,
        // and does not transliterate completely, then this method rolls back
        // the changes made during the pass, extends the pass by one code point,
        // and tries again.
        //----------------------------------------------------------------------

        // globalLimit is the limit value for the entire operation.  We
        // set index.limit to the end of each unfiltered run before
        // calling handleTransliterate(), so we need to maintain the real
        // value of index.limit here.  After each transliteration, we
        // update globalLimit for insertions or deletions that have
        // happened.
        int globalLimit = index.limit;

        // If there is a non-null filter, then break the input text up.  Say the
        // input text has the form:
        //   xxxabcxxdefxx
        // where 'x' represents a filtered character (filter.contains('x') ==
        // false).  Then we break this up into:
        //   xxxabc xxdef xx
        // Each pass through the loop consumes a run of filtered
        // characters (which are ignored) and a subsequent run of
        // unfiltered characters (which are transliterated).

        // Scratch buffer for trace output; only allocated when DEBUG is set,
        // and only dereferenced inside DEBUG-guarded blocks below.
        StringBuffer log = null;
        if (DEBUG) {
            log = new StringBuffer();
        }

        // RUN LOOP -- one iteration per unfiltered run (a single iteration
        // when there is no filter; see the break at the bottom).
        for (;;) {

            if (filter != null) {
                // Narrow the range to be transliterated to the first run
                // of unfiltered characters at or after index.start.

                // Advance past filtered chars
                int c;
                while (index.start < globalLimit &&
                       !filter.contains(c=text.char32At(index.start))) {
                    index.start += UTF16.getCharCount(c);
                }

                // Find the end of this run of unfiltered chars
                index.limit = index.start;
                while (index.limit < globalLimit &&
                       filter.contains(c=text.char32At(index.limit))) {
                    index.limit += UTF16.getCharCount(c);
                }
            }

            // Check to see if the unfiltered run is empty.  This only
            // happens at the end of the string when all the remaining
            // characters are filtered.
            if (index.start == index.limit) {
                break;
            }

            // Is this run incremental?  If there is additional
            // filtered text (if limit < globalLimit) then we pass in
            // an incremental value of FALSE to force the subclass to
            // complete the transliteration for this run.
            boolean isIncrementalRun =
                (index.limit < globalLimit ? false : incremental);

            // Change in text length produced by the most recent
            // handleTransliterate() call.
            int delta;

            // Implement rollback.  To understand the need for rollback,
            // consider the following transliterator:
            //
            //  "t" is "a > A;"
            //  "u" is "A > b;"
            //  "v" is a compound of "t; NFD; u" with a filter [:Ll:]
            //
            // Now apply "v" to the input text "a".  The result is "b".  But if
            // the transliteration is done incrementally, then the NFD holds
            // things up after "t" has already transformed "a" to "A".  When
            // finishTransliterate() is called, "A" is _not_ processed because
            // it gets excluded by the [:Ll:] filter, and the end result is "A"
            // -- incorrect.  The problem is that the filter is applied to a
            // partially-transliterated result, when we only want it to apply to
            // input text.  Although this example describes a compound
            // transliterator containing NFD and a specific filter, it can
            // happen with any transliterator which does a partial
            // transformation in incremental mode into characters outside its
            // filter.
            //
            // To handle this, when in incremental mode we supply characters to
            // handleTransliterate() in several passes.  Each pass adds one more
            // input character to the input text.  That is, for input "ABCD", we
            // first try "A", then "AB", then "ABC", and finally "ABCD".  If at
            // any point we block (upon return, start < limit) then we roll
            // back.  If at any point we complete the run (upon return start ==
            // limit) then we commit that run.

            if (rollback && isIncrementalRun) {

                if (DEBUG) {
                    log.setLength(0);
                    System.out.println("filteredTransliterate{"+getID()+"}i: IN=" +
                                       UtilityExtensions.formatInput(text, index));
                }

                int runStart = index.start;
                int runLimit = index.limit;
                int runLength =  runLimit - runStart;

                // Make a rollback copy at the end of the string
                int rollbackOrigin = text.length();
                text.copy(runStart, runLimit, rollbackOrigin);

                // Variables reflecting the commitment of completely
                // transliterated text.  passStart is the runStart, advanced
                // past committed text.  rollbackStart is the rollbackOrigin,
                // advanced past rollback text that corresponds to committed
                // text.
                int passStart = runStart;
                int rollbackStart = rollbackOrigin;

                // The limit for each pass; we advance by one code point with
                // each iteration.
                int passLimit = index.start;

                // Total length, in 16-bit code units, of uncommitted text.
                // This is the length to be rolled back.
                int uncommittedLength = 0;

                // Total delta (change in length) for all passes
                int totalDelta = 0;

                // PASS MAIN LOOP -- Start with a single character, and extend
                // the text by one character at a time.  Roll back partial
                // transliterations and commit complete transliterations.
                for (;;) {
                    // Length of additional code point, either one or two.
                    // The character is read before the bounds test below, so
                    // on the final iteration this reads at runLimit.
                    int charLength =
                        UTF16.getCharCount(text.char32At(passLimit));
                    passLimit += charLength;
                    if (passLimit > runLimit) {
                        break;
                    }
                    uncommittedLength += charLength;

                    index.limit = passLimit;

                    if (DEBUG) {
                        log.setLength(0);
                        log.append("filteredTransliterate{"+getID()+"}i: ");
                        UtilityExtensions.formatInput(log, text, index);
                    }

                    // Delegate to subclass for actual transliteration.  Upon
                    // return, start will be updated to point after the
                    // transliterated text, and limit and contextLimit will be
                    // adjusted for length changes.
                    handleTransliterate(text, index, true);

                    if (DEBUG) {
                        log.append(" => ");
                        UtilityExtensions.formatInput(log, text, index);
                    }

                    delta = index.limit - passLimit; // change in length

                    // We failed to completely transliterate this pass.
                    // Roll back the text.  Indices remain unchanged; reset
                    // them where necessary.
                    if (index.start != index.limit) {
                        // Find the rollbackStart, adjusted for length changes
                        // and the deletion of partially transliterated text.
                        int rs = rollbackStart + delta - (index.limit - passStart);

                        // Delete the partially transliterated text
                        text.replace(passStart, index.limit, "");

                        // Copy the rollback text back
                        text.copy(rs, rs + uncommittedLength, passStart);

                        // Restore indices to their original values
                        index.start = passStart;
                        index.limit = passLimit;
                        index.contextLimit -= delta;

                        if (DEBUG) {
                            log.append(" (ROLLBACK)");
                        }
                    }

                    // We did completely transliterate this pass.  Update the
                    // commit indices to record how far we got.  Adjust indices
                    // for length change.
                    else {
                        // Move the pass indices past the committed text.
                        passStart = passLimit = index.start;

                        // Adjust the rollbackStart for length changes and move
                        // it past the committed text.  All characters we've
                        // processed to this point are committed now, so zero
                        // out the uncommittedLength.
                        rollbackStart += delta + uncommittedLength;
                        uncommittedLength = 0;

                        // Adjust indices for length changes.
                        runLimit += delta;
                        totalDelta += delta;
                    }

                    if (DEBUG) {
                        System.out.println(Utility.escape(log.toString()));
                    }
                }

                // Adjust overall limit and rollbackOrigin for insertions and
                // deletions.  Don't need to worry about contextLimit because
                // handleTransliterate() maintains that.
                rollbackOrigin += totalDelta;
                globalLimit += totalDelta;

                // Delete the rollback copy
                text.replace(rollbackOrigin, rollbackOrigin + runLength, "");

                // Move start past committed text
                index.start = passStart;
            }

            else {
                // Delegate to subclass for actual transliteration.
                if (DEBUG) {
                    log.setLength(0);
                    log.append("filteredTransliterate{"+getID()+"}: ");
                    UtilityExtensions.formatInput(log, text, index);
                }

                int limit = index.limit;
                handleTransliterate(text, index, isIncrementalRun);
                delta = index.limit - limit; // change in length

                if (DEBUG) {
                    log.append(" => ");
                    UtilityExtensions.formatInput(log, text, index);
                }

                // In a properly written transliterator, start == limit after
                // handleTransliterate() returns when incremental is false.
                // Catch cases where the subclass doesn't do this, and throw
                // an exception.  (Just pinning start to limit is a bad idea,
                // because what's probably happening is that the subclass
                // isn't transliterating all the way to the end, and it should
                // in non-incremental mode.)
                if (!isIncrementalRun && index.start != index.limit) {
                    throw new RuntimeException("ERROR: Incomplete non-incremental transliteration by " + getID());
                }

                // Adjust overall limit for insertions/deletions.  Don't need
                // to worry about contextLimit because handleTransliterate()
                // maintains that.
                globalLimit += delta;

                if (DEBUG) {
                    System.out.println(Utility.escape(log.toString()));
                }
            }

            // Without a filter the whole range was a single run; an
            // incremental run is by construction the last one (it reaches
            // globalLimit).  Either way there is nothing left to process.
            if (filter == null || isIncrementalRun) {
                break;
            }

            // If we did completely transliterate this
            // run, then repeat with the next unfiltered run.
        }

        // Start is valid where it is.  Limit needs to be put back where
        // it was, modulo adjustments for deletions/insertions.
        index.limit = globalLimit;

        if (DEBUG) {
            System.out.println("filteredTransliterate{"+getID()+"}: OUT=" +
                               UtilityExtensions.formatInput(text, index));
        }
    }

    /**
     * Transliterate a substring of text, as specified by index, taking filters
     * into account.  This method is for subclasses that need to delegate to
     * another transliterator, such as CompoundTransliterator.  It calls the
     * four-argument overload with rollback disabled.
     * @param text the text to be transliterated
     * @param index the position indices
     * @param incremental if TRUE, then assume more characters may be inserted
     * at index.limit, and postpone processing to accommodate future incoming
     * characters
     */
    public void filteredTransliterate(Replaceable text,
                                      Position index,
                                      boolean incremental) {
        filteredTransliterate(text, index, incremental, false);
    }

    /**
     * Returns the length of the longest context required by this transliterator.
     * This is <em>preceding</em> context.  The default value is zero, but
     * subclasses can change this by calling <code>setMaximumContextLength()</code>.
     * For example, if a transliterator translates "ddd" (where
     * d is any digit) to "555" when preceded by "(ddd)", then the preceding
     * context length is 5, the length of "(ddd)".
     *
     * @return The maximum number of preceding context characters this
     * transliterator needs to examine
     */
    public final int getMaximumContextLength() {
        return maximumContextLength;
    }

    /**
     * Method for subclasses to use to set the maximum context length.
1114 * @see #getMaximumContextLength 1115 */ 1116 protected void setMaximumContextLength(int a) { 1117 if (a < 0) { 1118 throw new IllegalArgumentException("Invalid context length " + a); 1119 } 1120 maximumContextLength = a; 1121 } 1122 1123 /** 1124 * Returns a programmatic identifier for this transliterator. 1125 * If this identifier is passed to <code>getInstance()</code>, it 1126 * will return this object, if it has been registered. 1127 * @see #registerClass 1128 * @see #getAvailableIDs 1129 */ 1130 public final String getID() { 1131 return ID; 1132 } 1133 1134 /** 1135 * Set the programmatic identifier for this transliterator. Only 1136 * for use by subclasses. 1137 */ 1138 protected final void setID(String id) { 1139 ID = id; 1140 } 1141 1142 /** 1143 * Returns a name for this transliterator that is appropriate for 1144 * display to the user in the default <code>DISPLAY</code> locale. See {@link 1145 * #getDisplayName(String,Locale)} for details. 1146 * @see android.icu.util.ULocale.Category#DISPLAY 1147 */ 1148 public final static String getDisplayName(String ID) { 1149 return getDisplayName(ID, ULocale.getDefault(Category.DISPLAY)); 1150 } 1151 1152 /** 1153 * Returns a name for this transliterator that is appropriate for 1154 * display to the user in the given locale. This name is taken 1155 * from the locale resource data in the standard manner of the 1156 * <code>java.text</code> package. 1157 * 1158 * <p>If no localized names exist in the system resource bundles, 1159 * a name is synthesized using a localized 1160 * <code>MessageFormat</code> pattern from the resource data. The 1161 * arguments to this pattern are an integer followed by one or two 1162 * strings. The integer is the number of strings, either 1 or 2. 1163 * The strings are formed by splitting the ID for this 1164 * transliterator at the first '-'. If there is no '-', then the 1165 * entire ID forms the only string. 
1166 * @param inLocale the Locale in which the display name should be 1167 * localized. 1168 * @see java.text.MessageFormat 1169 */ 1170 public static String getDisplayName(String id, Locale inLocale) { 1171 return getDisplayName(id, ULocale.forLocale(inLocale)); 1172 } 1173 1174 /** 1175 * Returns a name for this transliterator that is appropriate for 1176 * display to the user in the given locale. This name is taken 1177 * from the locale resource data in the standard manner of the 1178 * <code>java.text</code> package. 1179 * 1180 * <p>If no localized names exist in the system resource bundles, 1181 * a name is synthesized using a localized 1182 * <code>MessageFormat</code> pattern from the resource data. The 1183 * arguments to this pattern are an integer followed by one or two 1184 * strings. The integer is the number of strings, either 1 or 2. 1185 * The strings are formed by splitting the ID for this 1186 * transliterator at the first '-'. If there is no '-', then the 1187 * entire ID forms the only string. 1188 * @param inLocale the ULocale in which the display name should be 1189 * localized. 1190 * @see java.text.MessageFormat 1191 */ 1192 public static String getDisplayName(String id, ULocale inLocale) { 1193 1194 // Resource bundle containing display name keys and the 1195 // RB_RULE_BASED_IDS array. 1196 // 1197 //If we ever integrate this with the Sun JDK, the resource bundle 1198 // root will change to sun.text.resources.LocaleElements 1199 1200 ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle. 
1201 getBundleInstance(ICUData.ICU_TRANSLIT_BASE_NAME, inLocale); 1202 1203 // Normalize the ID 1204 String stv[] = TransliteratorIDParser.IDtoSTV(id); 1205 if (stv == null) { 1206 // No target; malformed id 1207 return ""; 1208 } 1209 String ID = stv[0] + '-' + stv[1]; 1210 if (stv[2] != null && stv[2].length() > 0) { 1211 ID = ID + '/' + stv[2]; 1212 } 1213 1214 // Use the registered display name, if any 1215 String n = displayNameCache.get(new CaseInsensitiveString(ID)); 1216 if (n != null) { 1217 return n; 1218 } 1219 1220 // Use display name for the entire transliterator, if it 1221 // exists. 1222 try { 1223 return bundle.getString(RB_DISPLAY_NAME_PREFIX + ID); 1224 } catch (MissingResourceException e) {} 1225 1226 try { 1227 // Construct the formatter first; if getString() fails 1228 // we'll exit the try block 1229 MessageFormat format = new MessageFormat( 1230 bundle.getString(RB_DISPLAY_NAME_PATTERN)); 1231 // Construct the argument array 1232 Object[] args = new Object[] { Integer.valueOf(2), stv[0], stv[1] }; 1233 1234 // Use display names for the scripts, if they exist 1235 for (int j=1; j<=2; ++j) { 1236 try { 1237 args[j] = bundle.getString(RB_SCRIPT_DISPLAY_NAME_PREFIX + 1238 (String) args[j]); 1239 } catch (MissingResourceException e) {} 1240 } 1241 1242 // Format it using the pattern in the resource 1243 return (stv[2].length() > 0) ? 1244 (format.format(args) + '/' + stv[2]) : 1245 format.format(args); 1246 } catch (MissingResourceException e2) {} 1247 1248 // We should not reach this point unless there is something 1249 // wrong with the build or the RB_DISPLAY_NAME_PATTERN has 1250 // been deleted from the root RB_LOCALE_ELEMENTS resource. 1251 throw new RuntimeException(); 1252 } 1253 1254 /** 1255 * Returns the filter used by this transliterator, or <tt>null</tt> 1256 * if this transliterator uses no filter. 
1257 */ 1258 public final UnicodeFilter getFilter() { 1259 return filter; 1260 } 1261 1262 /** 1263 * Changes the filter used by this transliterator. If the filter 1264 * is set to <tt>null</tt> then no filtering will occur. 1265 * 1266 * <p>Callers must take care if a transliterator is in use by 1267 * multiple threads. The filter should not be changed by one 1268 * thread while another thread may be transliterating. 1269 */ 1270 public void setFilter(UnicodeFilter filter) { 1271 if (filter == null) { 1272 this.filter = null; 1273 } else { 1274 try { 1275 // fast high-runner case 1276 this.filter = new UnicodeSet((UnicodeSet)filter).freeze(); 1277 } catch (Exception e) { 1278 this.filter = new UnicodeSet(); 1279 filter.addMatchSetTo(this.filter); 1280 this.filter.freeze(); 1281 } 1282 } 1283 } 1284 1285 /** 1286 * Returns a <code>Transliterator</code> object given its ID. 1287 * The ID must be either a system transliterator ID or a ID registered 1288 * using <code>registerClass()</code>. 1289 * 1290 * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 1291 * @return A <code>Transliterator</code> object with the given ID 1292 * @exception IllegalArgumentException if the given ID is invalid. 1293 */ 1294 public static final Transliterator getInstance(String ID) { 1295 return getInstance(ID, FORWARD); 1296 } 1297 1298 /** 1299 * Returns a <code>Transliterator</code> object given its ID. 1300 * The ID must be either a system transliterator ID or a ID registered 1301 * using <code>registerClass()</code>. 1302 * 1303 * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 1304 * @param dir either FORWARD or REVERSE. If REVERSE then the 1305 * inverse of the given ID is instantiated. 1306 * @return A <code>Transliterator</code> object with the given ID 1307 * @exception IllegalArgumentException if the given ID is invalid. 
1308 * @see #registerClass 1309 * @see #getAvailableIDs 1310 * @see #getID 1311 */ 1312 public static Transliterator getInstance(String ID, 1313 int dir) { 1314 StringBuffer canonID = new StringBuffer(); 1315 List<SingleID> list = new ArrayList<SingleID>(); 1316 UnicodeSet[] globalFilter = new UnicodeSet[1]; 1317 if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) { 1318 throw new IllegalArgumentException("Invalid ID " + ID); 1319 } 1320 1321 List<Transliterator> translits = TransliteratorIDParser.instantiateList(list); 1322 1323 // assert(list.size() > 0); 1324 Transliterator t = null; 1325 if (list.size() > 1 || canonID.indexOf(";") >= 0) { 1326 // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only 1327 // has one child transliterator. This is so that toRules() will return the right thing 1328 // (without any inactive ID), but our main ID still comes out correct. That is, if we 1329 // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;" 1330 // even though the ID is "(Lower);Latin-Greek;". 1331 t = new CompoundTransliterator(translits); 1332 } 1333 else { 1334 t = translits.get(0); 1335 } 1336 1337 t.setID(canonID.toString()); 1338 if (globalFilter[0] != null) { 1339 t.setFilter(globalFilter[0]); 1340 } 1341 return t; 1342 } 1343 1344 /** 1345 * Create a transliterator from a basic ID. This is an ID 1346 * containing only the forward direction source, target, and 1347 * variant. 1348 * @param id a basic ID of the form S-T or S-T/V. 1349 * @param canonID canonical ID to apply to the result, or 1350 * null to leave the ID unchanged 1351 * @return a newly created Transliterator or null if the ID is 1352 * invalid. 
1353 */ 1354 static Transliterator getBasicInstance(String id, String canonID) { 1355 StringBuffer s = new StringBuffer(); 1356 Transliterator t = registry.get(id, s); 1357 if (s.length() != 0) { 1358 // assert(t==0); 1359 // Instantiate an alias 1360 t = getInstance(s.toString(), FORWARD); 1361 } 1362 if (t != null && canonID != null) { 1363 t.setID(canonID); 1364 } 1365 return t; 1366 } 1367 1368 /** 1369 * Returns a <code>Transliterator</code> object constructed from 1370 * the given rule string. This will be a RuleBasedTransliterator, 1371 * if the rule string contains only rules, or a 1372 * CompoundTransliterator, if it contains ID blocks, or a 1373 * NullTransliterator, if it contains ID blocks which parse as 1374 * empty for the given direction. 1375 */ 1376 public static final Transliterator createFromRules(String ID, String rules, int dir) { 1377 Transliterator t = null; 1378 1379 TransliteratorParser parser = new TransliteratorParser(); 1380 parser.parse(rules, dir); 1381 1382 // NOTE: The logic here matches that in TransliteratorRegistry. 1383 if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) { 1384 t = new NullTransliterator(); 1385 } 1386 else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) { 1387 t = new RuleBasedTransliterator(ID, parser.dataVector.get(0), parser.compoundFilter); 1388 } 1389 else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) { 1390 // idBlock, no data -- this is an alias. The ID has 1391 // been munged from reverse into forward mode, if 1392 // necessary, so instantiate the ID in the forward 1393 // direction. 
1394 if (parser.compoundFilter != null) { 1395 t = getInstance(parser.compoundFilter.toPattern(false) + ";" 1396 + parser.idBlockVector.get(0)); 1397 } else { 1398 t = getInstance(parser.idBlockVector.get(0)); 1399 } 1400 1401 if (t != null) { 1402 t.setID(ID); 1403 } 1404 } 1405 else { 1406 List<Transliterator> transliterators = new ArrayList<Transliterator>(); 1407 int passNumber = 1; 1408 1409 int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size()); 1410 for (int i = 0; i < limit; i++) { 1411 if (i < parser.idBlockVector.size()) { 1412 String idBlock = parser.idBlockVector.get(i); 1413 if (idBlock.length() > 0) { 1414 Transliterator temp = getInstance(idBlock); 1415 if (!(temp instanceof NullTransliterator)) 1416 transliterators.add(getInstance(idBlock)); 1417 } 1418 } 1419 if (i < parser.dataVector.size()) { 1420 Data data = parser.dataVector.get(i); 1421 transliterators.add(new RuleBasedTransliterator("%Pass" + passNumber++, data, null)); 1422 } 1423 } 1424 1425 t = new CompoundTransliterator(transliterators, passNumber - 1); 1426 t.setID(ID); 1427 if (parser.compoundFilter != null) { 1428 t.setFilter(parser.compoundFilter); 1429 } 1430 } 1431 1432 return t; 1433 } 1434 1435 /** 1436 * Returns a rule string for this transliterator. 1437 * @param escapeUnprintable if true, then unprintable characters 1438 * will be converted to escape form backslash-'u' or 1439 * backslash-'U'. 1440 */ 1441 public String toRules(boolean escapeUnprintable) { 1442 return baseToRules(escapeUnprintable); 1443 } 1444 1445 /** 1446 * Returns a rule string for this transliterator. This is 1447 * a non-overrideable base class implementation that subclasses 1448 * may call. It simply munges the ID into the correct format, 1449 * that is, "foo" => "::foo". 1450 * @param escapeUnprintable if true, then unprintable characters 1451 * will be converted to escape form backslash-'u' or 1452 * backslash-'U'. 
1453 */ 1454 protected final String baseToRules(boolean escapeUnprintable) { 1455 // The base class implementation of toRules munges the ID into 1456 // the correct format. That is: foo => ::foo 1457 // KEEP in sync with rbt_pars 1458 if (escapeUnprintable) { 1459 StringBuffer rulesSource = new StringBuffer(); 1460 String id = getID(); 1461 for (int i=0; i<id.length();) { 1462 int c = UTF16.charAt(id, i); 1463 if (!Utility.escapeUnprintable(rulesSource, c)) { 1464 UTF16.append(rulesSource, c); 1465 } 1466 i += UTF16.getCharCount(c); 1467 } 1468 rulesSource.insert(0, "::"); 1469 rulesSource.append(ID_DELIM); 1470 return rulesSource.toString(); 1471 } 1472 return "::" + getID() + ID_DELIM; 1473 } 1474 1475 /** 1476 * Return the elements that make up this transliterator. For 1477 * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" 1478 * were created, the return value of this method would be an array 1479 * of the three transliterator objects that make up that 1480 * transliterator: [NFD, Jamo-Latin, Latin-Greek]. 1481 * 1482 * <p>If this transliterator is not composed of other 1483 * transliterators, then this method will return an array of 1484 * length one containing a reference to this transliterator. 1485 * @return an array of one or more transliterators that make up 1486 * this transliterator 1487 */ 1488 public Transliterator[] getElements() { 1489 Transliterator result[]; 1490 if (this instanceof CompoundTransliterator) { 1491 CompoundTransliterator cpd = (CompoundTransliterator) this; 1492 result = new Transliterator[cpd.getCount()]; 1493 for (int i=0; i<result.length; ++i) { 1494 result[i] = cpd.getTransliterator(i); 1495 } 1496 } else { 1497 result = new Transliterator[] { this }; 1498 } 1499 return result; 1500 } 1501 1502 /** 1503 * Returns the set of all characters that may be modified in the 1504 * input text by this Transliterator. 
This incorporates this 1505 * object's current filter; if the filter is changed, the return 1506 * value of this function will change. The default implementation 1507 * returns an empty set. Some subclasses may override {@link 1508 * #handleGetSourceSet} to return a more precise result. The 1509 * return result is approximate in any case and is intended for 1510 * use by tests, tools, or utilities. 1511 * @see #getTargetSet 1512 * @see #handleGetSourceSet 1513 */ 1514 public final UnicodeSet getSourceSet() { 1515 UnicodeSet result = new UnicodeSet(); 1516 addSourceTargetSet(getFilterAsUnicodeSet(UnicodeSet.ALL_CODE_POINTS), result, new UnicodeSet()); 1517 return result; 1518 } 1519 1520 /** 1521 * Framework method that returns the set of all characters that 1522 * may be modified in the input text by this Transliterator, 1523 * ignoring the effect of this object's filter. The base class 1524 * implementation returns the empty set. Subclasses that wish to 1525 * implement this should override this method. 1526 * @return the set of characters that this transliterator may 1527 * modify. The set may be modified, so subclasses should return a 1528 * newly-created object. 1529 * @see #getSourceSet 1530 * @see #getTargetSet 1531 */ 1532 protected UnicodeSet handleGetSourceSet() { 1533 return new UnicodeSet(); 1534 } 1535 1536 /** 1537 * Returns the set of all characters that may be generated as 1538 * replacement text by this transliterator. The default 1539 * implementation returns the empty set. Some subclasses may 1540 * override this method to return a more precise result. The 1541 * return result is approximate in any case and is intended for 1542 * use by tests, tools, or utilities requiring such 1543 * meta-information. 1544 * <p>Warning. You might expect an empty filter to always produce an empty target. 
1545 * However, consider the following: 1546 * <pre> 1547 * [Pp]{}[\u03A3\u03C2\u03C3\u03F7\u03F8\u03FA\u03FB] > \'; 1548 * </pre> 1549 * With a filter of [], you still get some elements in the target set, because this rule will still match. It could 1550 * be recast to the following if it were important. 1551 * <pre> 1552 * [Pp]{([\u03A3\u03C2\u03C3\u03F7\u03F8\u03FA\u03FB])} > \' | $1; 1553 * </pre> 1554 * @see #getTargetSet 1555 */ 1556 public UnicodeSet getTargetSet() { 1557 UnicodeSet result = new UnicodeSet(); 1558 addSourceTargetSet(getFilterAsUnicodeSet(UnicodeSet.ALL_CODE_POINTS), new UnicodeSet(), result); 1559 return result; 1560 } 1561 1562 /** 1563 * Returns the set of all characters that may be generated as 1564 * replacement text by this transliterator, filtered by BOTH the input filter, and the current getFilter(). 1565 * <p>SHOULD BE OVERRIDEN BY SUBCLASSES. 1566 * It is probably an error for any transliterator to NOT override this, but we can't force them to 1567 * for backwards compatibility. 1568 * <p>Other methods vector through this. 1569 * <p>When gathering the information on source and target, the compound transliterator makes things complicated. 1570 * For example, suppose we have: 1571 * <pre> 1572 * Global FILTER = [ax] 1573 * a > b; 1574 * :: NULL; 1575 * b > c; 1576 * x > d; 1577 * </pre> 1578 * While the filter just allows a and x, b is an intermediate result, which could produce c. So the source and target sets 1579 * cannot be gathered independently. What we have to do is filter the sources for the first transliterator according to 1580 * the global filter, intersect that transliterator's filter. Based on that we get the target. 1581 * The next transliterator gets as a global filter (global + last target). And so on. 
1582 * <p>There is another complication: 1583 * <pre> 1584 * Global FILTER = [ax] 1585 * a >|b; 1586 * b >c; 1587 * </pre> 1588 * Even though b would be filtered from the input, whenever we have a backup, it could be part of the input. So ideally we will 1589 * change the global filter as we go. 1590 * @param targetSet TODO 1591 * @see #getTargetSet 1592 * @deprecated This API is ICU internal only. 1593 * @hide draft / provisional / internal are hidden on Android 1594 */ 1595 @Deprecated 1596 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 1597 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 1598 UnicodeSet temp = new UnicodeSet(handleGetSourceSet()).retainAll(myFilter); 1599 // use old method, if we don't have anything better 1600 sourceSet.addAll(temp); 1601 // clumsy guess with target 1602 for (String s : temp) { 1603 String t = transliterate(s); 1604 if (!s.equals(t)) { 1605 targetSet.addAll(t); 1606 } 1607 } 1608 } 1609 1610 /** 1611 * Returns the intersectionof this instance's filter intersected with an external filter. 1612 * The externalFilter must be frozen (it is frozen if not). 1613 * The result may be frozen, so don't attempt to modify. 1614 * @deprecated This API is ICU internal only. 1615 * @hide draft / provisional / internal are hidden on Android 1616 */ 1617 @Deprecated 1618 // TODO change to getMergedFilter 1619 public UnicodeSet getFilterAsUnicodeSet(UnicodeSet externalFilter) { 1620 if (filter == null) { 1621 return externalFilter; 1622 } 1623 UnicodeSet filterSet = new UnicodeSet(externalFilter); 1624 // Most, but not all filters will be UnicodeSets. Optimize for 1625 // the high-runner case. 1626 UnicodeSet temp; 1627 try { 1628 temp = filter; 1629 } catch (ClassCastException e) { 1630 filter.addMatchSetTo(temp = new UnicodeSet()); 1631 } 1632 return filterSet.retainAll(temp).freeze(); 1633 } 1634 1635 /** 1636 * Returns this transliterator's inverse. 
See the class 1637 * documentation for details. This implementation simply inverts 1638 * the two entities in the ID and attempts to retrieve the 1639 * resulting transliterator. That is, if <code>getID()</code> 1640 * returns "A-B", then this method will return the result of 1641 * <code>getInstance("B-A")</code>, or <code>null</code> if that 1642 * call fails. 1643 * 1644 * <p>Subclasses with knowledge of their inverse may wish to 1645 * override this method. 1646 * 1647 * @return a transliterator that is an inverse, not necessarily 1648 * exact, of this transliterator, or <code>null</code> if no such 1649 * transliterator is registered. 1650 * @see #registerClass 1651 */ 1652 public final Transliterator getInverse() { 1653 return getInstance(ID, REVERSE); 1654 } 1655 1656 /** 1657 * Registers a subclass of <code>Transliterator</code> with the 1658 * system. This subclass must have a public constructor taking no 1659 * arguments. When that constructor is called, the resulting 1660 * object must return the <code>ID</code> passed to this method if 1661 * its <code>getID()</code> method is called. 1662 * 1663 * @param ID the result of <code>getID()</code> for this 1664 * transliterator 1665 * @param transClass a subclass of <code>Transliterator</code> 1666 * @see #unregister 1667 */ 1668 public static void registerClass(String ID, Class<? extends Transliterator> transClass, String displayName) { 1669 registry.put(ID, transClass, true); 1670 if (displayName != null) { 1671 displayNameCache.put(new CaseInsensitiveString(ID), displayName); 1672 } 1673 } 1674 1675 /** 1676 * Register a factory object with the given ID. The factory 1677 * method should return a new instance of the given transliterator. 1678 * 1679 * <p>Because ICU may choose to cache Transliterator objects internally, this must 1680 * be called at application startup, prior to any calls to 1681 * Transliterator.getInstance to avoid undefined behavior. 
1682 * 1683 * @param ID the ID of this transliterator 1684 * @param factory the factory object 1685 */ 1686 public static void registerFactory(String ID, Factory factory) { 1687 registry.put(ID, factory, true); 1688 } 1689 1690 /** 1691 * Register a Transliterator object with the given ID. 1692 * 1693 * <p>Because ICU may choose to cache Transliterator objects internally, this must 1694 * be called at application startup, prior to any calls to 1695 * Transliterator.getInstance to avoid undefined behavior. 1696 * 1697 * @param trans the Transliterator object 1698 */ 1699 public static void registerInstance(Transliterator trans) { 1700 registry.put(trans.getID(), trans, true); 1701 } 1702 1703 /** 1704 * Register a Transliterator object. 1705 * 1706 * <p>Because ICU may choose to cache Transliterator objects internally, this must 1707 * be called at application startup, prior to any calls to 1708 * Transliterator.getInstance to avoid undefined behavior. 1709 * 1710 * @param trans the Transliterator object 1711 */ 1712 static void registerInstance(Transliterator trans, boolean visible) { 1713 registry.put(trans.getID(), trans, visible); 1714 } 1715 1716 /** 1717 * Register an ID as an alias of another ID. Instantiating 1718 * alias ID produces the same result as instantiating the original ID. 1719 * This is generally used to create short aliases of compound IDs. 1720 * 1721 * <p>Because ICU may choose to cache Transliterator objects internally, this must 1722 * be called at application startup, prior to any calls to 1723 * Transliterator.getInstance to avoid undefined behavior. 1724 * 1725 * @param aliasID The new ID being registered. 1726 * @param realID The existing ID that the new ID should be an alias of. 1727 */ 1728 public static void registerAlias(String aliasID, String realID) { 1729 registry.put(aliasID, realID, true); 1730 } 1731 1732 /** 1733 * Register two targets as being inverses of one another. 
For 1734 * example, calling registerSpecialInverse("NFC", "NFD", true) causes 1735 * Transliterator to form the following inverse relationships: 1736 * 1737 * <pre>NFC => NFD 1738 * Any-NFC => Any-NFD 1739 * NFD => NFC 1740 * Any-NFD => Any-NFC</pre> 1741 * 1742 * (Without the special inverse registration, the inverse of NFC 1743 * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but 1744 * that the presence or absence of "Any-" is preserved. 1745 * 1746 * <p>The relationship is symmetrical; registering (a, b) is 1747 * equivalent to registering (b, a). 1748 * 1749 * <p>The relevant IDs must still be registered separately as 1750 * factories or classes. 1751 * 1752 * <p>Only the targets are specified. Special inverses always 1753 * have the form Any-Target1 <=> Any-Target2. The target should 1754 * have canonical casing (the casing desired to be produced when 1755 * an inverse is formed) and should contain no whitespace or other 1756 * extraneous characters. 1757 * 1758 * @param target the target against which to register the inverse 1759 * @param inverseTarget the inverse of target, that is 1760 * Any-target.getInverse() => Any-inverseTarget 1761 * @param bidirectional if true, register the reverse relation 1762 * as well, that is, Any-inverseTarget.getInverse() => Any-target 1763 */ 1764 static void registerSpecialInverse(String target, 1765 String inverseTarget, 1766 boolean bidirectional) { 1767 TransliteratorIDParser.registerSpecialInverse(target, inverseTarget, bidirectional); 1768 } 1769 1770 /** 1771 * Unregisters a transliterator or class. This may be either 1772 * a system transliterator or a user transliterator or class. 
1773 * 1774 * @param ID the ID of the transliterator or class 1775 * @see #registerClass 1776 */ 1777 public static void unregister(String ID) { 1778 displayNameCache.remove(new CaseInsensitiveString(ID)); 1779 registry.remove(ID); 1780 } 1781 1782 /** 1783 * Returns an enumeration over the programmatic names of registered 1784 * <code>Transliterator</code> objects. This includes both system 1785 * transliterators and user transliterators registered using 1786 * <code>registerClass()</code>. The enumerated names may be 1787 * passed to <code>getInstance()</code>. 1788 * 1789 * @return An <code>Enumeration</code> over <code>String</code> objects 1790 * @see #getInstance 1791 * @see #registerClass 1792 */ 1793 public static final Enumeration<String> getAvailableIDs() { 1794 return registry.getAvailableIDs(); 1795 } 1796 1797 /** 1798 * Returns an enumeration over the source names of registered 1799 * transliterators. Source names may be passed to 1800 * getAvailableTargets() to obtain available targets for each 1801 * source. 1802 */ 1803 public static final Enumeration<String> getAvailableSources() { 1804 return registry.getAvailableSources(); 1805 } 1806 1807 /** 1808 * Returns an enumeration over the target names of registered 1809 * transliterators having a given source name. Target names may 1810 * be passed to getAvailableVariants() to obtain available 1811 * variants for each source and target pair. 1812 */ 1813 public static final Enumeration<String> getAvailableTargets(String source) { 1814 return registry.getAvailableTargets(source); 1815 } 1816 1817 /** 1818 * Returns an enumeration over the variant names of registered 1819 * transliterators having a given source name and target name. 
1820 */ 1821 public static final Enumeration<String> getAvailableVariants(String source, 1822 String target) { 1823 return registry.getAvailableVariants(source, target); 1824 } 1825 private static final String ROOT = "root", 1826 RB_RULE_BASED_IDS ="RuleBasedTransliteratorIDs"; 1827 static { 1828 registry = new TransliteratorRegistry(); 1829 1830 // The display name cache starts out empty 1831 displayNameCache = Collections.synchronizedMap(new HashMap<CaseInsensitiveString, String>()); 1832 /* The following code parses the index table located in 1833 * icu/data/translit/root.txt. The index is an n x 4 table 1834 * that follows this format: 1835 * <id>{ 1836 * file{ 1837 * resource{"<resource>"} 1838 * direction{"<direction>"} 1839 * } 1840 * } 1841 * <id>{ 1842 * internal{ 1843 * resource{"<resource>"} 1844 * direction{"<direction"} 1845 * } 1846 * } 1847 * <id>{ 1848 * alias{"<getInstanceArg"} 1849 * } 1850 * <id> is the ID of the system transliterator being defined. These 1851 * are public IDs enumerated by Transliterator.getAvailableIDs(), 1852 * unless the second field is "internal". 1853 * 1854 * <resource> is a ResourceReader resource name. Currently these refer 1855 * to file names under com/ibm/text/resources. This string is passed 1856 * directly to ResourceReader, together with <encoding>. 1857 * 1858 * <direction> is either "FORWARD" or "REVERSE". 1859 * 1860 * <getInstanceArg> is a string to be passed directly to 1861 * Transliterator.getInstance(). The returned Transliterator object 1862 * then has its ID changed to <id> and is returned. 1863 * 1864 * The extra blank field on "alias" lines is to make the array square. 
1865 */ 1866 UResourceBundle bundle, transIDs, colBund; 1867 bundle = UResourceBundle.getBundleInstance(ICUData.ICU_TRANSLIT_BASE_NAME, ROOT); 1868 transIDs = bundle.get(RB_RULE_BASED_IDS); 1869 1870 int row, maxRows; 1871 maxRows = transIDs.getSize(); 1872 for (row = 0; row < maxRows; row++) { 1873 colBund = transIDs.get(row); 1874 String ID = colBund.getKey(); 1875 if (ID.indexOf("-t-") >= 0) { 1876 continue; 1877 } 1878 UResourceBundle res = colBund.get(0); 1879 String type = res.getKey(); 1880 if (type.equals("file") || type.equals("internal")) { 1881 // Rest of line is <resource>:<encoding>:<direction> 1882 // pos colon c2 1883 String resString = res.getString("resource"); 1884 int dir; 1885 String direction = res.getString("direction"); 1886 switch (direction.charAt(0)) { 1887 case 'F': 1888 dir = FORWARD; 1889 break; 1890 case 'R': 1891 dir = REVERSE; 1892 break; 1893 default: 1894 throw new RuntimeException("Can't parse direction: " + direction); 1895 } 1896 registry.put(ID, 1897 resString, // resource 1898 dir, 1899 !type.equals("internal")); 1900 } else if (type.equals("alias")) { 1901 //'alias'; row[2]=createInstance argument 1902 String resString = res.getString(); 1903 registry.put(ID, resString, true); 1904 } else { 1905 // Unknown type 1906 throw new RuntimeException("Unknow type: " + type); 1907 } 1908 } 1909 1910 registerSpecialInverse(NullTransliterator.SHORT_ID, NullTransliterator.SHORT_ID, false); 1911 1912 // Register non-rule-based transliterators 1913 registerClass(NullTransliterator._ID, 1914 NullTransliterator.class, null); 1915 RemoveTransliterator.register(); 1916 EscapeTransliterator.register(); 1917 UnescapeTransliterator.register(); 1918 LowercaseTransliterator.register(); 1919 UppercaseTransliterator.register(); 1920 TitlecaseTransliterator.register(); 1921 CaseFoldTransliterator.register(); 1922 UnicodeNameTransliterator.register(); 1923 NameUnicodeTransliterator.register(); 1924 NormalizationTransliterator.register(); 1925 
BreakTransliterator.register(); 1926 AnyTransliterator.register(); // do this last! 1927 } 1928 1929 /** 1930 * Register the script-based "Any" transliterators: Any-Latin, Any-Greek 1931 * @deprecated This API is ICU internal only. 1932 * @hide draft / provisional / internal are hidden on Android 1933 */ 1934 @Deprecated 1935 public static void registerAny() { 1936 AnyTransliterator.register(); 1937 } 1938 1939 /** 1940 * The factory interface for transliterators. Transliterator 1941 * subclasses can register factory objects for IDs using the 1942 * registerFactory() method of Transliterator. When invoked, the 1943 * factory object will be passed the ID being instantiated. This 1944 * makes it possible to register one factory method to more than 1945 * one ID, or for a factory method to parameterize its result 1946 * based on the variant. 1947 */ 1948 public static interface Factory { 1949 /** 1950 * Return a transliterator for the given ID. 1951 */ 1952 Transliterator getInstance(String ID); 1953 } 1954 1955 /** 1956 * Implements StringTransform via this method. 1957 * @param source text to be transformed (eg lowercased) 1958 * @return result 1959 */ 1960 @Override 1961 public String transform(String source) { 1962 return transliterate(source); 1963 } 1964 } 1965