1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1999-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 11/17/99 aliu Creation. 10 ********************************************************************** 11 */ 12 #ifndef TRANSLIT_H 13 #define TRANSLIT_H 14 15 #include "unicode/utypes.h" 16 17 /** 18 * \file 19 * \brief C++ API: Transforms text from one format to another. 20 */ 21 22 #if !UCONFIG_NO_TRANSLITERATION 23 24 #include "unicode/uobject.h" 25 #include "unicode/unistr.h" 26 #include "unicode/parseerr.h" 27 #include "unicode/utrans.h" // UTransPosition, UTransDirection 28 #include "unicode/strenum.h" 29 30 U_NAMESPACE_BEGIN 31 32 class UnicodeFilter; 33 class UnicodeSet; 34 class CompoundTransliterator; 35 class TransliteratorParser; 36 class NormalizationTransliterator; 37 class TransliteratorIDParser; 38 39 /** 40 * 41 * <code>Transliterator</code> is an abstract class that 42 * transliterates text from one format to another. The most common 43 * kind of transliterator is a script, or alphabet, transliterator. 44 * For example, a Russian to Latin transliterator changes Russian text 45 * written in Cyrillic characters to phonetically equivalent Latin 46 * characters. It does not <em>translate</em> Russian to English! 47 * Transliteration, unlike translation, operates on characters, without 48 * reference to the meanings of words and sentences. 49 * 50 * <p>Although script conversion is its most common use, a 51 * transliterator can actually perform a more general class of tasks. 52 * In fact, <code>Transliterator</code> defines a very general API 53 * which specifies only that a segment of the input text is replaced 54 * by new text. 
The particulars of this conversion are determined 55 * entirely by subclasses of <code>Transliterator</code>. 56 * 57 * <p><b>Transliterators are stateless</b> 58 * 59 * <p><code>Transliterator</code> objects are <em>stateless</em>; they 60 * retain no information between calls to 61 * <code>transliterate()</code>. (However, this does <em>not</em> 62 * mean that threads may share transliterators without synchronizing 63 * them. Transliterators are not immutable, so they must be 64 * synchronized when shared between threads.) This might seem to 65 * limit the complexity of the transliteration operation. In 66 * practice, subclasses perform complex transliterations by delaying 67 * the replacement of text until it is known that no other 68 * replacements are possible. In other words, although the 69 * <code>Transliterator</code> objects are stateless, the source text 70 * itself embodies all the needed information, and delayed operation 71 * allows arbitrary complexity. 72 * 73 * <p><b>Batch transliteration</b> 74 * 75 * <p>The simplest way to perform transliteration is all at once, on a 76 * string of existing text. This is referred to as <em>batch</em> 77 * transliteration. For example, given a string <code>input</code> 78 * and a transliterator <code>t</code>, the call 79 * 80 * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input); 81 * </code>\htmlonly</blockquote>\endhtmlonly 82 * 83 * will transliterate it and return the result. Other methods allow 84 * the client to specify a substring to be transliterated and to use 85 * {@link Replaceable } objects instead of strings, in order to 86 * preserve out-of-band information (such as text styles). 87 * 88 * <p><b>Keyboard transliteration</b> 89 * 90 * <p>Somewhat more involved is <em>keyboard</em>, or incremental 91 * transliteration. 
This is the transliteration of text that is 92 * arriving from some source (typically the user's keyboard) one 93 * character at a time, or in some other piecemeal fashion. 94 * 95 * <p>In keyboard transliteration, a <code>Replaceable</code> buffer 96 * stores the text. As text is inserted, as much as possible is 97 * transliterated on the fly. This means a GUI that displays the 98 * contents of the buffer may show text being modified as each new 99 * character arrives. 100 * 101 * <p>Consider the simple <code>RuleBasedTransliterator</code>: 102 * 103 * \htmlonly<blockquote>\endhtmlonly<code> 104 * th>{theta}<br> 105 * t>{tau} 106 * </code>\htmlonly</blockquote>\endhtmlonly 107 * 108 * When the user types 't', nothing will happen, since the 109 * transliterator is waiting to see if the next character is 'h'. To 110 * remedy this, we introduce the notion of a cursor, marked by a '|' 111 * in the output string: 112 * 113 * \htmlonly<blockquote>\endhtmlonly<code> 114 * t>|{tau}<br> 115 * {tau}h>{theta} 116 * </code>\htmlonly</blockquote>\endhtmlonly 117 * 118 * Now when the user types 't', tau appears, and if the next character 119 * is 'h', the tau changes to a theta. This is accomplished by 120 * maintaining a cursor position (independent of the insertion point, 121 * and invisible in the GUI) across calls to 122 * <code>transliterate()</code>. Typically, the cursor will 123 * be coincident with the insertion point, but in a case like the one 124 * above, it will precede the insertion point. 125 * 126 * <p>Keyboard transliteration methods maintain a set of three indices 127 * that are updated with each call to 128 * <code>transliterate()</code>, including the cursor, start, 129 * and limit. Since these indices are changed by the method, they are 130 * passed in an <code>int[]</code> array. The <code>START</code> index 131 * marks the beginning of the substring that the transliterator will 132 * look at. 
It is advanced as text becomes committed (but it is not 133 * the committed index; that's the <code>CURSOR</code>). The 134 * <code>CURSOR</code> index, described above, marks the point at 135 * which the transliterator last stopped, either because it reached 136 * the end, or because it required more characters to disambiguate 137 * between possible inputs. The <code>CURSOR</code> can also be 138 * explicitly set by rules in a <code>RuleBasedTransliterator</code>. 139 * Any characters before the <code>CURSOR</code> index are frozen; 140 * future keyboard transliteration calls within this input sequence 141 * will not change them. New text is inserted at the 142 * <code>LIMIT</code> index, which marks the end of the substring that 143 * the transliterator looks at. 144 * 145 * <p>Because keyboard transliteration assumes that more characters 146 * are to arrive, it is conservative in its operation. It only 147 * transliterates when it can do so unambiguously. Otherwise it waits 148 * for more characters to arrive. When the client code knows that no 149 * more characters are forthcoming, perhaps because the user has 150 * performed some input termination operation, then it should call 151 * <code>finishTransliteration()</code> to complete any 152 * pending transliterations. 153 * 154 * <p><b>Inverses</b> 155 * 156 * <p>Pairs of transliterators may be inverses of one another. For 157 * example, if transliterator <b>A</b> transliterates characters by 158 * incrementing their Unicode value (so "abc" -> "def"), and 159 * transliterator <b>B</b> decrements character values, then <b>A</b> 160 * is an inverse of <b>B</b> and vice versa. If we compose <b>A</b> 161 * with <b>B</b> in a compound transliterator, the result is the 162 * identity transliterator, that is, a transliterator that does not 163 * change its input text. 
164 * 165 * The <code>Transliterator</code> method <code>getInverse()</code> 166 * returns a transliterator's inverse, if one exists, or 167 * <code>null</code> otherwise. However, the result of 168 * <code>getInverse()</code> usually will <em>not</em> be a true 169 * mathematical inverse. This is because true inverse transliterators 170 * are difficult to formulate. For example, consider two 171 * transliterators: <b>AB</b>, which transliterates the character 'A' 172 * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'. It might 173 * seem that these are exact inverses, since 174 * 175 * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br> 176 * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly 177 * 178 * where 'x' represents transliteration. However, 179 * 180 * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br> 181 * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly 182 * 183 * so <b>AB</b> composed with <b>BA</b> is not the 184 * identity. Nonetheless, <b>BA</b> may be usefully considered to be 185 * <b>AB</b>'s inverse, and it is on this basis that 186 * <b>AB</b><code>.getInverse()</code> could legitimately return 187 * <b>BA</b>. 188 * 189 * <p><b>IDs and display names</b> 190 * 191 * <p>A transliterator is designated by a short identifier string or 192 * <em>ID</em>. IDs follow the format <em>source-destination</em>, 193 * where <em>source</em> describes the entity being replaced, and 194 * <em>destination</em> describes the entity replacing 195 * <em>source</em>. The entities may be the names of scripts, 196 * particular sequences of characters, or whatever else it is that the 197 * transliterator converts to or from. For example, a transliterator 198 * from Russian to Latin might be named "Russian-Latin". A 199 * transliterator from keyboard escape sequences to Latin-1 characters 200 * might be named "KeyboardEscape-Latin1". 
By convention, system 201 * entity names are in English, with the initial letters of words 202 * capitalized; user entity names may follow any format so long as 203 * they do not contain dashes. 204 * 205 * <p>In addition to programmatic IDs, transliterator objects have 206 * display names for presentation in user interfaces, returned by 207 * {@link #getDisplayName }. 208 * 209 * <p><b>Factory methods and registration</b> 210 * 211 * <p>In general, client code should use the factory method 212 * {@link #createInstance } to obtain an instance of a 213 * transliterator given its ID. Valid IDs may be enumerated using 214 * <code>getAvailableIDs()</code>. Since transliterators are mutable, 215 * multiple calls to {@link #createInstance } with the same ID will 216 * return distinct objects. 217 * 218 * <p>In addition to the system transliterators registered at startup, 219 * user transliterators may be registered by calling 220 * <code>registerInstance()</code> at run time. A registered instance 221 * acts as a template; future calls to {@link #createInstance } with the ID 222 * of the registered object return clones of that object. Thus any 223 * object passed to <tt>registerInstance()</tt> must implement 224 * <tt>clone()</tt> properly. To register a transliterator subclass 225 * without instantiating it (until it is needed), users may call 226 * {@link #registerFactory }. In this case, the objects are 227 * instantiated by invoking the zero-argument public constructor of 228 * the class. 229 * 230 * <p><b>Subclassing</b> 231 * 232 * Subclasses must implement the abstract method 233 * <code>handleTransliterate()</code>. 
<p>Subclasses should override 234 * the <code>transliterate()</code> method taking a 235 * <code>Replaceable</code> and the <code>transliterate()</code> 236 * method taking a <code>String</code> and <code>StringBuffer</code> 237 * if the performance of these methods can be improved over the 238 * performance obtained by the default implementations in this class. 239 * 240 * @author Alan Liu 241 * @stable ICU 2.0 242 */ 243 class U_I18N_API Transliterator : public UObject { 244 245 private: 246 247 /** 248 * Programmatic name, e.g., "Latin-Arabic". 249 */ 250 UnicodeString ID; 251 252 /** 253 * This transliterator's filter. Any character for which 254 * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 255 * altered by this transliterator. If <tt>filter</tt> is 256 * <tt>null</tt> then no filtering is applied. 257 */ 258 UnicodeFilter* filter; 259 260 int32_t maximumContextLength; 261 262 public: 263 264 /** 265 * A context integer or pointer for a factory function, passed by 266 * value. 267 * @stable ICU 2.4 268 */ 269 union Token { 270 /** 271 * This token, interpreted as a 32-bit integer. 272 * @stable ICU 2.4 273 */ 274 int32_t integer; 275 /** 276 * This token, interpreted as a native pointer. 277 * @stable ICU 2.4 278 */ 279 void* pointer; 280 }; 281 282 #ifndef U_HIDE_INTERNAL_API 283 /** 284 * Return a token containing an integer. 285 * @return a token containing an integer. 286 * @internal 287 */ 288 inline static Token integerToken(int32_t); 289 290 /** 291 * Return a token containing a pointer. 292 * @return a token containing a pointer. 293 * @internal 294 */ 295 inline static Token pointerToken(void*); 296 #endif /* U_HIDE_INTERNAL_API */ 297 298 /** 299 * A function that creates and returns a Transliterator. When 300 * invoked, it will be passed the ID string that is being 301 * instantiated, together with the context pointer that was passed 302 * in when the factory function was first registered. 
Many 303 * factory functions will ignore both parameters, however, 304 * functions that are registered to more than one ID may use the 305 * ID or the context parameter to parameterize the transliterator 306 * they create. 307 * @param ID the string identifier for this transliterator 308 * @param context a context pointer that will be stored and 309 * later passed to the factory function when an ID matching 310 * the registration ID is being instantiated with this factory. 311 * @stable ICU 2.4 312 */ 313 typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context); 314 315 protected: 316 317 /** 318 * Default constructor. 319 * @param ID the string identifier for this transliterator 320 * @param adoptedFilter the filter. Any character for which 321 * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 322 * altered by this transliterator. If <tt>filter</tt> is 323 * <tt>null</tt> then no filtering is applied. 324 * @stable ICU 2.4 325 */ 326 Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter); 327 328 /** 329 * Copy constructor. 330 * @stable ICU 2.4 331 */ 332 Transliterator(const Transliterator&); 333 334 /** 335 * Assignment operator. 336 * @stable ICU 2.4 337 */ 338 Transliterator& operator=(const Transliterator&); 339 340 /** 341 * Create a transliterator from a basic ID. This is an ID 342 * containing only the forward direction source, target, and 343 * variant. 344 * @param id a basic ID of the form S-T or S-T/V. 345 * @param canon canonical ID to assign to the object, or 346 * NULL to leave the ID unchanged 347 * @return a newly created Transliterator or null if the ID is 348 * invalid. 
349 * @stable ICU 2.4 350 */ 351 static Transliterator* createBasicInstance(const UnicodeString& id, 352 const UnicodeString* canon); 353 354 friend class TransliteratorParser; // for parseID() 355 friend class TransliteratorIDParser; // for createBasicInstance() 356 friend class TransliteratorAlias; // for setID() 357 358 public: 359 360 /** 361 * Destructor. 362 * @stable ICU 2.0 363 */ 364 virtual ~Transliterator(); 365 366 /** 367 * Implements Cloneable. 368 * All subclasses are encouraged to implement this method if it is 369 * possible and reasonable to do so. Subclasses that are to be 370 * registered with the system using <tt>registerInstance()</tt> 371 * are required to implement this method. If a subclass does not 372 * implement clone() properly and is registered with the system 373 * using registerInstance(), then the default clone() implementation 374 * will return null, and calls to createInstance() will fail. 375 * 376 * @return a copy of the object. 377 * @see #registerInstance 378 * @stable ICU 2.0 379 */ 380 virtual Transliterator* clone() const; 381 382 /** 383 * Transliterates a segment of a string, with optional filtering. 384 * 385 * @param text the string to be transliterated 386 * @param start the beginning index, inclusive; <code>0 <= start 387 * <= limit</code>. 388 * @param limit the ending index, exclusive; <code>start <= limit 389 * <= text.length()</code>. 390 * @return The new limit index. The text previously occupying <code>[start, 391 * limit)</code> has been transliterated, possibly to a string of a different 392 * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where 393 * <em>new-limit</em> is the return value. If the input offsets are out of bounds, 394 * the returned value is -1 and the input string remains unchanged. 395 * @stable ICU 2.0 396 */ 397 virtual int32_t transliterate(Replaceable& text, 398 int32_t start, int32_t limit) const; 399 400 /** 401 * Transliterates an entire string in place. 
Convenience method. 402 * @param text the string to be transliterated 403 * @stable ICU 2.0 404 */ 405 virtual void transliterate(Replaceable& text) const; 406 407 /** 408 * Transliterates the portion of the text buffer that can be 409 * transliterated unambiguously after new text has been inserted, 410 * typically as a result of a keyboard event. The new text in 411 * <code>insertion</code> will be inserted into <code>text</code> 412 * at <code>index.limit</code>, advancing 413 * <code>index.limit</code> by <code>insertion.length()</code>. 414 * Then the transliterator will try to transliterate characters of 415 * <code>text</code> between <code>index.cursor</code> and 416 * <code>index.limit</code>. Characters before 417 * <code>index.cursor</code> will not be changed. 418 * 419 * <p>Upon return, values in <code>index</code> will be updated. 420 * <code>index.start</code> will be advanced to the first 421 * character that future calls to this method will read. 422 * <code>index.cursor</code> and <code>index.limit</code> will 423 * be adjusted to delimit the range of text that future calls to 424 * this method may change. 425 * 426 * <p>Typical usage of this method begins with an initial call 427 * with <code>index.start</code> and <code>index.limit</code> 428 * set to indicate the portion of <code>text</code> to be 429 * transliterated, and <code>index.cursor == index.start</code>. 430 * Thereafter, <code>index</code> can be used without 431 * modification in future calls, provided that all changes to 432 * <code>text</code> are made via this method. 433 * 434 * <p>This method assumes that future calls may be made that will 435 * insert new text into the buffer. As a result, it only performs 436 * unambiguous transliterations. After the last call to this 437 * method, there may be untransliterated text that is waiting for 438 * more input to resolve an ambiguity. 
In order to perform these 439 * pending transliterations, clients should call {@link 440 * #finishTransliteration } after the last call to this 441 * method has been made. 442 * 443 * @param text the buffer holding transliterated and untransliterated text 444 * @param index an array of three integers. 445 * 446 * <ul><li><code>index.start</code>: the beginning index, 447 * inclusive; <code>0 <= index.start <= index.limit</code>. 448 * 449 * <li><code>index.limit</code>: the ending index, exclusive; 450 * <code>index.start <= index.limit <= text.length()</code>. 451 * <code>insertion</code> is inserted at 452 * <code>index.limit</code>. 453 * 454 * <li><code>index.cursor</code>: the next character to be 455 * considered for transliteration; <code>index.start <= 456 * index.cursor <= index.limit</code>. Characters before 457 * <code>index.cursor</code> will not be changed by future calls 458 * to this method.</ul> 459 * 460 * @param insertion text to be inserted and possibly 461 * transliterated into the translation buffer at 462 * <code>index.limit</code>. If <code>null</code> then no text 463 * is inserted. 464 * @param status Output param to be filled in with a success or an error. 465 * @see #handleTransliterate 466 * @exception IllegalArgumentException if <code>index</code> 467 * is invalid 468 * @see UTransPosition 469 * @stable ICU 2.0 470 */ 471 virtual void transliterate(Replaceable& text, UTransPosition& index, 472 const UnicodeString& insertion, 473 UErrorCode& status) const; 474 475 /** 476 * Transliterates the portion of the text buffer that can be 477 * transliterated unambiguously after a new character has been 478 * inserted, typically as a result of a keyboard event. This is a 479 * convenience method. 480 * @param text the buffer holding transliterated and 481 * untransliterated text 482 * @param index an array of three integers. 
483 * @param insertion text to be inserted and possibly 484 * transliterated into the translation buffer at 485 * <code>index.limit</code>. 486 * @param status Output param to be filled in with a success or an error. 487 * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const 488 * @stable ICU 2.0 489 */ 490 virtual void transliterate(Replaceable& text, UTransPosition& index, 491 UChar32 insertion, 492 UErrorCode& status) const; 493 494 /** 495 * Transliterates the portion of the text buffer that can be 496 * transliterated unambiguously. This is a convenience method; see 497 * {@link 498 * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const } 499 * for details. 500 * @param text the buffer holding transliterated and 501 * untransliterated text 502 * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }. 503 * @param status Output param to be filled in with a success or an error. 504 * @see #transliterate(Replaceable, int[], String) 505 * @stable ICU 2.0 506 */ 507 virtual void transliterate(Replaceable& text, UTransPosition& index, 508 UErrorCode& status) const; 509 510 /** 511 * Finishes any pending transliterations that were waiting for 512 * more characters. Clients should call this method as the last 513 * call after a sequence of one or more calls to 514 * <code>transliterate()</code>. 515 * @param text the buffer holding transliterated and 516 * untransliterated text. 517 * @param index the array of indices previously passed to {@link 518 * #transliterate } 519 * @stable ICU 2.0 520 */ 521 virtual void finishTransliteration(Replaceable& text, 522 UTransPosition& index) const; 523 524 private: 525 526 /** 527 * This internal method does incremental transliteration. If the 528 * 'insertion' is non-null then we append it to 'text' before 529 * proceeding. 
This method calls through to the pure virtual 530 * framework method handleTransliterate() to do the actual 531 * work. 532 * @param text the buffer holding transliterated and 533 * untransliterated text 534 * @param index an array of three integers. See {@link 535 * #transliterate(Replaceable, int[], String)}. 536 * @param insertion text to be inserted and possibly 537 * transliterated into the translation buffer at 538 * <code>index.limit</code>. 539 * @param status Output param to filled in with a success or an error. 540 */ 541 void _transliterate(Replaceable& text, 542 UTransPosition& index, 543 const UnicodeString* insertion, 544 UErrorCode &status) const; 545 546 protected: 547 548 /** 549 * Abstract method that concrete subclasses define to implement 550 * their transliteration algorithm. This method handles both 551 * incremental and non-incremental transliteration. Let 552 * <code>originalStart</code> refer to the value of 553 * <code>pos.start</code> upon entry. 554 * 555 * <ul> 556 * <li>If <code>incremental</code> is false, then this method 557 * should transliterate all characters between 558 * <code>pos.start</code> and <code>pos.limit</code>. Upon return 559 * <code>pos.start</code> must == <code> pos.limit</code>.</li> 560 * 561 * <li>If <code>incremental</code> is true, then this method 562 * should transliterate all characters between 563 * <code>pos.start</code> and <code>pos.limit</code> that can be 564 * unambiguously transliterated, regardless of future insertions 565 * of text at <code>pos.limit</code>. Upon return, 566 * <code>pos.start</code> should be in the range 567 * [<code>originalStart</code>, <code>pos.limit</code>). 
568 * <code>pos.start</code> should be positioned such that 569 * characters [<code>originalStart</code>, <code> 570 * pos.start</code>) will not be changed in the future by this 571 * transliterator and characters [<code>pos.start</code>, 572 * <code>pos.limit</code>) are unchanged.</li> 573 * </ul> 574 * 575 * <p>Implementations of this method should also obey the 576 * following invariants:</p> 577 * 578 * <ul> 579 * <li> <code>pos.limit</code> and <code>pos.contextLimit</code> 580 * should be updated to reflect changes in length of the text 581 * between <code>pos.start</code> and <code>pos.limit</code>. The 582 * difference <code> pos.contextLimit - pos.limit</code> should 583 * not change.</li> 584 * 585 * <li><code>pos.contextStart</code> should not change.</li> 586 * 587 * <li>Upon return, neither <code>pos.start</code> nor 588 * <code>pos.limit</code> should be less than 589 * <code>originalStart</code>.</li> 590 * 591 * <li>Text before <code>originalStart</code> and text after 592 * <code>pos.limit</code> should not change.</li> 593 * 594 * <li>Text before <code>pos.contextStart</code> and text after 595 * <code> pos.contextLimit</code> should be ignored.</li> 596 * </ul> 597 * 598 * <p>Subclasses may safely assume that all characters in 599 * [<code>pos.start</code>, <code>pos.limit</code>) are filtered. 600 * In other words, the filter has already been applied by the time 601 * this method is called. See 602 * <code>filteredTransliterate()</code>. 603 * 604 * <p>This method is <b>not</b> for public consumption. Calling 605 * this method directly will transliterate 606 * [<code>pos.start</code>, <code>pos.limit</code>) without 607 * applying the filter. End user code should call <code> 608 * transliterate()</code> instead of this method. 
Subclass code 609 * and wrapping transliterators should call 610 * <code>filteredTransliterate()</code> instead of this method.<p> 611 * 612 * @param text the buffer holding transliterated and 613 * untransliterated text 614 * 615 * @param pos the indices indicating the start, limit, context 616 * start, and context limit of the text. 617 * 618 * @param incremental if true, assume more text may be inserted at 619 * <code>pos.limit</code> and act accordingly. Otherwise, 620 * transliterate all text between <code>pos.start</code> and 621 * <code>pos.limit</code> and move <code>pos.start</code> up to 622 * <code>pos.limit</code>. 623 * 624 * @see #transliterate 625 * @stable ICU 2.4 626 */ 627 virtual void handleTransliterate(Replaceable& text, 628 UTransPosition& pos, 629 UBool incremental) const = 0; 630 631 public: 632 /** 633 * Transliterate a substring of text, as specified by index, taking filters 634 * into account. This method is for subclasses that need to delegate to 635 * another transliterator, such as CompoundTransliterator. 636 * @param text the text to be transliterated 637 * @param index the position indices 638 * @param incremental if TRUE, then assume more characters may be inserted 639 * at index.limit, and postpone processing to accommodate future incoming 640 * characters 641 * @stable ICU 2.4 642 */ 643 virtual void filteredTransliterate(Replaceable& text, 644 UTransPosition& index, 645 UBool incremental) const; 646 647 private: 648 649 /** 650 * Top-level transliteration method, handling filtering, incremental and 651 * non-incremental transliteration, and rollback. All transliteration 652 * public API methods eventually call this method with a rollback argument 653 * of TRUE. Other entities may call this method but rollback should be 654 * FALSE. 655 * 656 * <p>If this transliterator has a filter, break up the input text into runs 657 * of unfiltered characters. Pass each run to 658 * subclass.handleTransliterate(). 
659 * 660 * <p>In incremental mode, if rollback is TRUE, perform a special 661 * incremental procedure in which several passes are made over the input 662 * text, adding one character at a time, and committing successful 663 * transliterations as they occur. Unsuccessful transliterations are rolled 664 * back and retried with additional characters to give correct results. 665 * 666 * @param text the text to be transliterated 667 * @param index the position indices 668 * @param incremental if TRUE, then assume more characters may be inserted 669 * at index.limit, and postpone processing to accommodate future incoming 670 * characters 671 * @param rollback if TRUE and if incremental is TRUE, then perform special 672 * incremental processing, as described above, and undo partial 673 * transliterations where necessary. If incremental is FALSE then this 674 * parameter is ignored. 675 */ 676 virtual void filteredTransliterate(Replaceable& text, 677 UTransPosition& index, 678 UBool incremental, 679 UBool rollback) const; 680 681 public: 682 683 /** 684 * Returns the length of the longest context required by this transliterator. 685 * This is <em>preceding</em> context. The default implementation supplied 686 * by <code>Transliterator</code> returns zero; subclasses 687 * that use preceding context should override this method to return the 688 * correct value. For example, if a transliterator translates "ddd" (where 689 * d is any digit) to "555" when preceded by "(ddd)", then the preceding 690 * context length is 5, the length of "(ddd)". 691 * 692 * @return The maximum number of preceding context characters this 693 * transliterator needs to examine 694 * @stable ICU 2.0 695 */ 696 int32_t getMaximumContextLength(void) const; 697 698 protected: 699 700 /** 701 * Method for subclasses to use to set the maximum context length. 702 * @param maxContextLength the new value to be set. 
703 * @see #getMaximumContextLength 704 * @stable ICU 2.4 705 */ 706 void setMaximumContextLength(int32_t maxContextLength); 707 708 public: 709 710 /** 711 * Returns a programmatic identifier for this transliterator. 712 * If this identifier is passed to <code>createInstance()</code>, it 713 * will return this object, if it has been registered. 714 * @return a programmatic identifier for this transliterator. 715 * @see #registerInstance 716 * @see #registerFactory 717 * @see #getAvailableIDs 718 * @stable ICU 2.0 719 */ 720 virtual const UnicodeString& getID(void) const; 721 722 /** 723 * Returns a name for this transliterator that is appropriate for 724 * display to the user in the default locale. See {@link 725 * #getDisplayName } for details. 726 * @param ID the string identifier for this transliterator 727 * @param result Output param to receive the display name 728 * @return A reference to 'result'. 729 * @stable ICU 2.0 730 */ 731 static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, 732 UnicodeString& result); 733 734 /** 735 * Returns a name for this transliterator that is appropriate for 736 * display to the user in the given locale. This name is taken 737 * from the locale resource data in the standard manner of the 738 * <code>java.text</code> package. 739 * 740 * <p>If no localized names exist in the system resource bundles, 741 * a name is synthesized using a localized 742 * <code>MessageFormat</code> pattern from the resource data. The 743 * arguments to this pattern are an integer followed by one or two 744 * strings. The integer is the number of strings, either 1 or 2. 745 * The strings are formed by splitting the ID for this 746 * transliterator at the first '-'. If there is no '-', then the 747 * entire ID forms the only string. 748 * @param ID the string identifier for this transliterator 749 * @param inLocale the Locale in which the display name should be 750 * localized. 
751 * @param result Output param to receive the display name 752 * @return A reference to 'result'. 753 * @stable ICU 2.0 754 */ 755 static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, 756 const Locale& inLocale, 757 UnicodeString& result); 758 759 /** 760 * Returns the filter used by this transliterator, or <tt>NULL</tt> 761 * if this transliterator uses no filter. 762 * @return the filter used by this transliterator, or <tt>NULL</tt> 763 * if this transliterator uses no filter. 764 * @stable ICU 2.0 765 */ 766 const UnicodeFilter* getFilter(void) const; 767 768 /** 769 * Returns the filter used by this transliterator, or <tt>NULL</tt> if this 770 * transliterator uses no filter. The caller must eventually delete the 771 * result. After this call, this transliterator's filter is set to 772 * <tt>NULL</tt>. 773 * @return the filter used by this transliterator, or <tt>NULL</tt> if this 774 * transliterator uses no filter. 775 * @stable ICU 2.4 776 */ 777 UnicodeFilter* orphanFilter(void); 778 779 /** 780 * Changes the filter used by this transliterator. If the filter 781 * is set to <tt>null</tt> then no filtering will occur. 782 * 783 * <p>Callers must take care if a transliterator is in use by 784 * multiple threads. The filter should not be changed by one 785 * thread while another thread may be transliterating. 786 * @param adoptedFilter the new filter to be adopted. 787 * @stable ICU 2.0 788 */ 789 void adoptFilter(UnicodeFilter* adoptedFilter); 790 791 /** 792 * Returns this transliterator's inverse. See the class 793 * documentation for details. This implementation simply inverts 794 * the two entities in the ID and attempts to retrieve the 795 * resulting transliterator. That is, if <code>getID()</code> 796 * returns "A-B", then this method will return the result of 797 * <code>createInstance("B-A")</code>, or <code>null</code> if that 798 * call fails. 
799 * 800 * <p>Subclasses with knowledge of their inverse may wish to 801 * override this method. 802 * 803 * @param status Output param to filled in with a success or an error. 804 * @return a transliterator that is an inverse, not necessarily 805 * exact, of this transliterator, or <code>null</code> if no such 806 * transliterator is registered. 807 * @see #registerInstance 808 * @stable ICU 2.0 809 */ 810 Transliterator* createInverse(UErrorCode& status) const; 811 812 /** 813 * Returns a <code>Transliterator</code> object given its ID. 814 * The ID must be either a system transliterator ID or an ID registered 815 * using <code>registerInstance()</code>. 816 * 817 * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 818 * @param dir either FORWARD or REVERSE. 819 * @param parseError Struct to receive information on position 820 * of error if an error is encountered 821 * @param status Output param to be filled in with a success or an error. 822 * @return A <code>Transliterator</code> object with the given ID 823 * @see #registerInstance 824 * @see #getAvailableIDs 825 * @see #getID 826 * @stable ICU 2.0 827 */ 828 static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, 829 UTransDirection dir, 830 UParseError& parseError, 831 UErrorCode& status); 832 833 /** 834 * Returns a <code>Transliterator</code> object given its ID. 835 * The ID must be either a system transliterator ID or a ID registered 836 * using <code>registerInstance()</code>. 837 * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 838 * @param dir either FORWARD or REVERSE. 839 * @param status Output param to filled in with a success or an error.
840 * @return A <code>Transliterator</code> object with the given ID 841 * @stable ICU 2.0 842 */ 843 static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, 844 UTransDirection dir, 845 UErrorCode& status); 846 847 /** 848 * Returns a <code>Transliterator</code> object constructed from 849 * the given rule string. This will be a RuleBasedTransliterator, 850 * if the rule string contains only rules, or a 851 * CompoundTransliterator, if it contains ID blocks, or a 852 * NullTransliterator, if it contains ID blocks which parse as 853 * empty for the given direction. 854 * @param ID the id for the transliterator. 855 * @param rules rules, separated by ';' 856 * @param dir either FORWARD or REVERSE. 857 * @param parseError Struct to receive information on position 858 * of error if an error is encountered 859 * @param status Output param set to success/failure code. 860 * @stable ICU 2.0 861 */ 862 static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID, 863 const UnicodeString& rules, 864 UTransDirection dir, 865 UParseError& parseError, 866 UErrorCode& status); 867 868 /** 869 * Create a rule string that can be passed to createFromRules() 870 * to recreate this transliterator. 871 * @param result the string to receive the rules. Previous 872 * contents will be deleted. 873 * @param escapeUnprintable if TRUE then convert unprintable 874 * characters to their hex escape representations, \\uxxxx or 875 * \\Uxxxxxxxx. Unprintable characters are those other than 876 * U+000A, U+0020..U+007E. 877 * @stable ICU 2.0 878 */ 879 virtual UnicodeString& toRules(UnicodeString& result, 880 UBool escapeUnprintable) const; 881 882 /** 883 * Return the number of elements that make up this transliterator. 884 * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" 885 * were created, the return value of this method would be 3. 886 * 887 * <p>If this transliterator is not composed of other 888 * transliterators, then this method returns 1.
889 * @return the number of transliterators that compose this 890 * transliterator, or 1 if this transliterator is not composed of 891 * multiple transliterators 892 * @stable ICU 3.0 893 */ 894 int32_t countElements() const; 895 896 /** 897 * Return an element that makes up this transliterator. For 898 * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" 899 * were created, the return value of this method would be one 900 * of the three transliterator objects that make up that 901 * transliterator: [NFD, Jamo-Latin, Latin-Greek]. 902 * 903 * <p>If this transliterator is not composed of other 904 * transliterators, then this method will return a reference to 905 * this transliterator when given the index 0. 906 * @param index a value from 0..countElements()-1 indicating the 907 * transliterator to return 908 * @param ec input-output error code 909 * @return one of the transliterators that make up this 910 * transliterator, if this transliterator is made up of multiple 911 * transliterators, otherwise a reference to this object if given 912 * an index of 0 913 * @stable ICU 3.0 914 */ 915 const Transliterator& getElement(int32_t index, UErrorCode& ec) const; 916 917 /** 918 * Returns the set of all characters that may be modified in the 919 * input text by this Transliterator. This incorporates this 920 * object's current filter; if the filter is changed, the return 921 * value of this function will change. The default implementation 922 * returns an empty set. Some subclasses may override {@link 923 * #handleGetSourceSet } to return a more precise result. The 924 * return result is approximate in any case and is intended for 925 * use by tests, tools, or utilities.
926 * @param result receives result set; previous contents lost 927 * @return a reference to result 928 * @see #getTargetSet 929 * @see #handleGetSourceSet 930 * @stable ICU 2.4 931 */ 932 UnicodeSet& getSourceSet(UnicodeSet& result) const; 933 934 /** 935 * Framework method that returns the set of all characters that 936 * may be modified in the input text by this Transliterator, 937 * ignoring the effect of this object's filter. The base class 938 * implementation returns the empty set. Subclasses that wish to 939 * implement this should override this method. 940 * This method has a <code>void</code> return type; the set of 941 * characters that this transliterator may modify is delivered 942 * through the <code>result</code> argument. 943 * @param result receives result set; previous contents lost 944 * @see #getSourceSet 945 * @see #getTargetSet 946 * @stable ICU 2.4 947 */ 948 virtual void handleGetSourceSet(UnicodeSet& result) const; 949 950 /** 951 * Returns the set of all characters that may be generated as 952 * replacement text by this transliterator. The default 953 * implementation returns the empty set. Some subclasses may 954 * override this method to return a more precise result. The 955 * return result is approximate in any case and is intended for 956 * use by tests, tools, or utilities requiring such 957 * meta-information. 958 * @param result receives result set; previous contents lost 959 * @return a reference to result 960 * @see #getSourceSet 961 * @stable ICU 2.4 962 */ 963 virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; 964 965 public: 966 967 /** 968 * Registers a factory function that creates transliterators of 969 * a given ID. 970 * 971 * Because ICU may choose to cache Transliterators internally, this must 972 * be called at application startup, prior to any calls to 973 * Transliterator::createXXX to avoid undefined behavior.
974 * 975 * @param id the ID being registered 976 * @param factory a function pointer that will be copied and 977 * called later when the given ID is passed to createInstance() 978 * @param context a context pointer that will be stored and 979 * later passed to the factory function when an ID matching 980 * the registration ID is being instantiated with this factory. 981 * @stable ICU 2.0 982 */ 983 static void U_EXPORT2 registerFactory(const UnicodeString& id, 984 Factory factory, 985 Token context); 986 987 /** 988 * Registers an instance <tt>adoptedObj</tt> of a subclass of 989 * <code>Transliterator</code> with the system. When 990 * <tt>createInstance()</tt> is called with an ID string that is 991 * equal to <tt>adoptedObj->getID()</tt>, then <tt>adoptedObj->clone()</tt> is 992 * returned. 993 * 994 * After this call the Transliterator class owns the adoptedObj 995 * and will delete it. 996 * 997 * Because ICU may choose to cache Transliterators internally, this must 998 * be called at application startup, prior to any calls to 999 * Transliterator::createXXX to avoid undefined behavior. 1000 * 1001 * @param adoptedObj an instance of a subclass of 1002 * <code>Transliterator</code> that defines <tt>clone()</tt> 1003 * @see #createInstance 1004 * @see #registerFactory 1005 * @see #unregister 1006 * @stable ICU 2.0 1007 */ 1008 static void U_EXPORT2 registerInstance(Transliterator* adoptedObj); 1009 1010 /** 1011 * Registers an ID string as an alias of another ID string. 1012 * That is, after calling this function, <tt>createInstance(aliasID)</tt> 1013 * will return the same thing as <tt>createInstance(realID)</tt>. 1014 * This is generally used to create shorter, more mnemonic aliases 1015 * for long compound IDs. 1016 * 1017 * @param aliasID The new ID being registered. 1018 * @param realID The ID that the new ID is to be an alias for.
1019 * This can be a compound ID and can include filters and should 1020 * refer to transliterators that have already been registered with 1021 * the framework, although this isn't checked. 1022 * @stable ICU 3.6 1023 */ 1024 static void U_EXPORT2 registerAlias(const UnicodeString& aliasID, 1025 const UnicodeString& realID); 1026 1027 protected: 1028 1029 #ifndef U_HIDE_INTERNAL_API 1030 /** Internal registration hook with the same parameter list as registerFactory(). NOTE(review): presumably the worker behind that public method; confirm against the implementation. 1031 * @param id the ID being registered 1032 * @param factory a function pointer that will be copied and 1033 * called later when the given ID is passed to createInstance() 1034 * @param context a context pointer that will be stored and 1035 * later passed to the factory function when an ID matching 1036 * the registration ID is being instantiated with this factory. 1037 * @internal 1038 */ 1039 static void _registerFactory(const UnicodeString& id, 1040 Factory factory, 1041 Token context); 1042 1043 /** Internal registration hook with the same parameter list as registerInstance(). NOTE(review): presumably the worker behind that public method; confirm against the implementation. 1044 * @internal 1045 */ 1046 static void _registerInstance(Transliterator* adoptedObj); 1047 1048 /** Internal registration hook with the same parameter list as registerAlias(). NOTE(review): presumably the worker behind that public method; confirm against the implementation. 1049 * @internal 1050 */ 1051 static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID); 1052 1053 /** 1054 * Register two targets as being inverses of one another. For 1055 * example, calling registerSpecialInverse("NFC", "NFD", true) causes 1056 * Transliterator to form the following inverse relationships: 1057 * 1058 * <pre>NFC => NFD 1059 * Any-NFC => Any-NFD 1060 * NFD => NFC 1061 * Any-NFD => Any-NFC</pre> 1062 * 1063 * (Without the special inverse registration, the inverse of NFC 1064 * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but 1065 * that the presence or absence of "Any-" is preserved. 1066 * 1067 * <p>The relationship is symmetrical; registering (a, b) is 1068 * equivalent to registering (b, a). 1069 * 1070 * <p>The relevant IDs must still be registered separately as 1071 * factories or classes. 1072 * 1073 * <p>Only the targets are specified. Special inverses always 1074 * have the form Any-Target1 <=> Any-Target2.
The target should 1075 * have canonical casing (the casing desired to be produced when 1076 * an inverse is formed) and should contain no whitespace or other 1077 * extraneous characters. 1078 * 1079 * @param target the target against which to register the inverse 1080 * @param inverseTarget the inverse of target, that is 1081 * Any-target.getInverse() => Any-inverseTarget 1082 * @param bidirectional if true, register the reverse relation 1083 * as well, that is, Any-inverseTarget.getInverse() => Any-target 1084 * @internal 1085 */ 1086 static void _registerSpecialInverse(const UnicodeString& target, 1087 const UnicodeString& inverseTarget, 1088 UBool bidirectional); 1089 #endif /* U_HIDE_INTERNAL_API */ 1090 1091 public: 1092 1093 /** 1094 * Unregisters a transliterator or class. This may be either 1095 * a system transliterator or a user transliterator or class. 1096 * Any attempt to construct an unregistered transliterator based 1097 * on its ID will fail. 1098 * 1099 * Because ICU may choose to cache Transliterators internally, this should 1100 * be called during application shutdown, after all calls to 1101 * Transliterator::createXXX to avoid undefined behavior. 1102 * 1103 * @param ID the ID of the transliterator or class 1104 * This method has a <code>void</code> return type, so nothing is 1105 * handed back to the caller for the unregistered <code>ID</code>. 1106 * @see #registerInstance 1107 * @see #registerFactory 1108 * @stable ICU 2.0 1109 */ 1110 static void U_EXPORT2 unregister(const UnicodeString& ID); 1111 1112 public: 1113 1114 /** 1115 * Return a StringEnumeration over the IDs available at the time of the 1116 * call, including user-registered IDs. 1117 * @param ec input-output error code 1118 * @return a newly-created StringEnumeration over the transliterators 1119 * available at the time of the call. The caller should delete this object 1120 * when done using it.
1121 * @stable ICU 3.0 1122 */ 1123 static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec); 1124 1125 /** 1126 * Return the number of registered source specifiers. 1127 * @return the number of registered source specifiers. 1128 * @stable ICU 2.0 1129 */ 1130 static int32_t U_EXPORT2 countAvailableSources(void); 1131 1132 /** 1133 * Return a registered source specifier. 1134 * @param index which specifier to return, from 0 to n-1, where 1135 * n = countAvailableSources() 1136 * @param result fill-in parameter to receive the source specifier. 1137 * If index is out of range, result will be empty. 1138 * @return reference to result 1139 * @stable ICU 2.0 1140 */ 1141 static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index, 1142 UnicodeString& result); 1143 1144 /** 1145 * Return the number of registered target specifiers for a given 1146 * source specifier. 1147 * @param source the given source specifier. 1148 * @return the number of registered target specifiers for a given 1149 * source specifier. 1150 * @stable ICU 2.0 1151 */ 1152 static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source); 1153 1154 /** 1155 * Return a registered target specifier for a given source. 1156 * @param index which specifier to return, from 0 to n-1, where 1157 * n = countAvailableTargets(source) 1158 * @param source the source specifier 1159 * @param result fill-in parameter to receive the target specifier. 1160 * If source is invalid or if index is out of range, result will 1161 * be empty. 1162 * @return reference to result 1163 * @stable ICU 2.0 1164 */ 1165 static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index, 1166 const UnicodeString& source, 1167 UnicodeString& result); 1168 1169 /** 1170 * Return the number of registered variant specifiers for a given 1171 * source-target pair. 1172 * @param source the source specifiers. 1173 * @param target the target specifiers.
1174 * @stable ICU 2.0 1175 */ 1176 static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source, 1177 const UnicodeString& target); 1178 1179 /** 1180 * Return a registered variant specifier for a given source-target 1181 * pair. 1182 * @param index which specifier to return, from 0 to n-1, where 1183 * n = countAvailableVariants(source, target) 1184 * @param source the source specifier 1185 * @param target the target specifier 1186 * @param result fill-in parameter to receive the variant 1187 * specifier. If source is invalid or if target is invalid or if 1188 * index is out of range, result will be empty. 1189 * @return reference to result 1190 * @stable ICU 2.0 1191 */ 1192 static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index, 1193 const UnicodeString& source, 1194 const UnicodeString& target, 1195 UnicodeString& result); 1196 1197 protected: 1198 1199 #ifndef U_HIDE_INTERNAL_API 1200 /** 1201 * Non-mutexed internal method 1202 * @internal 1203 */ 1204 static int32_t _countAvailableSources(void); 1205 1206 /** 1207 * Non-mutexed internal method 1208 * @internal 1209 */ 1210 static UnicodeString& _getAvailableSource(int32_t index, 1211 UnicodeString& result); 1212 1213 /** 1214 * Non-mutexed internal method 1215 * @internal 1216 */ 1217 static int32_t _countAvailableTargets(const UnicodeString& source); 1218 1219 /** 1220 * Non-mutexed internal method 1221 * @internal 1222 */ 1223 static UnicodeString& _getAvailableTarget(int32_t index, 1224 const UnicodeString& source, 1225 UnicodeString& result); 1226 1227 /** 1228 * Non-mutexed internal method 1229 * @internal 1230 */ 1231 static int32_t _countAvailableVariants(const UnicodeString& source, 1232 const UnicodeString& target); 1233 1234 /** 1235 * Non-mutexed internal method 1236 * @internal 1237 */ 1238 static UnicodeString& _getAvailableVariant(int32_t index, 1239 const UnicodeString& source, 1240 const UnicodeString& target, 1241 UnicodeString& result); 1242 #endif /*
U_HIDE_INTERNAL_API */ 1243 1244 protected: 1245 1246 /** 1247 * Set the ID of this transliterator. Subclasses shouldn't do 1248 * this, unless the underlying script behavior has changed. 1249 * @param id the new id to be set. 1250 * @stable ICU 2.4 1251 */ 1252 void setID(const UnicodeString& id); 1253 1254 public: 1255 1256 /** 1257 * Return the class ID for this class. This is useful only for 1258 * comparing to a return value from getDynamicClassID(). 1259 * Note that Transliterator is an abstract base class, and therefore 1260 * no fully constructed object will have a dynamic 1261 * UClassID that equals the UClassID returned from 1262 * Transliterator::getStaticClassID(). 1263 * @return The class ID for class Transliterator. 1264 * @stable ICU 2.0 1265 */ 1266 static UClassID U_EXPORT2 getStaticClassID(void); 1267 1268 /** 1269 * Returns a unique class ID <b>polymorphically</b>. This method 1270 * is to implement a simple version of RTTI, since not all C++ 1271 * compilers support genuine RTTI. Polymorphic operator==() and 1272 * clone() methods call this method. 1273 * 1274 * <p>Concrete subclasses of Transliterator must use the 1275 * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from 1276 * uobject.h to provide the RTTI functions. 1277 * 1278 * @return The class ID for this object. All objects of a given 1279 * class have the same class ID. Objects of other classes have 1280 * different class IDs. 1281 * @stable ICU 2.0 1282 */ 1283 virtual UClassID getDynamicClassID(void) const = 0; 1284 1285 private: 1286 static UBool initializeRegistry(UErrorCode &status); 1287 1288 public: 1289 #ifndef U_HIDE_OBSOLETE_API 1290 /** 1291 * Return the number of IDs currently registered with the system. 1292 * To retrieve the actual IDs, call getAvailableID(i) with 1293 * i from 0 to countAvailableIDs() - 1. 1294 * @return the number of IDs currently registered with the system.
1295 * @obsolete ICU 3.4 use getAvailableIDs() instead 1296 */ 1297 static int32_t U_EXPORT2 countAvailableIDs(void); 1298 1299 /** 1300 * Return the index-th available ID. index must be between 0 1301 * and countAvailableIDs() - 1, inclusive. If index is out of 1302 * range, the result of getAvailableID(0) is returned. 1303 * @param index the given ID index. 1304 * @return the index-th available ID. index must be between 0 1305 * and countAvailableIDs() - 1, inclusive. If index is out of 1306 * range, the result of getAvailableID(0) is returned. 1307 * @obsolete ICU 3.4 use getAvailableIDs() instead; this function 1308 * is not thread safe, since it returns a reference to storage that 1309 * may become invalid if another thread calls unregister 1310 */ 1311 static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index); 1312 #endif /* U_HIDE_OBSOLETE_API */ 1313 }; 1314 1315 /* Inline accessor: returns the current value of maximumContextLength. */ inline int32_t Transliterator::getMaximumContextLength(void) const { 1316 return maximumContextLength; 1317 } 1318 1319 /* Assigns id to ID, then appends a char16_t 0 and truncates ID by one code unit again (see the NUL-termination note in the body). */ inline void Transliterator::setID(const UnicodeString& id) { 1320 ID = id; 1321 // NUL-terminate the ID string, which is a non-aliased copy. 1322 ID.append((char16_t)0); 1323 ID.truncate(ID.length()-1); 1324 } 1325 1326 #ifndef U_HIDE_INTERNAL_API 1327 /* Returns a Token whose integer member is set to i. */ inline Transliterator::Token Transliterator::integerToken(int32_t i) { 1328 Token t; 1329 t.integer = i; 1330 return t; 1331 } 1332 1333 /* Returns a Token whose pointer member is set to p. */ inline Transliterator::Token Transliterator::pointerToken(void* p) { 1334 Token t; 1335 t.pointer = p; 1336 return t; 1337 } 1338 #endif /* U_HIDE_INTERNAL_API */ 1339 1340 U_NAMESPACE_END 1341 1342 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1343 1344 #endif 1345