1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2000-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package android.icu.text; 11 import java.nio.CharBuffer; 12 import java.text.CharacterIterator; 13 14 import android.icu.impl.Norm2AllModes; 15 import android.icu.impl.Normalizer2Impl; 16 import android.icu.impl.UCaseProps; 17 import android.icu.lang.UCharacter; 18 import android.icu.util.ICUCloneNotSupportedException; 19 20 /** 21 * Old Unicode normalization API. 22 * 23 * <p>This API has been replaced by the {@link Normalizer2} class and is only available 24 * for backward compatibility. This class simply delegates to the Normalizer2 class. 25 * There are two exceptions: The new API does not provide a replacement for 26 * <code>QuickCheckResult</code> and <code>compare()</code>. 27 * 28 * <p><code>normalize</code> transforms Unicode text into an equivalent composed or 29 * decomposed form, allowing for easier sorting and searching of text. 30 * <code>normalize</code> supports the standard normalization forms described in 31 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> 32 * Unicode Standard Annex #15 — Unicode Normalization Forms</a>. 33 * 34 * <p>Characters with accents or other adornments can be encoded in 35 * several different ways in Unicode. For example, take the character A-acute. 
36 * In Unicode, this can be encoded as a single character (the 37 * "composed" form): 38 * 39 * <pre> 40 * 00C1 LATIN CAPITAL LETTER A WITH ACUTE 41 * </pre> 42 * 43 * or as two separate characters (the "decomposed" form): 44 * 45 * <pre> 46 * 0041 LATIN CAPITAL LETTER A 47 * 0301 COMBINING ACUTE ACCENT 48 * </pre> 49 * 50 * <p>To a user of your program, however, both of these sequences should be 51 * treated as the same "user-level" character "A with acute accent". When you 52 * are searching or comparing text, you must ensure that these two sequences are 53 * treated equivalently. In addition, you must handle characters with more than 54 * one accent. Sometimes the order of a character's combining accents is 55 * significant, while in other cases accent sequences in different orders are 56 * really equivalent. 57 * 58 * <p>Similarly, the string "ffi" can be encoded as three separate letters: 59 * 60 * <pre> 61 * 0066 LATIN SMALL LETTER F 62 * 0066 LATIN SMALL LETTER F 63 * 0069 LATIN SMALL LETTER I 64 * </pre> 65 * 66 * or as the single character 67 * 68 * <pre> 69 * FB03 LATIN SMALL LIGATURE FFI 70 * </pre> 71 * 72 * <p>The ffi ligature is not a distinct semantic character, and strictly speaking 73 * it shouldn't be in Unicode at all, but it was included for compatibility 74 * with existing character sets that already provided it. The Unicode standard 75 * identifies such characters by giving them "compatibility" decompositions 76 * into the corresponding semantic characters. When sorting and searching, you 77 * will often want to use these mappings. 78 * 79 * <p><code>normalize</code> helps solve these problems by transforming text into 80 * the canonical composed and decomposed forms as shown in the first example 81 * above. In addition, you can have it perform compatibility decompositions so 82 * that you can treat compatibility characters the same as their equivalents. 
83 * Finally, <code>normalize</code> rearranges accents into the proper canonical 84 * order, so that you do not have to worry about accent rearrangement on your 85 * own. 86 * 87 * <p>Form FCD, "Fast C or D", is also designed for collation. 88 * It allows to work on strings that are not necessarily normalized 89 * with an algorithm (like in collation) that works under "canonical closure", 90 * i.e., it treats precomposed characters and their decomposed equivalents the 91 * same. 92 * 93 * <p>It is not a normalization form because it does not provide for uniqueness of 94 * representation. Multiple strings may be canonically equivalent (their NFDs 95 * are identical) and may all conform to FCD without being identical themselves. 96 * 97 * <p>The form is defined such that the "raw decomposition", the recursive 98 * canonical decomposition of each character, results in a string that is 99 * canonically ordered. This means that precomposed characters are allowed for 100 * as long as their decompositions do not need canonical reordering. 101 * 102 * <p>Its advantage for a process like collation is that all NFD and most NFC texts 103 * - and many unnormalized texts - already conform to FCD and do not need to be 104 * normalized (NFD) for such a process. The FCD quick check will return YES for 105 * most strings in practice. 106 * 107 * <p>normalize(FCD) may be implemented with NFD. 108 * 109 * <p>For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence in Applications): 110 * http://www.unicode.org/notes/tn5/#FCD 111 * 112 * <p>ICU collation performs either NFD or FCD normalization automatically if 113 * normalization is turned on for the collator object. Beyond collation and 114 * string search, normalized strings may be useful for string equivalence 115 * comparisons, transliteration/transcription, unique representations, etc. 116 * 117 * <p>The W3C generally recommends to exchange texts in NFC. 
118 * Note also that most legacy character encodings use only precomposed forms and 119 * often do not encode any combining marks by themselves. For conversion to such 120 * character encodings the Unicode text needs to be normalized to NFC. 121 * For more usage examples, see the Unicode Standard Annex. 122 * 123 * <p>Note: The Normalizer class also provides API for iterative normalization. 124 * While the setIndex() and getIndex() refer to indices in the 125 * underlying Unicode input text, the next() and previous() methods 126 * iterate through characters in the normalized output. 127 * This means that there is not necessarily a one-to-one correspondence 128 * between characters returned by next() and previous() and the indices 129 * passed to and returned from setIndex() and getIndex(). 130 * It is for this reason that Normalizer does not implement the CharacterIterator interface. 131 */ 132 public final class Normalizer implements Cloneable { 133 // The input text and our position in it 134 private UCharacterIterator text; 135 private Normalizer2 norm2; 136 private Mode mode; 137 private int options; 138 139 // The normalization buffer is the result of normalization 140 // of the source in [currentIndex..nextIndex[ . 141 private int currentIndex; 142 private int nextIndex; 143 144 // A buffer for holding intermediate results 145 private StringBuilder buffer; 146 private int bufferPos; 147 148 // Helper classes to defer loading of normalization data. 
149 private static final class ModeImpl { 150 private ModeImpl(Normalizer2 n2) { 151 normalizer2 = n2; 152 } 153 private final Normalizer2 normalizer2; 154 } 155 private static final class NFDModeImpl { 156 private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFDInstance()); 157 } 158 private static final class NFKDModeImpl { 159 private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKDInstance()); 160 } 161 private static final class NFCModeImpl { 162 private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFCInstance()); 163 } 164 private static final class NFKCModeImpl { 165 private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKCInstance()); 166 } 167 private static final class FCDModeImpl { 168 private static final ModeImpl INSTANCE = new ModeImpl(Norm2AllModes.getFCDNormalizer2()); 169 } 170 171 private static final class Unicode32 { 172 private static final UnicodeSet INSTANCE = new UnicodeSet("[:age=3.2:]").freeze(); 173 } 174 private static final class NFD32ModeImpl { 175 private static final ModeImpl INSTANCE = 176 new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFDInstance(), 177 Unicode32.INSTANCE)); 178 } 179 private static final class NFKD32ModeImpl { 180 private static final ModeImpl INSTANCE = 181 new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKDInstance(), 182 Unicode32.INSTANCE)); 183 } 184 private static final class NFC32ModeImpl { 185 private static final ModeImpl INSTANCE = 186 new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFCInstance(), 187 Unicode32.INSTANCE)); 188 } 189 private static final class NFKC32ModeImpl { 190 private static final ModeImpl INSTANCE = 191 new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKCInstance(), 192 Unicode32.INSTANCE)); 193 } 194 private static final class FCD32ModeImpl { 195 private static final ModeImpl INSTANCE = 196 new ModeImpl(new FilteredNormalizer2(Norm2AllModes.getFCDNormalizer2(), 197 Unicode32.INSTANCE)); 198 } 199 200 
/** 201 * Options bit set value to select Unicode 3.2 normalization 202 * (except NormalizationCorrections). 203 * At most one Unicode version can be selected at a time. 204 * 205 * @deprecated ICU 56 Use {@link FilteredNormalizer2} instead. 206 * @hide original deprecated declaration 207 */ 208 @Deprecated 209 public static final int UNICODE_3_2=0x20; 210 211 /** 212 * Constant indicating that the end of the iteration has been reached. 213 * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}. 214 * 215 * @deprecated ICU 56 216 * @hide original deprecated declaration 217 */ 218 @Deprecated 219 public static final int DONE = UCharacterIterator.DONE; 220 221 /** 222 * Constants for normalization modes. 223 * <p> 224 * The Mode class is not intended for public subclassing. 225 * Only the Mode constants provided by the Normalizer class should be used, 226 * and any fields or methods should not be called or overridden by users. 227 * 228 * @deprecated ICU 56 Use {@link Normalizer2} instead. 229 * @hide original deprecated declaration 230 */ 231 @Deprecated 232 public static abstract class Mode { 233 /** 234 * Sole constructor 235 * @deprecated This API is ICU internal only. 236 * @hide original deprecated declaration 237 * @hide draft / provisional / internal are hidden on Android 238 */ 239 @Deprecated 240 protected Mode() { 241 } 242 243 /** 244 * @deprecated This API is ICU internal only. 245 * @hide original deprecated declaration 246 * @hide draft / provisional / internal are hidden on Android 247 */ 248 @Deprecated 249 protected abstract Normalizer2 getNormalizer2(int options); 250 } 251 252 private static final class NONEMode extends Mode { 253 @Override 254 protected Normalizer2 getNormalizer2(int options) { return Norm2AllModes.NOOP_NORMALIZER2; } 255 } 256 private static final class NFDMode extends Mode { 257 @Override 258 protected Normalizer2 getNormalizer2(int options) { 259 return (options&UNICODE_3_2) != 0 ? 
260 NFD32ModeImpl.INSTANCE.normalizer2 : NFDModeImpl.INSTANCE.normalizer2; 261 } 262 } 263 private static final class NFKDMode extends Mode { 264 @Override 265 protected Normalizer2 getNormalizer2(int options) { 266 return (options&UNICODE_3_2) != 0 ? 267 NFKD32ModeImpl.INSTANCE.normalizer2 : NFKDModeImpl.INSTANCE.normalizer2; 268 } 269 } 270 private static final class NFCMode extends Mode { 271 @Override 272 protected Normalizer2 getNormalizer2(int options) { 273 return (options&UNICODE_3_2) != 0 ? 274 NFC32ModeImpl.INSTANCE.normalizer2 : NFCModeImpl.INSTANCE.normalizer2; 275 } 276 } 277 private static final class NFKCMode extends Mode { 278 @Override 279 protected Normalizer2 getNormalizer2(int options) { 280 return (options&UNICODE_3_2) != 0 ? 281 NFKC32ModeImpl.INSTANCE.normalizer2 : NFKCModeImpl.INSTANCE.normalizer2; 282 } 283 } 284 private static final class FCDMode extends Mode { 285 @Override 286 protected Normalizer2 getNormalizer2(int options) { 287 return (options&UNICODE_3_2) != 0 ? 288 FCD32ModeImpl.INSTANCE.normalizer2 : FCDModeImpl.INSTANCE.normalizer2; 289 } 290 } 291 292 /** 293 * No decomposition/composition. 294 * 295 * @deprecated ICU 56 Use {@link Normalizer2} instead. 296 * @hide original deprecated declaration 297 */ 298 @Deprecated 299 public static final Mode NONE = new NONEMode(); 300 301 /** 302 * Canonical decomposition. 303 * 304 * @deprecated ICU 56 Use {@link Normalizer2} instead. 305 * @hide original deprecated declaration 306 */ 307 @Deprecated 308 public static final Mode NFD = new NFDMode(); 309 310 /** 311 * Compatibility decomposition. 312 * 313 * @deprecated ICU 56 Use {@link Normalizer2} instead. 314 * @hide original deprecated declaration 315 */ 316 @Deprecated 317 public static final Mode NFKD = new NFKDMode(); 318 319 /** 320 * Canonical decomposition followed by canonical composition. 321 * 322 * @deprecated ICU 56 Use {@link Normalizer2} instead. 
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode NFC = new NFCMode();

/**
 * Default normalization mode; an alias for {@link #NFC}.
 *
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode DEFAULT = NFC;

/**
 * Compatibility decomposition followed by canonical composition.
 *
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode NFKC =new NFKCMode();

/**
 * "Fast C or D" form.
 *
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode FCD = new FCDMode();

/**
 * Null operation for use with the {@link android.icu.text.Normalizer constructors}
 * and the static {@link #normalize normalize} method. This value tells
 * the <tt>Normalizer</tt> to do nothing but return unprocessed characters
 * from the underlying String or CharacterIterator. If you have code which
 * requires raw text at some times and normalized text at others, you can
 * use <tt>NO_OP</tt> for the cases where you want raw text, rather
 * than having a separate code path that bypasses <tt>Normalizer</tt>
 * altogether.
 * <p>
 * This constant is an alias for {@link #NONE}.
 *
 * @see #setMode
 * @deprecated ICU 2.8. Use Normalizer.NONE
 * @see #NONE
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode NO_OP = NONE;

/**
 * Canonical decomposition followed by canonical composition. Used with the
 * {@link android.icu.text.Normalizer constructors} and the static
 * {@link #normalize normalize} method to determine the operation to be
 * performed.
 * <p>
 * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
 * off, this operation produces output that is in
 * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
 * Normalization Form</a>
 * <b>C</b>.
 * <p>
 * This constant is an alias for {@link #NFC}.
 *
 * @see #setMode
 * @deprecated ICU 2.8. Use Normalizer.NFC
 * @see #NFC
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode COMPOSE = NFC;

/**
 * Compatibility decomposition followed by canonical composition.
 * Used with the {@link android.icu.text.Normalizer constructors} and the static
 * {@link #normalize normalize} method to determine the operation to be
 * performed.
 * <p>
 * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
 * off, this operation produces output that is in
 * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
 * Normalization Form</a>
 * <b>KC</b>.
 * <p>
 * This constant is an alias for {@link #NFKC}.
 *
 * @see #setMode
 * @deprecated ICU 2.8. Use Normalizer.NFKC
 * @see #NFKC
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode COMPOSE_COMPAT = NFKC;

/**
 * Canonical decomposition. This value is passed to the
 * {@link android.icu.text.Normalizer constructors} and the static
 * {@link #normalize normalize}
 * method to determine the operation to be performed.
 * <p>
 * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
 * off, this operation produces output that is in
 * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
 * Normalization Form</a>
 * <b>D</b>.
 * <p>
 * This constant is an alias for {@link #NFD}.
 *
 * @see #setMode
 * @deprecated ICU 2.8. Use Normalizer.NFD
 * @see #NFD
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode DECOMP = NFD;

/**
 * Compatibility decomposition.
 * This value is passed to the
 * {@link android.icu.text.Normalizer constructors} and the static
 * {@link #normalize normalize}
 * method to determine the operation to be performed.
 * <p>
 * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
 * off, this operation produces output that is in
 * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
 * Normalization Form</a>
 * <b>KD</b>.
 * <p>
 * This constant is an alias for {@link #NFKD}.
 *
 * @see #setMode
 * @deprecated ICU 2.8. Use Normalizer.NFKD
 * @see #NFKD
 * @hide original deprecated declaration
 */
@Deprecated
public static final Mode DECOMP_COMPAT = NFKD;

/**
 * Option to disable Hangul/Jamo composition and decomposition.
 * This option applies to Korean text,
 * which can be represented either in the Jamo alphabet or in Hangul
 * characters, which are really just two or three Jamo combined
 * into one visual glyph. Since Jamo takes up more storage space than
 * Hangul, applications that process only Hangul text may wish to turn
 * this option on when decomposing text.
 * <p>
 * The Unicode standard treats Hangul to Jamo conversion as a
 * canonical decomposition, so this option must be turned <b>off</b> if you
 * wish to transform strings into one of the standard
 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
 * Unicode Normalization Forms</a>.
 *
 * @see #setOption
 * @deprecated ICU 2.8. This option is no longer supported.
 * @hide original deprecated declaration
 */
@Deprecated
public static final int IGNORE_HANGUL = 0x0001;

/**
 * Result values for quickCheck().
 * For details see Unicode Technical Report 15.
 */
public static final class QuickCheckResult{
    // NOTE: the backing field is commented out, so the value passed to the
    // constructor is currently discarded.
    //private int resultValue;
    private QuickCheckResult(int value) {
        //resultValue=value;
    }
}
/**
 * Indicates that string is not in the normalized format
 */
public static final QuickCheckResult NO = new QuickCheckResult(0);

/**
 * Indicates that string is in the normalized format
 */
public static final QuickCheckResult YES = new QuickCheckResult(1);

/**
 * Indicates it cannot be determined if string is in the normalized
 * format without further thorough checks.
 */
public static final QuickCheckResult MAYBE = new QuickCheckResult(2);

/**
 * Option bit for compare:
 * Case sensitively compare the strings.
 * Has the same value as {@link UCharacter#FOLD_CASE_DEFAULT}.
 */
public static final int FOLD_CASE_DEFAULT = UCharacter.FOLD_CASE_DEFAULT;

/**
 * Option bit for compare:
 * Both input strings are assumed to fulfill FCD conditions.
 */
public static final int INPUT_IS_FCD = 0x20000;

/**
 * Option bit for compare:
 * Perform case-insensitive comparison.
 */
public static final int COMPARE_IGNORE_CASE = 0x10000;

/**
 * Option bit for compare:
 * Compare strings in code point order instead of code unit order.
 */
public static final int COMPARE_CODE_POINT_ORDER = 0x8000;

/**
 * Option value for case folding:
 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
 * and dotless i appropriately for Turkic languages (tr, az).
 * Has the same value as {@link UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I}.
 * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
 */
public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I;

/**
 * Lowest-order bit number of compare() options bits corresponding to
 * normalization options bits.
 *
 * The options parameter for compare() uses most bits for
 * itself and for various comparison and folding flags.
539 * The most significant bits, however, are shifted down and passed on 540 * to the normalization implementation. 541 * (That is, from compare(..., options, ...), 542 * options>>COMPARE_NORM_OPTIONS_SHIFT will be passed on to the 543 * internal normalization functions.) 544 * 545 * @see #compare 546 * @deprecated ICU 56 Use {@link Normalizer2} instead. 547 * @hide original deprecated declaration 548 */ 549 @Deprecated 550 public static final int COMPARE_NORM_OPTIONS_SHIFT = 20; 551 552 //------------------------------------------------------------------------- 553 // Iterator constructors 554 //------------------------------------------------------------------------- 555 556 /** 557 * Creates a new <tt>Normalizer</tt> object for iterating over the 558 * normalized form of a given string. 559 * <p> 560 * The <tt>options</tt> parameter specifies which optional 561 * <tt>Normalizer</tt> features are to be enabled for this object. 562 * <p> 563 * @param str The string to be normalized. The normalization 564 * will start at the beginning of the string. 565 * 566 * @param mode The normalization mode. 567 * 568 * @param opt Any optional features to be enabled. 569 * Currently the only available option is {@link #UNICODE_3_2}. 570 * If you want the default behavior corresponding to one of the 571 * standard Unicode Normalization Forms, use 0 for this argument. 572 * @deprecated ICU 56 Use {@link Normalizer2} instead. 573 * @hide original deprecated declaration 574 */ 575 @Deprecated 576 public Normalizer(String str, Mode mode, int opt) { 577 this.text = UCharacterIterator.getInstance(str); 578 this.mode = mode; 579 this.options=opt; 580 norm2 = mode.getNormalizer2(opt); 581 buffer = new StringBuilder(); 582 } 583 584 /** 585 * Creates a new <tt>Normalizer</tt> object for iterating over the 586 * normalized form of the given text. 587 * <p> 588 * @param iter The input text to be normalized. The normalization 589 * will start at the beginning of the string. 
590 * 591 * @param mode The normalization mode. 592 * 593 * @param opt Any optional features to be enabled. 594 * Currently the only available option is {@link #UNICODE_3_2}. 595 * If you want the default behavior corresponding to one of the 596 * standard Unicode Normalization Forms, use 0 for this argument. 597 * @deprecated ICU 56 Use {@link Normalizer2} instead. 598 * @hide original deprecated declaration 599 */ 600 @Deprecated 601 public Normalizer(CharacterIterator iter, Mode mode, int opt) { 602 this.text = UCharacterIterator.getInstance((CharacterIterator)iter.clone()); 603 this.mode = mode; 604 this.options = opt; 605 norm2 = mode.getNormalizer2(opt); 606 buffer = new StringBuilder(); 607 } 608 609 /** 610 * Creates a new <tt>Normalizer</tt> object for iterating over the 611 * normalized form of the given text. 612 * <p> 613 * @param iter The input text to be normalized. The normalization 614 * will start at the beginning of the string. 615 * 616 * @param mode The normalization mode. 617 * @param options The normalization options, ORed together (0 for no options). 618 * @deprecated ICU 56 Use {@link Normalizer2} instead. 619 * @hide original deprecated declaration 620 */ 621 @Deprecated 622 public Normalizer(UCharacterIterator iter, Mode mode, int options) { 623 try { 624 this.text = (UCharacterIterator)iter.clone(); 625 this.mode = mode; 626 this.options = options; 627 norm2 = mode.getNormalizer2(options); 628 buffer = new StringBuilder(); 629 } catch (CloneNotSupportedException e) { 630 throw new ICUCloneNotSupportedException(e); 631 } 632 } 633 634 /** 635 * Clones this <tt>Normalizer</tt> object. All properties of this 636 * object are duplicated in the new object, including the cloning of any 637 * {@link CharacterIterator} that was passed in to the constructor 638 * or to {@link #setText(CharacterIterator) setText}. 
639 * However, the text storage underlying 640 * the <tt>CharacterIterator</tt> is not duplicated unless the 641 * iterator's <tt>clone</tt> method does so. 642 * 643 * @deprecated ICU 56 Use {@link Normalizer2} instead. 644 * @hide original deprecated declaration 645 */ 646 @Deprecated 647 @Override 648 public Object clone() { 649 try { 650 Normalizer copy = (Normalizer) super.clone(); 651 copy.text = (UCharacterIterator) text.clone(); 652 copy.mode = mode; 653 copy.options = options; 654 copy.norm2 = norm2; 655 copy.buffer = new StringBuilder(buffer); 656 copy.bufferPos = bufferPos; 657 copy.currentIndex = currentIndex; 658 copy.nextIndex = nextIndex; 659 return copy; 660 } 661 catch (CloneNotSupportedException e) { 662 throw new ICUCloneNotSupportedException(e); 663 } 664 } 665 666 //-------------------------------------------------------------------------- 667 // Static Utility methods 668 //-------------------------------------------------------------------------- 669 670 private static final Normalizer2 getComposeNormalizer2(boolean compat, int options) { 671 return (compat ? NFKC : NFC).getNormalizer2(options); 672 } 673 private static final Normalizer2 getDecomposeNormalizer2(boolean compat, int options) { 674 return (compat ? NFKD : NFD).getNormalizer2(options); 675 } 676 677 /** 678 * Compose a string. 679 * The string will be composed to according to the specified mode. 680 * @param str The string to compose. 681 * @param compat If true the string will be composed according to 682 * NFKC rules and if false will be composed according to 683 * NFC rules. 684 * @return String The composed string 685 * @deprecated ICU 56 Use {@link Normalizer2} instead. 686 * @hide original deprecated declaration 687 */ 688 @Deprecated 689 public static String compose(String str, boolean compat) { 690 return compose(str,compat,0); 691 } 692 693 /** 694 * Compose a string. 695 * The string will be composed to according to the specified mode. 
696 * @param str The string to compose. 697 * @param compat If true the string will be composed according to 698 * NFKC rules and if false will be composed according to 699 * NFC rules. 700 * @param options The only recognized option is UNICODE_3_2 701 * @return String The composed string 702 * @deprecated ICU 56 Use {@link Normalizer2} instead. 703 * @hide original deprecated declaration 704 */ 705 @Deprecated 706 public static String compose(String str, boolean compat, int options) { 707 return getComposeNormalizer2(compat, options).normalize(str); 708 } 709 710 /** 711 * Compose a string. 712 * The string will be composed to according to the specified mode. 713 * @param source The char array to compose. 714 * @param target A char buffer to receive the normalized text. 715 * @param compat If true the char array will be composed according to 716 * NFKC rules and if false will be composed according to 717 * NFC rules. 718 * @param options The normalization options, ORed together (0 for no options). 719 * @return int The total buffer size needed;if greater than length of 720 * result, the output was truncated. 721 * @exception IndexOutOfBoundsException if target.length is less than the 722 * required length 723 * @deprecated ICU 56 Use {@link Normalizer2} instead. 724 * @hide original deprecated declaration 725 */ 726 @Deprecated 727 public static int compose(char[] source,char[] target, boolean compat, int options) { 728 return compose(source, 0, source.length, target, 0, target.length, compat, options); 729 } 730 731 /** 732 * Compose a string. 733 * The string will be composed to according to the specified mode. 734 * @param src The char array to compose. 
735 * @param srcStart Start index of the source 736 * @param srcLimit Limit index of the source 737 * @param dest The char buffer to fill in 738 * @param destStart Start index of the destination buffer 739 * @param destLimit End index of the destination buffer 740 * @param compat If true the char array will be composed according to 741 * NFKC rules and if false will be composed according to 742 * NFC rules. 743 * @param options The normalization options, ORed together (0 for no options). 744 * @return int The total buffer size needed;if greater than length of 745 * result, the output was truncated. 746 * @exception IndexOutOfBoundsException if target.length is less than the 747 * required length 748 * @deprecated ICU 56 Use {@link Normalizer2} instead. 749 * @hide original deprecated declaration 750 */ 751 @Deprecated 752 public static int compose(char[] src,int srcStart, int srcLimit, 753 char[] dest,int destStart, int destLimit, 754 boolean compat, int options) { 755 CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart); 756 CharsAppendable app = new CharsAppendable(dest, destStart, destLimit); 757 getComposeNormalizer2(compat, options).normalize(srcBuffer, app); 758 return app.length(); 759 } 760 761 /** 762 * Decompose a string. 763 * The string will be decomposed to according to the specified mode. 764 * @param str The string to decompose. 765 * @param compat If true the string will be decomposed according to NFKD 766 * rules and if false will be decomposed according to NFD 767 * rules. 768 * @return String The decomposed string 769 * @deprecated ICU 56 Use {@link Normalizer2} instead. 770 * @hide original deprecated declaration 771 */ 772 @Deprecated 773 public static String decompose(String str, boolean compat) { 774 return decompose(str,compat,0); 775 } 776 777 /** 778 * Decompose a string. 779 * The string will be decomposed to according to the specified mode. 780 * @param str The string to decompose. 
781 * @param compat If true the string will be decomposed according to NFKD 782 * rules and if false will be decomposed according to NFD 783 * rules. 784 * @param options The normalization options, ORed together (0 for no options). 785 * @return String The decomposed string 786 * @deprecated ICU 56 Use {@link Normalizer2} instead. 787 * @hide original deprecated declaration 788 */ 789 @Deprecated 790 public static String decompose(String str, boolean compat, int options) { 791 return getDecomposeNormalizer2(compat, options).normalize(str); 792 } 793 794 /** 795 * Decompose a string. 796 * The string will be decomposed to according to the specified mode. 797 * @param source The char array to decompose. 798 * @param target A char buffer to receive the normalized text. 799 * @param compat If true the char array will be decomposed according to NFKD 800 * rules and if false will be decomposed according to 801 * NFD rules. 802 * @return int The total buffer size needed;if greater than length of 803 * result,the output was truncated. 804 * @param options The normalization options, ORed together (0 for no options). 805 * @exception IndexOutOfBoundsException if the target capacity is less than 806 * the required length 807 * @deprecated ICU 56 Use {@link Normalizer2} instead. 808 * @hide original deprecated declaration 809 */ 810 @Deprecated 811 public static int decompose(char[] source,char[] target, boolean compat, int options) { 812 return decompose(source, 0, source.length, target, 0, target.length, compat, options); 813 } 814 815 /** 816 * Decompose a string. 817 * The string will be decomposed to according to the specified mode. 818 * @param src The char array to compose. 
819 * @param srcStart Start index of the source 820 * @param srcLimit Limit index of the source 821 * @param dest The char buffer to fill in 822 * @param destStart Start index of the destination buffer 823 * @param destLimit End index of the destination buffer 824 * @param compat If true the char array will be decomposed according to NFKD 825 * rules and if false will be decomposed according to 826 * NFD rules. 827 * @param options The normalization options, ORed together (0 for no options). 828 * @return int The total buffer size needed;if greater than length of 829 * result,the output was truncated. 830 * @exception IndexOutOfBoundsException if the target capacity is less than 831 * the required length 832 * @deprecated ICU 56 Use {@link Normalizer2} instead. 833 * @hide original deprecated declaration 834 */ 835 @Deprecated 836 public static int decompose(char[] src,int srcStart, int srcLimit, 837 char[] dest,int destStart, int destLimit, 838 boolean compat, int options) { 839 CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart); 840 CharsAppendable app = new CharsAppendable(dest, destStart, destLimit); 841 getDecomposeNormalizer2(compat, options).normalize(srcBuffer, app); 842 return app.length(); 843 } 844 845 /** 846 * Normalizes a <tt>String</tt> using the given normalization operation. 847 * <p> 848 * The <tt>options</tt> parameter specifies which optional 849 * <tt>Normalizer</tt> features are to be enabled for this operation. 850 * Currently the only available option is {@link #UNICODE_3_2}. 851 * If you want the default behavior corresponding to one of the standard 852 * Unicode Normalization Forms, use 0 for this argument. 853 * <p> 854 * @param str the input string to be normalized. 855 * @param mode the normalization mode 856 * @param options the optional features to be enabled. 857 * @return String the normalized string 858 * @deprecated ICU 56 Use {@link Normalizer2} instead. 
859 * @hide original deprecated declaration 860 */ 861 @Deprecated 862 public static String normalize(String str, Mode mode, int options) { 863 return mode.getNormalizer2(options).normalize(str); 864 } 865 866 /** 867 * Normalize a string. 868 * The string will be normalized according to the specified normalization 869 * mode and options. 870 * @param src The string to normalize. 871 * @param mode The normalization mode; one of Normalizer.NONE, 872 * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, 873 * Normalizer.NFKD, Normalizer.DEFAULT 874 * @return the normalized string 875 * @deprecated ICU 56 Use {@link Normalizer2} instead. 876 * @hide original deprecated declaration 877 */ 878 @Deprecated 879 public static String normalize(String src,Mode mode) { 880 return normalize(src, mode, 0); 881 } 882 /** 883 * Normalize a string. 884 * The string will be normalized according to the specified normalization 885 * mode and options. 886 * @param source The char array to normalize. 887 * @param target A char buffer to receive the normalized text. 888 * @param mode The normalization mode; one of Normalizer.NONE, 889 * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, 890 * Normalizer.NFKD, Normalizer.DEFAULT 891 * @param options The normalization options, ORed together (0 for no options). 892 * @return int The total buffer size needed;if greater than length of 893 * result, the output was truncated. 894 * @exception IndexOutOfBoundsException if the target capacity is less 895 * than the required length 896 * @deprecated ICU 56 Use {@link Normalizer2} instead. 897 * @hide original deprecated declaration 898 */ 899 @Deprecated 900 public static int normalize(char[] source,char[] target, Mode mode, int options) { 901 return normalize(source,0,source.length,target,0,target.length,mode, options); 902 } 903 904 /** 905 * Normalize a string. 906 * The string will be normalized according to the specified normalization 907 * mode and options. 
908 * @param src The char array to compose. 909 * @param srcStart Start index of the source 910 * @param srcLimit Limit index of the source 911 * @param dest The char buffer to fill in 912 * @param destStart Start index of the destination buffer 913 * @param destLimit End index of the destination buffer 914 * @param mode The normalization mode; one of Normalizer.NONE, 915 * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, 916 * Normalizer.NFKD, Normalizer.DEFAULT 917 * @param options The normalization options, ORed together (0 for no options). 918 * @return int The total buffer size needed;if greater than length of 919 * result, the output was truncated. 920 * @exception IndexOutOfBoundsException if the target capacity is 921 * less than the required length 922 * @deprecated ICU 56 Use {@link Normalizer2} instead. 923 * @hide original deprecated declaration 924 */ 925 @Deprecated 926 public static int normalize(char[] src,int srcStart, int srcLimit, 927 char[] dest,int destStart, int destLimit, 928 Mode mode, int options) { 929 CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart); 930 CharsAppendable app = new CharsAppendable(dest, destStart, destLimit); 931 mode.getNormalizer2(options).normalize(srcBuffer, app); 932 return app.length(); 933 } 934 935 /** 936 * Normalize a codepoint according to the given mode 937 * @param char32 The input string to be normalized. 938 * @param mode The normalization mode 939 * @param options Options for use with exclusion set and tailored Normalization 940 * The only option that is currently recognized is UNICODE_3_2 941 * @return String The normalized string 942 * @see #UNICODE_3_2 943 * @deprecated ICU 56 Use {@link Normalizer2} instead. 
944 * @hide original deprecated declaration 945 */ 946 @Deprecated 947 public static String normalize(int char32, Mode mode, int options) { 948 if(mode == NFD && options == 0) { 949 String decomposition = Normalizer2.getNFCInstance().getDecomposition(char32); 950 if(decomposition == null) { 951 decomposition = UTF16.valueOf(char32); 952 } 953 return decomposition; 954 } 955 return normalize(UTF16.valueOf(char32), mode, options); 956 } 957 958 /** 959 * Convenience method to normalize a codepoint according to the given mode 960 * @param char32 The input string to be normalized. 961 * @param mode The normalization mode 962 * @return String The normalized string 963 * @deprecated ICU 56 Use {@link Normalizer2} instead. 964 * @hide original deprecated declaration 965 */ 966 @Deprecated 967 public static String normalize(int char32, Mode mode) { 968 return normalize(char32, mode, 0); 969 } 970 971 /** 972 * Convenience method. 973 * 974 * @param source string for determining if it is in a normalized format 975 * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, 976 * Normalizer.NFKC,Normalizer.NFKD) 977 * @return Return code to specify if the text is normalized or not 978 * (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE) 979 * @deprecated ICU 56 Use {@link Normalizer2} instead. 980 * @hide original deprecated declaration 981 */ 982 @Deprecated 983 public static QuickCheckResult quickCheck(String source, Mode mode) { 984 return quickCheck(source, mode, 0); 985 } 986 987 /** 988 * Performing quick check on a string, to quickly determine if the string is 989 * in a particular normalization format. 990 * Three types of result can be returned Normalizer.YES, Normalizer.NO or 991 * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument 992 * string is in the desired normalized format, Normalizer.NO determines that 993 * argument string is not in the desired normalized format. 
A 994 * Normalizer.MAYBE result indicates that a more thorough check is required, 995 * the user may have to put the string in its normalized form and compare 996 * the results. 997 * 998 * @param source string for determining if it is in a normalized format 999 * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, 1000 * Normalizer.NFKC,Normalizer.NFKD) 1001 * @param options Options for use with exclusion set and tailored Normalization 1002 * The only option that is currently recognized is UNICODE_3_2 1003 * @return Return code to specify if the text is normalized or not 1004 * (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE) 1005 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1006 * @hide original deprecated declaration 1007 */ 1008 @Deprecated 1009 public static QuickCheckResult quickCheck(String source, Mode mode, int options) { 1010 return mode.getNormalizer2(options).quickCheck(source); 1011 } 1012 1013 /** 1014 * Convenience method. 1015 * 1016 * @param source Array of characters for determining if it is in a 1017 * normalized format 1018 * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, 1019 * Normalizer.NFKC,Normalizer.NFKD) 1020 * @param options Options for use with exclusion set and tailored Normalization 1021 * The only option that is currently recognized is UNICODE_3_2 1022 * @return Return code to specify if the text is normalized or not 1023 * (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE) 1024 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1025 * @hide original deprecated declaration 1026 */ 1027 @Deprecated 1028 public static QuickCheckResult quickCheck(char[] source, Mode mode, int options) { 1029 return quickCheck(source, 0, source.length, mode, options); 1030 } 1031 1032 /** 1033 * Performing quick check on a string, to quickly determine if the string is 1034 * in a particular normalization format. 
1035 * Three types of result can be returned Normalizer.YES, Normalizer.NO or 1036 * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument 1037 * string is in the desired normalized format, Normalizer.NO determines that 1038 * argument string is not in the desired normalized format. A 1039 * Normalizer.MAYBE result indicates that a more thorough check is required, 1040 * the user may have to put the string in its normalized form and compare 1041 * the results. 1042 * 1043 * @param source string for determining if it is in a normalized format 1044 * @param start the start index of the source 1045 * @param limit the limit index of the source it is equal to the length 1046 * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, 1047 * Normalizer.NFKC,Normalizer.NFKD) 1048 * @param options Options for use with exclusion set and tailored Normalization 1049 * The only option that is currently recognized is UNICODE_3_2 1050 * @return Return code to specify if the text is normalized or not 1051 * (Normalizer.YES, Normalizer.NO or 1052 * Normalizer.MAYBE) 1053 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1054 * @hide original deprecated declaration 1055 */ 1056 @Deprecated 1057 public static QuickCheckResult quickCheck(char[] source,int start, 1058 int limit, Mode mode,int options) { 1059 CharBuffer srcBuffer = CharBuffer.wrap(source, start, limit - start); 1060 return mode.getNormalizer2(options).quickCheck(srcBuffer); 1061 } 1062 1063 /** 1064 * Test if a string is in a given normalization form. 1065 * This is semantically equivalent to source.equals(normalize(source, mode)). 1066 * 1067 * Unlike quickCheck(), this function returns a definitive result, 1068 * never a "maybe". 1069 * For NFD, NFKD, and FCD, both functions work exactly the same. 1070 * For NFC and NFKC where quickCheck may return "maybe", this function will 1071 * perform further tests to arrive at a true/false result. 
1072 * @param src The input array of characters to be checked to see if 1073 * it is normalized 1074 * @param start The strart index in the source 1075 * @param limit The limit index in the source 1076 * @param mode the normalization mode 1077 * @param options Options for use with exclusion set and tailored Normalization 1078 * The only option that is currently recognized is UNICODE_3_2 1079 * @return Boolean value indicating whether the source string is in the 1080 * "mode" normalization form 1081 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1082 * @hide original deprecated declaration 1083 */ 1084 @Deprecated 1085 public static boolean isNormalized(char[] src,int start, 1086 int limit, Mode mode, 1087 int options) { 1088 CharBuffer srcBuffer = CharBuffer.wrap(src, start, limit - start); 1089 return mode.getNormalizer2(options).isNormalized(srcBuffer); 1090 } 1091 1092 /** 1093 * Test if a string is in a given normalization form. 1094 * This is semantically equivalent to source.equals(normalize(source, mode)). 1095 * 1096 * Unlike quickCheck(), this function returns a definitive result, 1097 * never a "maybe". 1098 * For NFD, NFKD, and FCD, both functions work exactly the same. 1099 * For NFC and NFKC where quickCheck may return "maybe", this function will 1100 * perform further tests to arrive at a true/false result. 1101 * @param str the input string to be checked to see if it is 1102 * normalized 1103 * @param mode the normalization mode 1104 * @param options Options for use with exclusion set and tailored Normalization 1105 * The only option that is currently recognized is UNICODE_3_2 1106 * @see #isNormalized 1107 * @deprecated ICU 56 Use {@link Normalizer2} instead. 
1108 * @hide original deprecated declaration 1109 */ 1110 @Deprecated 1111 public static boolean isNormalized(String str, Mode mode, int options) { 1112 return mode.getNormalizer2(options).isNormalized(str); 1113 } 1114 1115 /** 1116 * Convenience Method 1117 * @param char32 the input code point to be checked to see if it is 1118 * normalized 1119 * @param mode the normalization mode 1120 * @param options Options for use with exclusion set and tailored Normalization 1121 * The only option that is currently recognized is UNICODE_3_2 1122 * 1123 * @see #isNormalized 1124 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1125 * @hide original deprecated declaration 1126 */ 1127 @Deprecated 1128 public static boolean isNormalized(int char32, Mode mode,int options) { 1129 return isNormalized(UTF16.valueOf(char32), mode, options); 1130 } 1131 1132 /** 1133 * Compare two strings for canonical equivalence. 1134 * Further options include case-insensitive comparison and 1135 * code point order (as opposed to code unit order). 1136 * 1137 * Canonical equivalence between two strings is defined as their normalized 1138 * forms (NFD or NFC) being identical. 1139 * This function compares strings incrementally instead of normalizing 1140 * (and optionally case-folding) both strings entirely, 1141 * improving performance significantly. 1142 * 1143 * Bulk normalization is only necessary if the strings do not fulfill the 1144 * FCD conditions. Only in this case, and only if the strings are relatively 1145 * long, is memory allocated temporarily. 1146 * For FCD strings and short non-FCD strings there is no memory allocation. 1147 * 1148 * Semantically, this is equivalent to 1149 * strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2))) 1150 * where code point order and foldCase are all optional. 1151 * 1152 * @param s1 First source character array. 
1153 * @param s1Start start index of source 1154 * @param s1Limit limit of the source 1155 * 1156 * @param s2 Second source character array. 1157 * @param s2Start start index of the source 1158 * @param s2Limit limit of the source 1159 * 1160 * @param options A bit set of options: 1161 * - FOLD_CASE_DEFAULT or 0 is used for default options: 1162 * Case-sensitive comparison in code unit order, and the input strings 1163 * are quick-checked for FCD. 1164 * 1165 * - INPUT_IS_FCD 1166 * Set if the caller knows that both s1 and s2 fulfill the FCD 1167 * conditions.If not set, the function will quickCheck for FCD 1168 * and normalize if necessary. 1169 * 1170 * - COMPARE_CODE_POINT_ORDER 1171 * Set to choose code point order instead of code unit order 1172 * 1173 * - COMPARE_IGNORE_CASE 1174 * Set to compare strings case-insensitively using case folding, 1175 * instead of case-sensitively. 1176 * If set, then the following case folding options are used. 1177 * 1178 * 1179 * @return <0 or 0 or >0 as usual for string comparisons 1180 * 1181 * @see #normalize 1182 * @see #FCD 1183 */ 1184 public static int compare(char[] s1, int s1Start, int s1Limit, 1185 char[] s2, int s2Start, int s2Limit, 1186 int options) { 1187 if( s1==null || s1Start<0 || s1Limit<0 || 1188 s2==null || s2Start<0 || s2Limit<0 || 1189 s1Limit<s1Start || s2Limit<s2Start 1190 ) { 1191 throw new IllegalArgumentException(); 1192 } 1193 return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start), 1194 CharBuffer.wrap(s2, s2Start, s2Limit-s2Start), 1195 options); 1196 } 1197 1198 /** 1199 * Compare two strings for canonical equivalence. 1200 * Further options include case-insensitive comparison and 1201 * code point order (as opposed to code unit order). 1202 * 1203 * Canonical equivalence between two strings is defined as their normalized 1204 * forms (NFD or NFC) being identical. 
1205 * This function compares strings incrementally instead of normalizing 1206 * (and optionally case-folding) both strings entirely, 1207 * improving performance significantly. 1208 * 1209 * Bulk normalization is only necessary if the strings do not fulfill the 1210 * FCD conditions. Only in this case, and only if the strings are relatively 1211 * long, is memory allocated temporarily. 1212 * For FCD strings and short non-FCD strings there is no memory allocation. 1213 * 1214 * Semantically, this is equivalent to 1215 * strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2))) 1216 * where code point order and foldCase are all optional. 1217 * 1218 * @param s1 First source string. 1219 * @param s2 Second source string. 1220 * 1221 * @param options A bit set of options: 1222 * - FOLD_CASE_DEFAULT or 0 is used for default options: 1223 * Case-sensitive comparison in code unit order, and the input strings 1224 * are quick-checked for FCD. 1225 * 1226 * - INPUT_IS_FCD 1227 * Set if the caller knows that both s1 and s2 fulfill the FCD 1228 * conditions. If not set, the function will quickCheck for FCD 1229 * and normalize if necessary. 1230 * 1231 * - COMPARE_CODE_POINT_ORDER 1232 * Set to choose code point order instead of code unit order 1233 * 1234 * - COMPARE_IGNORE_CASE 1235 * Set to compare strings case-insensitively using case folding, 1236 * instead of case-sensitively. 1237 * If set, then the following case folding options are used. 1238 * 1239 * @return <0 or 0 or >0 as usual for string comparisons 1240 * 1241 * @see #normalize 1242 * @see #FCD 1243 */ 1244 public static int compare(String s1, String s2, int options) { 1245 return internalCompare(s1, s2, options); 1246 } 1247 1248 /** 1249 * Compare two strings for canonical equivalence. 1250 * Further options include case-insensitive comparison and 1251 * code point order (as opposed to code unit order). 1252 * Convenience method. 1253 * 1254 * @param s1 First source string. 
1255 * @param s2 Second source string. 1256 * 1257 * @param options A bit set of options: 1258 * - FOLD_CASE_DEFAULT or 0 is used for default options: 1259 * Case-sensitive comparison in code unit order, and the input strings 1260 * are quick-checked for FCD. 1261 * 1262 * - INPUT_IS_FCD 1263 * Set if the caller knows that both s1 and s2 fulfill the FCD 1264 * conditions. If not set, the function will quickCheck for FCD 1265 * and normalize if necessary. 1266 * 1267 * - COMPARE_CODE_POINT_ORDER 1268 * Set to choose code point order instead of code unit order 1269 * 1270 * - COMPARE_IGNORE_CASE 1271 * Set to compare strings case-insensitively using case folding, 1272 * instead of case-sensitively. 1273 * If set, then the following case folding options are used. 1274 * 1275 * @return <0 or 0 or >0 as usual for string comparisons 1276 * 1277 * @see #normalize 1278 * @see #FCD 1279 */ 1280 public static int compare(char[] s1, char[] s2, int options) { 1281 return internalCompare(CharBuffer.wrap(s1), CharBuffer.wrap(s2), options); 1282 } 1283 1284 /** 1285 * Convenience method that can have faster implementation 1286 * by not allocating buffers. 1287 * @param char32a the first code point to be checked against the 1288 * @param char32b the second code point 1289 * @param options A bit set of options 1290 */ 1291 public static int compare(int char32a, int char32b, int options) { 1292 return internalCompare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options|INPUT_IS_FCD); 1293 } 1294 1295 /** 1296 * Convenience method that can have faster implementation 1297 * by not allocating buffers. 
1298 * @param char32a the first code point to be checked against 1299 * @param str2 the second string 1300 * @param options A bit set of options 1301 */ 1302 public static int compare(int char32a, String str2, int options) { 1303 return internalCompare(UTF16.valueOf(char32a), str2, options); 1304 } 1305 1306 /* Concatenation of normalized strings --------------------------------- */ 1307 /** 1308 * Concatenate normalized strings, making sure that the result is normalized 1309 * as well. 1310 * 1311 * If both the left and the right strings are in 1312 * the normalization form according to "mode", 1313 * then the result will be 1314 * 1315 * <code> 1316 * dest=normalize(left+right, mode) 1317 * </code> 1318 * 1319 * With the input strings already being normalized, 1320 * this function will use next() and previous() 1321 * to find the adjacent end pieces of the input strings. 1322 * Only the concatenation of these end pieces will be normalized and 1323 * then concatenated with the remaining parts of the input strings. 1324 * 1325 * It is allowed to have dest==left to avoid copying the entire left string. 1326 * 1327 * @param left Left source array, may be same as dest. 1328 * @param leftStart start in the left array. 1329 * @param leftLimit limit in the left array (==length) 1330 * @param right Right source array. 1331 * @param rightStart start in the right array. 1332 * @param rightLimit limit in the right array (==length) 1333 * @param dest The output buffer; can be null if destStart==destLimit==0 1334 * for pure preflighting. 1335 * @param destStart start in the destination array 1336 * @param destLimit limit in the destination array (==length) 1337 * @param mode The normalization mode. 1338 * @param options The normalization options, ORed together (0 for no options). 
1339 * @return Length of output (number of chars) when successful or 1340 * IndexOutOfBoundsException 1341 * @exception IndexOutOfBoundsException whose message has the string 1342 * representation of destination capacity required. 1343 * @see #normalize 1344 * @see #next 1345 * @see #previous 1346 * @exception IndexOutOfBoundsException if target capacity is less than the 1347 * required length 1348 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1349 * @hide original deprecated declaration 1350 */ 1351 @Deprecated 1352 public static int concatenate(char[] left, int leftStart, int leftLimit, 1353 char[] right, int rightStart, int rightLimit, 1354 char[] dest, int destStart, int destLimit, 1355 Normalizer.Mode mode, int options) { 1356 if(dest == null) { 1357 throw new IllegalArgumentException(); 1358 } 1359 1360 /* check for overlapping right and destination */ 1361 if (right == dest && rightStart < destLimit && destStart < rightLimit) { 1362 throw new IllegalArgumentException("overlapping right and dst ranges"); 1363 } 1364 1365 /* allow left==dest */ 1366 StringBuilder destBuilder=new StringBuilder(leftLimit-leftStart+rightLimit-rightStart+16); 1367 destBuilder.append(left, leftStart, leftLimit-leftStart); 1368 CharBuffer rightBuffer=CharBuffer.wrap(right, rightStart, rightLimit-rightStart); 1369 mode.getNormalizer2(options).append(destBuilder, rightBuffer); 1370 int destLength=destBuilder.length(); 1371 if(destLength<=(destLimit-destStart)) { 1372 destBuilder.getChars(0, destLength, dest, destStart); 1373 return destLength; 1374 } else { 1375 throw new IndexOutOfBoundsException(Integer.toString(destLength)); 1376 } 1377 } 1378 1379 /** 1380 * Concatenate normalized strings, making sure that the result is normalized 1381 * as well. 
1382 * 1383 * If both the left and the right strings are in 1384 * the normalization form according to "mode", 1385 * then the result will be 1386 * 1387 * <code> 1388 * dest=normalize(left+right, mode) 1389 * </code> 1390 * 1391 * For details see concatenate 1392 * 1393 * @param left Left source string. 1394 * @param right Right source string. 1395 * @param mode The normalization mode. 1396 * @param options The normalization options, ORed together (0 for no options). 1397 * @return result 1398 * 1399 * @see #concatenate 1400 * @see #normalize 1401 * @see #next 1402 * @see #previous 1403 * @see #concatenate 1404 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1405 * @hide original deprecated declaration 1406 */ 1407 @Deprecated 1408 public static String concatenate(char[] left, char[] right,Mode mode, int options) { 1409 StringBuilder dest=new StringBuilder(left.length+right.length+16).append(left); 1410 return mode.getNormalizer2(options).append(dest, CharBuffer.wrap(right)).toString(); 1411 } 1412 1413 /** 1414 * Concatenate normalized strings, making sure that the result is normalized 1415 * as well. 1416 * 1417 * If both the left and the right strings are in 1418 * the normalization form according to "mode", 1419 * then the result will be 1420 * 1421 * <code> 1422 * dest=normalize(left+right, mode) 1423 * </code> 1424 * 1425 * With the input strings already being normalized, 1426 * this function will use next() and previous() 1427 * to find the adjacent end pieces of the input strings. 1428 * Only the concatenation of these end pieces will be normalized and 1429 * then concatenated with the remaining parts of the input strings. 1430 * 1431 * @param left Left source string. 1432 * @param right Right source string. 1433 * @param mode The normalization mode. 1434 * @param options The normalization options, ORed together (0 for no options). 
1435 * @return result 1436 * 1437 * @see #concatenate 1438 * @see #normalize 1439 * @see #next 1440 * @see #previous 1441 * @see #concatenate 1442 * @deprecated ICU 56 Use {@link Normalizer2} instead. 1443 * @hide original deprecated declaration 1444 */ 1445 @Deprecated 1446 public static String concatenate(String left, String right, Mode mode, int options) { 1447 StringBuilder dest=new StringBuilder(left.length()+right.length()+16).append(left); 1448 return mode.getNormalizer2(options).append(dest, right).toString(); 1449 } 1450 1451 /** 1452 * Gets the FC_NFKC closure value. 1453 * @param c The code point whose closure value is to be retrieved 1454 * @param dest The char array to receive the closure value 1455 * @return the length of the closure value; 0 if there is none 1456 * @deprecated ICU 56 1457 * @hide original deprecated declaration 1458 */ 1459 @Deprecated 1460 public static int getFC_NFKC_Closure(int c,char[] dest) { 1461 String closure=getFC_NFKC_Closure(c); 1462 int length=closure.length(); 1463 if(length!=0 && dest!=null && length<=dest.length) { 1464 closure.getChars(0, length, dest, 0); 1465 } 1466 return length; 1467 } 1468 /** 1469 * Gets the FC_NFKC closure value. 1470 * @param c The code point whose closure value is to be retrieved 1471 * @return String representation of the closure value; "" if there is none 1472 * @deprecated ICU 56 1473 * @hide original deprecated declaration 1474 */ 1475 @Deprecated 1476 public static String getFC_NFKC_Closure(int c) { 1477 // Compute the FC_NFKC_Closure on the fly: 1478 // We have the API for complete coverage of Unicode properties, although 1479 // this value by itself is not useful via API. 1480 // (What could be useful is a custom normalization table that combines 1481 // case folding and NFKC.) 1482 // For the derivation, see Unicode's DerivedNormalizationProps.txt. 
1483 Normalizer2 nfkc=NFKCModeImpl.INSTANCE.normalizer2; 1484 UCaseProps csp=UCaseProps.INSTANCE; 1485 // first: b = NFKC(Fold(a)) 1486 StringBuilder folded=new StringBuilder(); 1487 int folded1Length=csp.toFullFolding(c, folded, 0); 1488 if(folded1Length<0) { 1489 Normalizer2Impl nfkcImpl=((Norm2AllModes.Normalizer2WithImpl)nfkc).impl; 1490 if(nfkcImpl.getCompQuickCheck(nfkcImpl.getNorm16(c))!=0) { 1491 return ""; // c does not change at all under CaseFolding+NFKC 1492 } 1493 folded.appendCodePoint(c); 1494 } else { 1495 if(folded1Length>UCaseProps.MAX_STRING_LENGTH) { 1496 folded.appendCodePoint(folded1Length); 1497 } 1498 } 1499 String kc1=nfkc.normalize(folded); 1500 // second: c = NFKC(Fold(b)) 1501 String kc2=nfkc.normalize(UCharacter.foldCase(kc1, 0)); 1502 // if (c != b) add the mapping from a to c 1503 if(kc1.equals(kc2)) { 1504 return ""; 1505 } else { 1506 return kc2; 1507 } 1508 } 1509 1510 //------------------------------------------------------------------------- 1511 // Iteration API 1512 //------------------------------------------------------------------------- 1513 1514 /** 1515 * Return the current character in the normalized text. 1516 * @return The codepoint as an int 1517 * @deprecated ICU 56 1518 * @hide original deprecated declaration 1519 */ 1520 @Deprecated 1521 public int current() { 1522 if(bufferPos<buffer.length() || nextNormalize()) { 1523 return buffer.codePointAt(bufferPos); 1524 } else { 1525 return DONE; 1526 } 1527 } 1528 1529 /** 1530 * Return the next character in the normalized text and advance 1531 * the iteration position by one. If the end 1532 * of the text has already been reached, {@link #DONE} is returned. 
1533 * @return The codepoint as an int 1534 * @deprecated ICU 56 1535 * @hide original deprecated declaration 1536 */ 1537 @Deprecated 1538 public int next() { 1539 if(bufferPos<buffer.length() || nextNormalize()) { 1540 int c=buffer.codePointAt(bufferPos); 1541 bufferPos+=Character.charCount(c); 1542 return c; 1543 } else { 1544 return DONE; 1545 } 1546 } 1547 1548 1549 /** 1550 * Return the previous character in the normalized text and decrement 1551 * the iteration position by one. If the beginning 1552 * of the text has already been reached, {@link #DONE} is returned. 1553 * @return The codepoint as an int 1554 * @deprecated ICU 56 1555 * @hide original deprecated declaration 1556 */ 1557 @Deprecated 1558 public int previous() { 1559 if(bufferPos>0 || previousNormalize()) { 1560 int c=buffer.codePointBefore(bufferPos); 1561 bufferPos-=Character.charCount(c); 1562 return c; 1563 } else { 1564 return DONE; 1565 } 1566 } 1567 1568 /** 1569 * Reset the index to the beginning of the text. 1570 * This is equivalent to setIndexOnly(startIndex)). 1571 * @deprecated ICU 56 1572 * @hide original deprecated declaration 1573 */ 1574 @Deprecated 1575 public void reset() { 1576 text.setToStart(); 1577 currentIndex=nextIndex=0; 1578 clearBuffer(); 1579 } 1580 1581 /** 1582 * Set the iteration position in the input text that is being normalized, 1583 * without any immediate normalization. 1584 * After setIndexOnly(), getIndex() will return the same index that is 1585 * specified here. 1586 * 1587 * @param index the desired index in the input text. 1588 * @deprecated ICU 56 1589 * @hide original deprecated declaration 1590 */ 1591 @Deprecated 1592 public void setIndexOnly(int index) { 1593 text.setIndex(index); // validates index 1594 currentIndex=nextIndex=index; 1595 clearBuffer(); 1596 } 1597 1598 /** 1599 * Set the iteration position in the input text that is being normalized 1600 * and return the first normalized character at that position. 
1601 * <p> 1602 * <b>Note:</b> This method sets the position in the <em>input</em> text, 1603 * while {@link #next} and {@link #previous} iterate through characters 1604 * in the normalized <em>output</em>. This means that there is not 1605 * necessarily a one-to-one correspondence between characters returned 1606 * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and 1607 * returned from <tt>setIndex</tt> and {@link #getIndex}. 1608 * <p> 1609 * @param index the desired index in the input text. 1610 * 1611 * @return the first normalized character that is the result of iterating 1612 * forward starting at the given index. 1613 * 1614 * @throws IllegalArgumentException if the given index is less than 1615 * {@link #getBeginIndex} or greater than {@link #getEndIndex}. 1616 * @deprecated ICU 3.2 1617 * @obsolete ICU 3.2 1618 * @hide original deprecated declaration 1619 */ 1620 @Deprecated 1621 ///CLOVER:OFF 1622 public int setIndex(int index) { 1623 setIndexOnly(index); 1624 return current(); 1625 } 1626 ///CLOVER:ON 1627 /** 1628 * Retrieve the index of the start of the input text. This is the begin 1629 * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the 1630 * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating 1631 * @deprecated ICU 2.2. Use startIndex() instead. 1632 * @return The codepoint as an int 1633 * @see #startIndex 1634 * @hide original deprecated declaration 1635 */ 1636 @Deprecated 1637 public int getBeginIndex() { 1638 return 0; 1639 } 1640 1641 /** 1642 * Retrieve the index of the end of the input text. This is the end index 1643 * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> 1644 * over which this <tt>Normalizer</tt> is iterating 1645 * @deprecated ICU 2.2. Use endIndex() instead. 
1646 * @return The codepoint as an int 1647 * @see #endIndex 1648 * @hide original deprecated declaration 1649 */ 1650 @Deprecated 1651 public int getEndIndex() { 1652 return endIndex(); 1653 } 1654 /** 1655 * Return the first character in the normalized text. This resets 1656 * the <tt>Normalizer's</tt> position to the beginning of the text. 1657 * @return The codepoint as an int 1658 * @deprecated ICU 56 1659 * @hide original deprecated declaration 1660 */ 1661 @Deprecated 1662 public int first() { 1663 reset(); 1664 return next(); 1665 } 1666 1667 /** 1668 * Return the last character in the normalized text. This resets 1669 * the <tt>Normalizer's</tt> position to be just before the 1670 * the input text corresponding to that normalized character. 1671 * @return The codepoint as an int 1672 * @deprecated ICU 56 1673 * @hide original deprecated declaration 1674 */ 1675 @Deprecated 1676 public int last() { 1677 text.setToLimit(); 1678 currentIndex=nextIndex=text.getIndex(); 1679 clearBuffer(); 1680 return previous(); 1681 } 1682 1683 /** 1684 * Retrieve the current iteration position in the input text that is 1685 * being normalized. This method is useful in applications such as 1686 * searching, where you need to be able to determine the position in 1687 * the input text that corresponds to a given normalized output character. 1688 * <p> 1689 * <b>Note:</b> This method sets the position in the <em>input</em>, while 1690 * {@link #next} and {@link #previous} iterate through characters in the 1691 * <em>output</em>. This means that there is not necessarily a one-to-one 1692 * correspondence between characters returned by <tt>next</tt> and 1693 * <tt>previous</tt> and the indices passed to and returned from 1694 * <tt>setIndex</tt> and {@link #getIndex}. 
1695 * @return The current iteration position 1696 * @deprecated ICU 56 1697 * @hide original deprecated declaration 1698 */ 1699 @Deprecated 1700 public int getIndex() { 1701 if(bufferPos<buffer.length()) { 1702 return currentIndex; 1703 } else { 1704 return nextIndex; 1705 } 1706 } 1707 1708 /** 1709 * Retrieve the index of the start of the input text. This is the begin 1710 * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the 1711 * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating 1712 * @return The current iteration position 1713 * @deprecated ICU 56 1714 * @hide original deprecated declaration 1715 */ 1716 @Deprecated 1717 public int startIndex() { 1718 return 0; 1719 } 1720 1721 /** 1722 * Retrieve the index of the end of the input text. This is the end index 1723 * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> 1724 * over which this <tt>Normalizer</tt> is iterating 1725 * @return The current iteration position 1726 * @deprecated ICU 56 1727 * @hide original deprecated declaration 1728 */ 1729 @Deprecated 1730 public int endIndex() { 1731 return text.getLength(); 1732 } 1733 1734 //------------------------------------------------------------------------- 1735 // Iterator attributes 1736 //------------------------------------------------------------------------- 1737 /** 1738 * Set the normalization mode for this object. 1739 * <p> 1740 * <b>Note:</b>If the normalization mode is changed while iterating 1741 * over a string, calls to {@link #next} and {@link #previous} may 1742 * return previously buffers characters in the old normalization mode 1743 * until the iteration is able to re-sync at the next base character. 1744 * It is safest to call {@link #setText setText()}, {@link #first}, 1745 * {@link #last}, etc. after calling <tt>setMode</tt>. 1746 * <p> 1747 * @param newMode the new mode for this <tt>Normalizer</tt>. 
* The supported modes are:
     * <ul>
     *  <li>{@link #NFC}    - Unicode canonical decomposition
     *                        followed by canonical composition.
     *  <li>{@link #NFKC}   - Unicode compatibility decomposition
     *                        followed by canonical composition.
     *  <li>{@link #NFD}    - Unicode canonical decomposition
     *  <li>{@link #NFKD}   - Unicode compatibility decomposition.
     *  <li>{@link #NONE}   - Do nothing but return characters
     *                        from the underlying input text.
     * </ul>
     *
     * @see #getMode
     * @deprecated ICU 56
     * @hide original deprecated declaration
     */
    @Deprecated
    public void setMode(Mode newMode) {
        mode = newMode;
        // The Normalizer2 delegate depends on both the mode and the current
        // options, so it is looked up again whenever the mode changes.
        norm2 = mode.getNormalizer2(options);
    }
    /**
     * Return the basic operation performed by this <tt>Normalizer</tt>
     *
     * @return the mode most recently stored in this object
     * @see #setMode
     * @deprecated ICU 56
     * @hide original deprecated declaration
     */
    @Deprecated
    public Mode getMode() {
        return mode;
    }
    /**
     * Set options that affect this <tt>Normalizer</tt>'s operation.
     * Options do not change the basic composition or decomposition operation
     * that is being performed, but they control whether
     * certain optional portions of the operation are done.
     * Currently the only available option is:
     *
     * <ul>
     *   <li>{@link #UNICODE_3_2} - Use Normalization conforming to Unicode version 3.2.
     * </ul>
     *
     * @param   option  the option whose value is to be set.
     * @param   value   the new setting for the option.  Use <tt>true</tt> to
     *                  turn the option on and <tt>false</tt> to turn it off.
1794 * 1795 * @see #getOption 1796 * @deprecated ICU 56 1797 * @hide original deprecated declaration 1798 */ 1799 @Deprecated 1800 public void setOption(int option,boolean value) { 1801 if (value) { 1802 options |= option; 1803 } else { 1804 options &= (~option); 1805 } 1806 norm2 = mode.getNormalizer2(options); 1807 } 1808 1809 /** 1810 * Determine whether an option is turned on or off. 1811 * <p> 1812 * @see #setOption 1813 * @deprecated ICU 56 1814 * @hide original deprecated declaration 1815 */ 1816 @Deprecated 1817 public int getOption(int option) { 1818 if((options & option)!=0) { 1819 return 1 ; 1820 } else { 1821 return 0; 1822 } 1823 } 1824 1825 /** 1826 * Gets the underlying text storage 1827 * @param fillIn the char buffer to fill the UTF-16 units. 1828 * The length of the buffer should be equal to the length of the 1829 * underlying text storage 1830 * @throws IndexOutOfBoundsException If the index passed for the array is invalid. 1831 * @see #getLength 1832 * @deprecated ICU 56 1833 * @hide original deprecated declaration 1834 */ 1835 @Deprecated 1836 public int getText(char[] fillIn) { 1837 return text.getText(fillIn); 1838 } 1839 1840 /** 1841 * Gets the length of underlying text storage 1842 * @return the length 1843 * @deprecated ICU 56 1844 * @hide original deprecated declaration 1845 */ 1846 @Deprecated 1847 public int getLength() { 1848 return text.getLength(); 1849 } 1850 1851 /** 1852 * Returns the text under iteration as a string 1853 * @return a copy of the text under iteration. 1854 * @deprecated ICU 56 1855 * @hide original deprecated declaration 1856 */ 1857 @Deprecated 1858 public String getText() { 1859 return text.getText(); 1860 } 1861 1862 /** 1863 * Set the input text over which this <tt>Normalizer</tt> will iterate. 1864 * The iteration position is set to the beginning of the input text. 1865 * @param newText The new string to be normalized. 
1866 * @deprecated ICU 56 1867 * @hide original deprecated declaration 1868 */ 1869 @Deprecated 1870 public void setText(StringBuffer newText) { 1871 UCharacterIterator newIter = UCharacterIterator.getInstance(newText); 1872 if (newIter == null) { 1873 throw new IllegalStateException("Could not create a new UCharacterIterator"); 1874 } 1875 text = newIter; 1876 reset(); 1877 } 1878 1879 /** 1880 * Set the input text over which this <tt>Normalizer</tt> will iterate. 1881 * The iteration position is set to the beginning of the input text. 1882 * @param newText The new string to be normalized. 1883 * @deprecated ICU 56 1884 * @hide original deprecated declaration 1885 */ 1886 @Deprecated 1887 public void setText(char[] newText) { 1888 UCharacterIterator newIter = UCharacterIterator.getInstance(newText); 1889 if (newIter == null) { 1890 throw new IllegalStateException("Could not create a new UCharacterIterator"); 1891 } 1892 text = newIter; 1893 reset(); 1894 } 1895 1896 /** 1897 * Set the input text over which this <tt>Normalizer</tt> will iterate. 1898 * The iteration position is set to the beginning of the input text. 1899 * @param newText The new string to be normalized. 1900 * @deprecated ICU 56 1901 * @hide original deprecated declaration 1902 */ 1903 @Deprecated 1904 public void setText(String newText) { 1905 UCharacterIterator newIter = UCharacterIterator.getInstance(newText); 1906 if (newIter == null) { 1907 throw new IllegalStateException("Could not create a new UCharacterIterator"); 1908 } 1909 text = newIter; 1910 reset(); 1911 } 1912 1913 /** 1914 * Set the input text over which this <tt>Normalizer</tt> will iterate. 1915 * The iteration position is set to the beginning of the input text. 1916 * @param newText The new string to be normalized. 
1917 * @deprecated ICU 56 1918 * @hide original deprecated declaration 1919 */ 1920 @Deprecated 1921 public void setText(CharacterIterator newText) { 1922 UCharacterIterator newIter = UCharacterIterator.getInstance(newText); 1923 if (newIter == null) { 1924 throw new IllegalStateException("Could not create a new UCharacterIterator"); 1925 } 1926 text = newIter; 1927 reset(); 1928 } 1929 1930 /** 1931 * Set the input text over which this <tt>Normalizer</tt> will iterate. 1932 * The iteration position is set to the beginning of the string. 1933 * @param newText The new string to be normalized. 1934 * @deprecated ICU 56 1935 * @hide original deprecated declaration 1936 */ 1937 @Deprecated 1938 public void setText(UCharacterIterator newText) { 1939 try{ 1940 UCharacterIterator newIter = (UCharacterIterator)newText.clone(); 1941 if (newIter == null) { 1942 throw new IllegalStateException("Could not create a new UCharacterIterator"); 1943 } 1944 text = newIter; 1945 reset(); 1946 }catch(CloneNotSupportedException e) { 1947 throw new ICUCloneNotSupportedException("Could not clone the UCharacterIterator", e); 1948 } 1949 } 1950 1951 private void clearBuffer() { 1952 buffer.setLength(0); 1953 bufferPos=0; 1954 } 1955 1956 private boolean nextNormalize() { 1957 clearBuffer(); 1958 currentIndex=nextIndex; 1959 text.setIndex(nextIndex); 1960 // Skip at least one character so we make progress. 
1961 int c=text.nextCodePoint(); 1962 if(c<0) { 1963 return false; 1964 } 1965 StringBuilder segment=new StringBuilder().appendCodePoint(c); 1966 while((c=text.nextCodePoint())>=0) { 1967 if(norm2.hasBoundaryBefore(c)) { 1968 text.moveCodePointIndex(-1); 1969 break; 1970 } 1971 segment.appendCodePoint(c); 1972 } 1973 nextIndex=text.getIndex(); 1974 norm2.normalize(segment, buffer); 1975 return buffer.length()!=0; 1976 } 1977 1978 private boolean previousNormalize() { 1979 clearBuffer(); 1980 nextIndex=currentIndex; 1981 text.setIndex(currentIndex); 1982 StringBuilder segment=new StringBuilder(); 1983 int c; 1984 while((c=text.previousCodePoint())>=0) { 1985 if(c<=0xffff) { 1986 segment.insert(0, (char)c); 1987 } else { 1988 segment.insert(0, Character.toChars(c)); 1989 } 1990 if(norm2.hasBoundaryBefore(c)) { 1991 break; 1992 } 1993 } 1994 currentIndex=text.getIndex(); 1995 norm2.normalize(segment, buffer); 1996 bufferPos=buffer.length(); 1997 return buffer.length()!=0; 1998 } 1999 2000 /* compare canonically equivalent ------------------------------------------- */ 2001 2002 // TODO: Broaden the public compare(String, String, options) API like this. Ticket #7407 2003 private static int internalCompare(CharSequence s1, CharSequence s2, int options) { 2004 int normOptions=options>>>COMPARE_NORM_OPTIONS_SHIFT; 2005 options|= COMPARE_EQUIV; 2006 2007 /* 2008 * UAX #21 Case Mappings, as fixed for Unicode version 4 2009 * (see Jitterbug 2021), defines a canonical caseless match as 2010 * 2011 * A string X is a canonical caseless match 2012 * for a string Y if and only if 2013 * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y))) 2014 * 2015 * For better performance, we check for FCD (or let the caller tell us that 2016 * both strings are in FCD) for the inner normalization. 2017 * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that 2018 * case-folding preserves the FCD-ness of a string. 
2019 * The outer normalization is then only performed by NormalizerImpl.cmpEquivFold() 2020 * when there is a difference. 2021 * 2022 * Exception: When using the Turkic case-folding option, we do perform 2023 * full NFD first. This is because in the Turkic case precomposed characters 2024 * with 0049 capital I or 0069 small i fold differently whether they 2025 * are first decomposed or not, so an FCD check - a check only for 2026 * canonical order - is not sufficient. 2027 */ 2028 if((options&INPUT_IS_FCD)==0 || (options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) { 2029 Normalizer2 n2; 2030 if((options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) { 2031 n2=NFD.getNormalizer2(normOptions); 2032 } else { 2033 n2=FCD.getNormalizer2(normOptions); 2034 } 2035 2036 // check if s1 and/or s2 fulfill the FCD conditions 2037 int spanQCYes1=n2.spanQuickCheckYes(s1); 2038 int spanQCYes2=n2.spanQuickCheckYes(s2); 2039 2040 /* 2041 * ICU 2.4 had a further optimization: 2042 * If both strings were not in FCD, then they were both NFD'ed, 2043 * and the COMPARE_EQUIV option was turned off. 2044 * It is not entirely clear that this is valid with the current 2045 * definition of the canonical caseless match. 2046 * Therefore, ICU 2.6 removes that optimization. 2047 */ 2048 2049 if(spanQCYes1<s1.length()) { 2050 StringBuilder fcd1=new StringBuilder(s1.length()+16).append(s1, 0, spanQCYes1); 2051 s1=n2.normalizeSecondAndAppend(fcd1, s1.subSequence(spanQCYes1, s1.length())); 2052 } 2053 if(spanQCYes2<s2.length()) { 2054 StringBuilder fcd2=new StringBuilder(s2.length()+16).append(s2, 0, spanQCYes2); 2055 s2=n2.normalizeSecondAndAppend(fcd2, s2.subSequence(spanQCYes2, s2.length())); 2056 } 2057 } 2058 2059 return cmpEquivFold(s1, s2, options); 2060 } 2061 2062 /* 2063 * Compare two strings for canonical equivalence. 2064 * Further options include case-insensitive comparison and 2065 * code point order (as opposed to code unit order). 2066 * 2067 * In this function, canonical equivalence is optional as well. 
* If canonical equivalence is tested, then both strings must fulfill
     * the FCD check.
     *
     * Semantically, this is equivalent to
     *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
     * where code point order, NFD and foldCase are all optional.
     *
     * String comparisons almost always yield results before processing both strings
     * completely.
     * They are generally more efficient working incrementally instead of
     * performing the sub-processing (strlen, normalization, case-folding)
     * on the entire strings first.
     *
     * It is also unnecessary to normalize identical characters.
     *
     * This function works in principle as follows:
     *
     * loop {
     *   get one code unit c1 from s1 (-1 if end of source)
     *   get one code unit c2 from s2 (-1 if end of source)
     *
     *   if(either string finished) {
     *     return result;
     *   }
     *   if(c1==c2) {
     *     continue;
     *   }
     *
     *   // c1!=c2
     *   try to decompose/case-fold c1/c2, and continue if one does;
     *
     *   // still c1!=c2 and neither decomposes/case-folds, return result
     *   return c1-c2;
     * }
     *
     * When a character decomposes, then the pointer for that source changes to
     * the decomposition, pushing the previous pointer onto a stack.
     * When the end of the decomposition is reached, then the code unit reader
     * pops the previous source from the stack.
     * (Same for case-folding.)
     *
     * This is complicated further by operating on variable-width UTF-16.
     * The top part of the loop works on code units, while lookups for decomposition
     * and case-folding need code points.
     * Code points are assembled after the equality/end-of-source part.
     * The source pointer is only advanced beyond all code units when the code point
     * actually decomposes/case-folds.
     *
     * If we were on a trail surrogate unit when assembling a code point,
     * and the code point decomposes/case-folds, then the decomposition/folding
     * result must be compared with the part of the other string that corresponds to
     * this string's lead surrogate.
     * Since we only assemble a code point when hitting a trail unit when the
     * preceding lead units were identical, we back up the other string by one unit
     * in such a case.
     *
     * The optional code point order comparison at the end works with
     * the same fix-up as the other code point order comparison functions.
     * See ustring.c and the comment near the end of this function.
     *
     * Assumption: A decomposition or case-folding result string never contains
     * a single surrogate. This is a safe assumption in the Unicode Standard.
     * Therefore, we do not need to check for surrogate pairs across
     * decomposition/case-folding boundaries.
     *
     * Further assumptions (see verifications in tstnorm.cpp):
     * The API function checks for FCD first, while the core function
     * first case-folds and then decomposes. This requires that case-folding does not
     * un-FCD any strings.
     *
     * The API function may also NFD the input and turn off decomposition.
     * This requires that case-folding does not un-NFD strings either.
     *
     * TODO If either of the above two assumptions is violated,
     * then this entire code must be re-thought.
     * If this happens, then a simple solution is to case-fold both strings up front
     * and to turn off UNORM_INPUT_IS_FCD.
     * We already do this when not both strings are in FCD because makeFCD
     * would be a partial NFD before the case folding, which does not work.
     * Note that all of this is only a problem when case-folding _and_
     * canonical equivalence come together.
     * (Comments in unorm_compare() are more up to date than this TODO.)
*/

    /* stack element for previous-level source/decomposition pointers */
    private static final class CmpEquivLevel {
        /* the suspended source; null marks an intermediate level that was skipped */
        CharSequence cs;
        /* read position in cs at which to resume */
        int s;
    };
    /* Allocates a fresh two-entry stack; cmpEquivFold() creates one per string only when needed. */
    private static final CmpEquivLevel[] createCmpEquivLevelStack() {
        return new CmpEquivLevel[] {
            new CmpEquivLevel(), new CmpEquivLevel()
        };
    }

    /**
     * Internal option for unorm_cmpEquivFold() for decomposing.
     * If not set, just do strcasecmp().
     */
    private static final int COMPARE_EQUIV=0x80000;

    /* internal function; package visibility for use by UTF16.StringComparator */
    /**
     * Incrementally compares two UTF-16 sequences code unit by code unit.
     * When two units differ, the affected code point is replaced by its case
     * folding (if COMPARE_IGNORE_CASE is set) and/or its decomposition
     * (if COMPARE_EQUIV is set), and the comparison continues inside the
     * replacement text.
     *
     * @param cs1 first sequence; not checked for null
     * @param cs2 second sequence; not checked for null
     * @param options bit set; this method tests COMPARE_EQUIV, COMPARE_IGNORE_CASE
     *                and COMPARE_CODE_POINT_ORDER, and passes the whole value on to
     *                UCaseProps.toFullFolding()
     * @return 0 if both sequences end at the same time without a difference,
     *         -1 if the first one ends earlier, 1 if the second one ends earlier,
     *         otherwise the difference c1-c2 of the first differing code units
     *         (after the optional code point order fix-up)
     */
    /*package*/ static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
        Normalizer2Impl nfcImpl;
        UCaseProps csp;

        /* current-level start/limit - s1/s2 as current */
        int s1, s2, limit1, limit2;

        /* decomposition and case folding variables */
        int length;

        /* stacks of previous-level start/current/limit */
        CmpEquivLevel[] stack1=null, stack2=null;

        /* buffers for algorithmic decompositions */
        String decomp1, decomp2;

        /* case folding buffers, only use current-level start/limit */
        StringBuilder fold1, fold2;

        /* track which is the current level per string */
        int level1, level2;

        /* current code units, and code points for lookups */
        int c1, c2, cp1, cp2;

        /* no argument error checking because this itself is not an API */

        /*
         * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
         * otherwise this function must behave exactly as uprv_strCompare()
         * not checking for that here makes testing this function easier
         */

        /* normalization/properties data loaded? */
        if((options&COMPARE_EQUIV)!=0) {
            nfcImpl=Norm2AllModes.getNFCInstance().impl;
        } else {
            nfcImpl=null;
        }
        if((options&COMPARE_IGNORE_CASE)!=0) {
            csp=UCaseProps.INSTANCE;
            fold1=new StringBuilder();
            fold2=new StringBuilder();
        } else {
            csp=null;
            fold1=fold2=null;
        }

        /* initialize */
        s1=0;
        limit1=cs1.length();
        s2=0;
        limit2=cs2.length();

        level1=level2=0;
        c1=c2=-1;

        /* comparison loop */
        for(;;) {
            /*
             * here a code unit value of -1 means "get another code unit"
             * below it will mean "this source is finished"
             */

            if(c1<0) {
                /* get next code unit from string 1, post-increment */
                for(;;) {
                    if(s1==limit1) {
                        if(level1==0) {
                            c1=-1;
                            break;
                        }
                    } else {
                        c1=cs1.charAt(s1++);
                        break;
                    }

                    /* reached end of level buffer, pop one level */
                    /* (entries whose cs is null are skipped placeholder levels) */
                    do {
                        --level1;
                        cs1=stack1[level1].cs;
                    } while(cs1==null);
                    s1=stack1[level1].s;
                    limit1=cs1.length();
                }
            }

            if(c2<0) {
                /* get next code unit from string 2, post-increment */
                for(;;) {
                    if(s2==limit2) {
                        if(level2==0) {
                            c2=-1;
                            break;
                        }
                    } else {
                        c2=cs2.charAt(s2++);
                        break;
                    }

                    /* reached end of level buffer, pop one level */
                    /* (entries whose cs is null are skipped placeholder levels) */
                    do {
                        --level2;
                        cs2=stack2[level2].cs;
                    } while(cs2==null);
                    s2=stack2[level2].s;
                    limit2=cs2.length();
                }
            }

            /*
             * compare c1 and c2
             * either variable c1, c2 is -1 only if the corresponding string is finished
             */
            if(c1==c2) {
                if(c1<0) {
                    return 0;   /* c1==c2==-1 indicating end of strings */
                }
                c1=c2=-1;       /* make us fetch new code units */
                continue;
            } else if(c1<0) {
                return -1;      /* string 1 ends before string 2 */
            } else if(c2<0) {
                return 1;       /* string 2 ends before string 1 */
            }
            /* c1!=c2 && c1>=0 && c2>=0 */

            /* get complete code points for c1, c2 for lookups if either is a surrogate */
            cp1=c1;
            if(UTF16.isSurrogate((char)c1)) {
                char c;

                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                    if(s1!=limit1 && Character.isLowSurrogate(c=cs1.charAt(s1))) {
                        /* advance ++s1; only below if cp1 decomposes/case-folds */
                        cp1=Character.toCodePoint((char)c1, c);
                    }
                } else /* isTrail(c1) */ {
                    /* s1 was post-incremented, so c1 sits at s1-1 and its lead, if any, at s1-2 */
                    if(0<=(s1-2) && Character.isHighSurrogate(c=cs1.charAt(s1-2))) {
                        cp1=Character.toCodePoint(c, (char)c1);
                    }
                }
            }

            cp2=c2;
            if(UTF16.isSurrogate((char)c2)) {
                char c;

                if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                    if(s2!=limit2 && Character.isLowSurrogate(c=cs2.charAt(s2))) {
                        /* advance ++s2; only below if cp2 decomposes/case-folds */
                        cp2=Character.toCodePoint((char)c2, c);
                    }
                } else /* isTrail(c2) */ {
                    /* s2 was post-incremented, so c2 sits at s2-1 and its lead, if any, at s2-2 */
                    if(0<=(s2-2) && Character.isHighSurrogate(c=cs2.charAt(s2-2))) {
                        cp2=Character.toCodePoint(c, (char)c2);
                    }
                }
            }

            /*
             * go down one level for each string
             * continue with the main loop as soon as there is a real change
             */

            /* case folding is only attempted on the original text (level 0) */
            if( level1==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
                (length=csp.toFullFolding(cp1, fold1, options))>=0
            ) {
                /* cp1 case-folds to the code point "length" or to p[length] */
                if(UTF16.isSurrogate((char)c1)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                        /* advance beyond source surrogate pair if it case-folds */
                        ++s1;
                    } else /* isTrail(c1) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s2;
                        c2=cs2.charAt(s2-1);
                    }
                }

                /* push current level pointers */
                if(stack1==null) {
                    stack1=createCmpEquivLevelStack();
                }
                stack1[0].cs=cs1;
                stack1[0].s=s1;
                ++level1;

                /* copy the folding result to fold1[] */
                /* Java: the buffer was probably not empty, remove the old contents */
                if(length<=UCaseProps.MAX_STRING_LENGTH) {
                    /*
                     * keeps only the last "length" chars of fold1 - presumably the
                     * folding string that toFullFolding() just appended
                     * (NOTE(review): confirm against UCaseProps)
                     */
                    fold1.delete(0, fold1.length()-length);
                } else {
                    /* here "length" is itself the folded code point */
                    fold1.setLength(0);
                    fold1.appendCodePoint(length);
                }

                /* set next level pointers to case folding */
                cs1=fold1;
                s1=0;
                limit1=fold1.length();

                /* get ready to read from decomposition, continue with loop */
                c1=-1;
                continue;
            }

            /* case folding is only attempted on the original text (level 0) */
            if( level2==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
                (length=csp.toFullFolding(cp2, fold2, options))>=0
            ) {
                /* cp2 case-folds to the code point "length" or to p[length] */
                if(UTF16.isSurrogate((char)c2)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                        /* advance beyond source surrogate pair if it case-folds */
                        ++s2;
                    } else /* isTrail(c2) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s1;
                        c1=cs1.charAt(s1-1);
                    }
                }

                /* push current level pointers */
                if(stack2==null) {
                    stack2=createCmpEquivLevelStack();
                }
                stack2[0].cs=cs2;
                stack2[0].s=s2;
                ++level2;

                /* copy the folding result to fold2[] */
                /* Java: the buffer was probably not empty, remove the old contents */
                if(length<=UCaseProps.MAX_STRING_LENGTH) {
                    /*
                     * keeps only the last "length" chars of fold2 - presumably the
                     * folding string that toFullFolding() just appended
                     * (NOTE(review): confirm against UCaseProps)
                     */
                    fold2.delete(0, fold2.length()-length);
                } else {
                    /* here "length" is itself the folded code point */
                    fold2.setLength(0);
                    fold2.appendCodePoint(length);
                }

                /* set next level pointers to case folding */
                cs2=fold2;
                s2=0;
                limit2=fold2.length();

                /* get ready to read from decomposition, continue with loop */
                c2=-1;
                continue;
            }

            /* decomposition is attempted on the original text and on case-folded text (levels 0 and 1) */
            if( level1<2 && (options&COMPARE_EQUIV)!=0 &&
                (decomp1=nfcImpl.getDecomposition(cp1))!=null
            ) {
                /* cp1 decomposes into p[length] */
                if(UTF16.isSurrogate((char)c1)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
                        /* advance beyond source surrogate pair if it decomposes */
                        ++s1;
                    } else /* isTrail(c1) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s2;
                        c2=cs2.charAt(s2-1);
                    }
                }

                /* push current level pointers */
                if(stack1==null) {
                    stack1=createCmpEquivLevelStack();
                }
                stack1[level1].cs=cs1;
                stack1[level1].s=s1;
                ++level1;

                /* set empty intermediate level if skipped */
                if(level1<2) {
                    stack1[level1++].cs=null;
                }

                /* set next level pointers to decomposition */
                cs1=decomp1;
                s1=0;
                limit1=decomp1.length();

                /* get ready to read from decomposition, continue with loop */
                c1=-1;
                continue;
            }

            /* decomposition is attempted on the original text and on case-folded text (levels 0 and 1) */
            if( level2<2 && (options&COMPARE_EQUIV)!=0 &&
                (decomp2=nfcImpl.getDecomposition(cp2))!=null
            ) {
                /* cp2 decomposes into p[length] */
                if(UTF16.isSurrogate((char)c2)) {
                    if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
                        /* advance beyond source surrogate pair if it decomposes */
                        ++s2;
                    } else /* isTrail(c2) */ {
                        /*
                         * we got a supplementary code point when hitting its trail surrogate,
                         * therefore the lead surrogate must have been the same as in the other string;
                         * compare this decomposition with the lead surrogate in the other string
                         * remember that this simulates bulk text replacement:
                         * the decomposition would replace the entire code point
                         */
                        --s1;
                        c1=cs1.charAt(s1-1);
                    }
                }

                /* push current level pointers */
                if(stack2==null) {
                    stack2=createCmpEquivLevelStack();
                }
                stack2[level2].cs=cs2;
                stack2[level2].s=s2;
                ++level2;

                /* set empty intermediate level if skipped */
                if(level2<2) {
                    stack2[level2++].cs=null;
                }

                /* set next level pointers to decomposition */
                cs2=decomp2;
                s2=0;
                limit2=decomp2.length();

                /* get ready to read from decomposition, continue with loop */
                c2=-1;
                continue;
            }

            /*
             * no decomposition/case folding, max level for both sides:
             * return difference result
             *
             * code point order comparison must not just return cp1-cp2
             * because when single surrogates are present then the surrogate pairs
             * that formed cp1 and cp2 may be from different string indexes
             *
             * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
             * c1=d800 cp1=10001 c2=dc00 cp2=10000
             * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
             *
             * therefore, use same fix-up as in ustring.c/uprv_strCompare()
             * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
             * so we have slightly different pointer/start/limit comparisons here
             */

            if(c1>=0xd800 && c2>=0xd800 && (options&COMPARE_CODE_POINT_ORDER)!=0) {
                /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
                if(
                    (c1<=0xdbff && s1!=limit1 && Character.isLowSurrogate(cs1.charAt(s1))) ||
                    (Character.isLowSurrogate((char)c1) && 0!=(s1-1) && Character.isHighSurrogate(cs1.charAt(s1-2)))
                ) {
                    /* part of a surrogate pair, leave >=d800 */
                } else {
                    /* BMP code point - may be surrogate code point - make <d800 */
                    c1-=0x2800;
                }

                if(
                    (c2<=0xdbff && s2!=limit2 && Character.isLowSurrogate(cs2.charAt(s2))) ||
                    (Character.isLowSurrogate((char)c2) && 0!=(s2-1) && Character.isHighSurrogate(cs2.charAt(s2-2)))
                ) {
                    /* part of a surrogate pair, leave >=d800 */
                } else {
                    /* BMP code point - may be surrogate code point - make <d800 */
                    c2-=0x2800;
                }
            }

            return c1-c2;
        }
    }

    /**
     * An Appendable that writes into a char array with a capacity that may be
     * less than array.length.
     * (By contrast, CharBuffer will write beyond destLimit all the way up to array.length.)
     * <p>
     * An overflow is only reported at the end, for the old Normalizer API functions that write
     * to char arrays.
*/
    private static final class CharsAppendable implements Appendable {
        // Destination array and the window [start, limit) that may be written.
        private final char[] chars;
        private final int start, limit;
        // Next write position; keeps advancing past limit on overflow so that
        // length() can detect and report it.
        private int offset;

        public CharsAppendable(char[] dest, int destStart, int destLimit) {
            chars = dest;
            start = destStart;
            offset = destStart;
            limit = destLimit;
        }

        /**
         * Returns the number of chars appended so far.
         *
         * @throws IndexOutOfBoundsException if more was appended than fits; the
         *         message is the length that would have been required
         */
        public int length() {
            final int appended = offset - start;
            if (offset > limit) {
                throw new IndexOutOfBoundsException(Integer.toString(appended));
            }
            return appended;
        }

        @Override
        public Appendable append(char c) {
            // Store only while there is room, but always count the char.
            if (offset < limit) {
                chars[offset] = c;
            }
            ++offset;
            return this;
        }

        @Override
        public Appendable append(CharSequence s) {
            return append(s, 0, s.length());
        }

        @Override
        public Appendable append(CharSequence s, int sStart, int sLimit) {
            final int count = sLimit - sStart;
            if (count > (limit - offset)) {
                // Does not fit: copy nothing, only record the required size.
                offset += count;
                return this;
            }
            // TODO: Is there a better way to copy the characters?
            for (; sStart < sLimit; ++sStart) {
                chars[offset++] = s.charAt(sStart);
            }
            return this;
        }
    }
}