1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package android.icu.lang; 12 13 import java.lang.ref.SoftReference; 14 import java.util.HashMap; 15 import java.util.Iterator; 16 import java.util.Locale; 17 import java.util.Map; 18 19 import android.icu.impl.CaseMapImpl; 20 import android.icu.impl.IllegalIcuArgumentException; 21 import android.icu.impl.Trie2; 22 import android.icu.impl.UBiDiProps; 23 import android.icu.impl.UCaseProps; 24 import android.icu.impl.UCharacterName; 25 import android.icu.impl.UCharacterNameChoice; 26 import android.icu.impl.UCharacterProperty; 27 import android.icu.impl.UCharacterUtility; 28 import android.icu.impl.UPropertyAliases; 29 import android.icu.lang.UCharacterEnums.ECharacterCategory; 30 import android.icu.lang.UCharacterEnums.ECharacterDirection; 31 import android.icu.text.BreakIterator; 32 import android.icu.text.Normalizer2; 33 import android.icu.util.RangeValueIterator; 34 import android.icu.util.ULocale; 35 import android.icu.util.ValueIterator; 36 import android.icu.util.VersionInfo; 37 38 /** 39 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 40 * 41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 42 * These extensions provide support for more Unicode properties. 43 * Each ICU release supports the latest version of Unicode available at that time. 
44 * 45 * <p>For some time before Java 5 added support for supplementary Unicode code points, 46 * the ICU UCharacter class and many other ICU classes already supported them. 47 * Some UCharacter methods and constants were widened slightly differently than 48 * how the Character class methods and constants were widened later. 49 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 50 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 51 * 52 * <p>Code points are represented in these APIs using ints. While it would be 53 * more convenient in Java to have a separate primitive datatype for them, 54 * ints suffice in the meantime. 55 * 56 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 57 * properties, the main differences between UCharacter and Character are: 58 * <ul> 59 * <li> UCharacter is not designed to be a char wrapper and does not have 60 * APIs that involve management of that single char.<br> 61 * These include: 62 * <ul> 63 * <li> char charValue(), 64 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 65 * </ul> 66 * <li> UCharacter does not include Character APIs that are deprecated, nor 67 * does it include the Java-specific character information, such as 68 * boolean isJavaIdentifierPart(char ch). 69 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 70 * values '10' - '35'. UCharacter also does this in digit and 71 * getNumericValue, to adhere to the Java semantics of these 72 * methods. The new methods unicodeDigit and 73 * getUnicodeNumericValue do not treat the above code points 74 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 75 * </ul> 76 * <p> 77 * In addition to Java compatibility functions, which calculate derived properties, 78 * this API provides low-level access to the Unicode Character Database. 
79 * <p> 80 * Unicode assigns each code point (not just assigned characters) values for 81 * many properties. 82 * Most of them are simple boolean flags, or constants from a small enumerated list. 83 * For some properties, values are strings or other relatively more complex types. 84 * <p> 85 * For more information see 86 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 87 * (http://www.unicode.org/ucd/) 88 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 89 * User Guide chapter on Properties</a> 90 * (http://www.icu-project.org/userguide/properties.html). 91 * <p> 92 * There are also functions that provide easy migration from C/POSIX functions 93 * like isblank(). Their use is generally discouraged because the C/POSIX 94 * standards do not define their semantics beyond the ASCII range, which means 95 * that different implementations exhibit very different behavior. 96 * Instead, Unicode properties should be used directly. 97 * <p> 98 * There are also only a few, broad C/POSIX character classes, and they tend 99 * to be used for conflicting purposes. For example, the "isalpha()" class 100 * is sometimes used to determine word boundaries, while a more sophisticated 101 * approach would at least distinguish initial letters from continuation 102 * characters (the latter including combining marks). 103 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 104 * Another example: There is no "istitle()" class for titlecase characters. 105 * <p> 106 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 107 * ICU implements them according to the Standard Recommendations in 108 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 109 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
110 * <p> 111 * API access for C/POSIX character classes is as follows: 112 * <pre>{@code 113 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 114 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 115 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 116 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 117 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 118 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 119 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 120 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 121 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 122 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 123 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 124 * - cntrl: getType(c)==CONTROL 125 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 126 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 127 * <p> 128 * The C/POSIX character classes are also available in UnicodeSet patterns, 129 * using patterns like [:graph:] or \p{graph}. 130 * 131 * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions. 132 * Comparison:<ul> 133 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 134 * most of general categories "Z" (separators) + most whitespace ISO controls 135 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 136 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 137 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 138 * 139 * <p> 140 * This class is not subclassable. 
141 * 142 * @author Syn Wee Quek 143 * @see android.icu.lang.UCharacterEnums 144 */ 145 146 public final class UCharacter implements ECharacterCategory, ECharacterDirection 147 { 148 // public inner classes ---------------------------------------------- 149 150 /** 151 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 152 * 153 * A family of character subsets representing the character blocks in the 154 * Unicode specification, generated from Unicode Data file Blocks.txt. 155 * Character blocks generally define characters used for a specific script 156 * or purpose. A character is contained by at most one Unicode block. 157 * 158 * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU. 159 */ 160 public static final class UnicodeBlock extends Character.Subset 161 { 162 // block id corresponding to icu4c ----------------------------------- 163 164 /** 165 */ 166 public static final int INVALID_CODE_ID = -1; 167 /** 168 */ 169 public static final int BASIC_LATIN_ID = 1; 170 /** 171 */ 172 public static final int LATIN_1_SUPPLEMENT_ID = 2; 173 /** 174 */ 175 public static final int LATIN_EXTENDED_A_ID = 3; 176 /** 177 */ 178 public static final int LATIN_EXTENDED_B_ID = 4; 179 /** 180 */ 181 public static final int IPA_EXTENSIONS_ID = 5; 182 /** 183 */ 184 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 185 /** 186 */ 187 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 188 /** 189 * Unicode 3.2 renames this block to "Greek and Coptic". 
190 */ 191 public static final int GREEK_ID = 8; 192 /** 193 */ 194 public static final int CYRILLIC_ID = 9; 195 /** 196 */ 197 public static final int ARMENIAN_ID = 10; 198 /** 199 */ 200 public static final int HEBREW_ID = 11; 201 /** 202 */ 203 public static final int ARABIC_ID = 12; 204 /** 205 */ 206 public static final int SYRIAC_ID = 13; 207 /** 208 */ 209 public static final int THAANA_ID = 14; 210 /** 211 */ 212 public static final int DEVANAGARI_ID = 15; 213 /** 214 */ 215 public static final int BENGALI_ID = 16; 216 /** 217 */ 218 public static final int GURMUKHI_ID = 17; 219 /** 220 */ 221 public static final int GUJARATI_ID = 18; 222 /** 223 */ 224 public static final int ORIYA_ID = 19; 225 /** 226 */ 227 public static final int TAMIL_ID = 20; 228 /** 229 */ 230 public static final int TELUGU_ID = 21; 231 /** 232 */ 233 public static final int KANNADA_ID = 22; 234 /** 235 */ 236 public static final int MALAYALAM_ID = 23; 237 /** 238 */ 239 public static final int SINHALA_ID = 24; 240 /** 241 */ 242 public static final int THAI_ID = 25; 243 /** 244 */ 245 public static final int LAO_ID = 26; 246 /** 247 */ 248 public static final int TIBETAN_ID = 27; 249 /** 250 */ 251 public static final int MYANMAR_ID = 28; 252 /** 253 */ 254 public static final int GEORGIAN_ID = 29; 255 /** 256 */ 257 public static final int HANGUL_JAMO_ID = 30; 258 /** 259 */ 260 public static final int ETHIOPIC_ID = 31; 261 /** 262 */ 263 public static final int CHEROKEE_ID = 32; 264 /** 265 */ 266 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 267 /** 268 */ 269 public static final int OGHAM_ID = 34; 270 /** 271 */ 272 public static final int RUNIC_ID = 35; 273 /** 274 */ 275 public static final int KHMER_ID = 36; 276 /** 277 */ 278 public static final int MONGOLIAN_ID = 37; 279 /** 280 */ 281 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 282 /** 283 */ 284 public static final int GREEK_EXTENDED_ID = 39; 285 /** 286 */ 287 public static final 
int GENERAL_PUNCTUATION_ID = 40; 288 /** 289 */ 290 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 291 /** 292 */ 293 public static final int CURRENCY_SYMBOLS_ID = 42; 294 /** 295 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 296 * Symbols". 297 */ 298 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 299 /** 300 */ 301 public static final int LETTERLIKE_SYMBOLS_ID = 44; 302 /** 303 */ 304 public static final int NUMBER_FORMS_ID = 45; 305 /** 306 */ 307 public static final int ARROWS_ID = 46; 308 /** 309 */ 310 public static final int MATHEMATICAL_OPERATORS_ID = 47; 311 /** 312 */ 313 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 314 /** 315 */ 316 public static final int CONTROL_PICTURES_ID = 49; 317 /** 318 */ 319 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 320 /** 321 */ 322 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 323 /** 324 */ 325 public static final int BOX_DRAWING_ID = 52; 326 /** 327 */ 328 public static final int BLOCK_ELEMENTS_ID = 53; 329 /** 330 */ 331 public static final int GEOMETRIC_SHAPES_ID = 54; 332 /** 333 */ 334 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 335 /** 336 */ 337 public static final int DINGBATS_ID = 56; 338 /** 339 */ 340 public static final int BRAILLE_PATTERNS_ID = 57; 341 /** 342 */ 343 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 344 /** 345 */ 346 public static final int KANGXI_RADICALS_ID = 59; 347 /** 348 */ 349 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 350 /** 351 */ 352 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 353 /** 354 */ 355 public static final int HIRAGANA_ID = 62; 356 /** 357 */ 358 public static final int KATAKANA_ID = 63; 359 /** 360 */ 361 public static final int BOPOMOFO_ID = 64; 362 /** 363 */ 364 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 365 /** 366 */ 367 public static final int KANBUN_ID = 66; 368 /** 369 */ 370 public static final 
int BOPOMOFO_EXTENDED_ID = 67; 371 /** 372 */ 373 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 374 /** 375 */ 376 public static final int CJK_COMPATIBILITY_ID = 69; 377 /** 378 */ 379 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 380 /** 381 */ 382 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 383 /** 384 */ 385 public static final int YI_SYLLABLES_ID = 72; 386 /** 387 */ 388 public static final int YI_RADICALS_ID = 73; 389 /** 390 */ 391 public static final int HANGUL_SYLLABLES_ID = 74; 392 /** 393 */ 394 public static final int HIGH_SURROGATES_ID = 75; 395 /** 396 */ 397 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 398 /** 399 */ 400 public static final int LOW_SURROGATES_ID = 77; 401 /** 402 * Same as public static final int PRIVATE_USE. 403 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 404 * and multiple code point ranges had this block. 405 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 406 * and adds separate blocks for the supplementary PUAs. 407 */ 408 public static final int PRIVATE_USE_AREA_ID = 78; 409 /** 410 * Same as public static final int PRIVATE_USE_AREA. 411 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 412 * and multiple code point ranges had this block. 413 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 414 * and adds separate blocks for the supplementary PUAs. 
415 */ 416 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 417 /** 418 */ 419 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 420 /** 421 */ 422 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 423 /** 424 */ 425 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 426 /** 427 */ 428 public static final int COMBINING_HALF_MARKS_ID = 82; 429 /** 430 */ 431 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 432 /** 433 */ 434 public static final int SMALL_FORM_VARIANTS_ID = 84; 435 /** 436 */ 437 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 438 /** 439 */ 440 public static final int SPECIALS_ID = 86; 441 /** 442 */ 443 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 444 /** 445 */ 446 public static final int OLD_ITALIC_ID = 88; 447 /** 448 */ 449 public static final int GOTHIC_ID = 89; 450 /** 451 */ 452 public static final int DESERET_ID = 90; 453 /** 454 */ 455 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 456 /** 457 */ 458 public static final int MUSICAL_SYMBOLS_ID = 92; 459 /** 460 */ 461 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 462 /** 463 */ 464 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 465 /** 466 */ 467 public static final int 468 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 469 /** 470 */ 471 public static final int TAGS_ID = 96; 472 473 // New blocks in Unicode 3.2 474 475 /** 476 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 477 */ 478 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 479 /** 480 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
481 */ 482 483 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 484 /** 485 */ 486 public static final int TAGALOG_ID = 98; 487 /** 488 */ 489 public static final int HANUNOO_ID = 99; 490 /** 491 */ 492 public static final int BUHID_ID = 100; 493 /** 494 */ 495 public static final int TAGBANWA_ID = 101; 496 /** 497 */ 498 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 499 /** 500 */ 501 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 502 /** 503 */ 504 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 505 /** 506 */ 507 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 508 /** 509 */ 510 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 511 /** 512 */ 513 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 514 /** 515 */ 516 public static final int VARIATION_SELECTORS_ID = 108; 517 /** 518 */ 519 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 520 /** 521 */ 522 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 523 524 /** 525 */ 526 public static final int LIMBU_ID = 111; /*[1900]*/ 527 /** 528 */ 529 public static final int TAI_LE_ID = 112; /*[1950]*/ 530 /** 531 */ 532 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 533 /** 534 */ 535 public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 536 /** 537 */ 538 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 539 /** 540 */ 541 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 542 /** 543 */ 544 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 545 /** 546 */ 547 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 548 /** 549 */ 550 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 551 /** 552 */ 553 public static final int UGARITIC_ID = 120; /*[10380]*/ 554 /** 555 */ 556 public static final int SHAVIAN_ID = 121; /*[10450]*/ 557 /** 558 */ 559 public static final int OSMANYA_ID = 
122; /*[10480]*/ 560 /** 561 */ 562 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 563 /** 564 */ 565 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 566 /** 567 */ 568 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 569 570 /* New blocks in Unicode 4.1 */ 571 572 /** 573 */ 574 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 575 576 /** 577 */ 578 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 579 580 /** 581 */ 582 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 583 584 /** 585 */ 586 public static final int BUGINESE_ID = 129; /*[1A00]*/ 587 588 /** 589 */ 590 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 591 592 /** 593 */ 594 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 595 596 /** 597 */ 598 public static final int COPTIC_ID = 132; /*[2C80]*/ 599 600 /** 601 */ 602 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 603 604 /** 605 */ 606 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 607 608 /** 609 */ 610 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 611 612 /** 613 */ 614 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 615 616 /** 617 */ 618 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 619 620 /** 621 */ 622 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 623 624 /** 625 */ 626 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 627 628 /** 629 */ 630 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 631 632 /** 633 */ 634 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 635 636 /** 637 */ 638 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 639 640 /** 641 */ 642 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 643 644 /** 645 */ 646 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 647 648 /** 649 */ 650 public static 
final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 651 652 /* New blocks in Unicode 5.0 */ 653 654 /** 655 */ 656 public static final int NKO_ID = 146; /*[07C0]*/ 657 /** 658 */ 659 public static final int BALINESE_ID = 147; /*[1B00]*/ 660 /** 661 */ 662 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 663 /** 664 */ 665 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 666 /** 667 */ 668 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 669 /** 670 */ 671 public static final int PHOENICIAN_ID = 151; /*[10900]*/ 672 /** 673 */ 674 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 675 /** 676 */ 677 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 678 /** 679 */ 680 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 681 682 /** 683 */ 684 public static final int SUNDANESE_ID = 155; /* [1B80] */ 685 686 /** 687 */ 688 public static final int LEPCHA_ID = 156; /* [1C00] */ 689 690 /** 691 */ 692 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 693 694 /** 695 */ 696 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 697 698 /** 699 */ 700 public static final int VAI_ID = 159; /* [A500] */ 701 702 /** 703 */ 704 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 705 706 /** 707 */ 708 public static final int SAURASHTRA_ID = 161; /* [A880] */ 709 710 /** 711 */ 712 public static final int KAYAH_LI_ID = 162; /* [A900] */ 713 714 /** 715 */ 716 public static final int REJANG_ID = 163; /* [A930] */ 717 718 /** 719 */ 720 public static final int CHAM_ID = 164; /* [AA00] */ 721 722 /** 723 */ 724 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 725 726 /** 727 */ 728 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 729 730 /** 731 */ 732 public static final int LYCIAN_ID = 167; /* [10280] */ 733 734 /** 735 */ 736 public static final int CARIAN_ID = 168; /* [102A0] */ 737 738 /** 739 */ 740 public static final int LYDIAN_ID = 169; /* 
[10920] */ 741 742 /** 743 */ 744 public static final int MAHJONG_TILES_ID = 170; /* [1F000] */ 745 746 /** 747 */ 748 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 749 750 /* New blocks in Unicode 5.2 */ 751 752 /***/ 753 public static final int SAMARITAN_ID = 172; /*[0800]*/ 754 /***/ 755 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 756 /***/ 757 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 758 /***/ 759 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 760 /***/ 761 public static final int LISU_ID = 176; /*[A4D0]*/ 762 /***/ 763 public static final int BAMUM_ID = 177; /*[A6A0]*/ 764 /***/ 765 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 766 /***/ 767 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 768 /***/ 769 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 770 /***/ 771 public static final int JAVANESE_ID = 181; /*[A980]*/ 772 /***/ 773 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 774 /***/ 775 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 776 /***/ 777 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 778 /***/ 779 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 780 /***/ 781 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 782 /***/ 783 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 784 /***/ 785 public static final int AVESTAN_ID = 188; /*[10B00]*/ 786 /***/ 787 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 788 /***/ 789 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 790 /***/ 791 public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 792 /***/ 793 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 794 /***/ 795 public static final int KAITHI_ID = 193; /*[11080]*/ 796 /***/ 797 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 798 /***/ 799 public 
static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 800 /***/ 801 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 802 /***/ 803 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 804 805 /* New blocks in Unicode 6.0 */ 806 807 /***/ 808 public static final int MANDAIC_ID = 198; /*[0840]*/ 809 /***/ 810 public static final int BATAK_ID = 199; /*[1BC0]*/ 811 /***/ 812 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 813 /***/ 814 public static final int BRAHMI_ID = 201; /*[11000]*/ 815 /***/ 816 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 817 /***/ 818 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 819 /***/ 820 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 821 /***/ 822 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 823 /***/ 824 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 825 /***/ 826 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 827 /***/ 828 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 829 /***/ 830 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 831 832 /* New blocks in Unicode 6.1 */ 833 834 /***/ 835 public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 836 /***/ 837 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 838 /***/ 839 public static final int CHAKMA_ID = 212; /*[11100]*/ 840 /***/ 841 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 842 /***/ 843 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 844 /***/ 845 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 846 /***/ 847 public static final int MIAO_ID = 216; /*[16F00]*/ 848 /***/ 849 public static final int SHARADA_ID = 217; /*[11180]*/ 850 /***/ 851 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 852 /***/ 853 public static 
final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 854 /***/ 855 public static final int TAKRI_ID = 220; /*[11680]*/ 856 857 /* New blocks in Unicode 7.0 */ 858 859 /***/ 860 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 861 /***/ 862 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 863 /***/ 864 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 865 /***/ 866 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 867 /***/ 868 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 869 /***/ 870 public static final int ELBASAN_ID = 226; /*[10500]*/ 871 /***/ 872 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 873 /***/ 874 public static final int GRANTHA_ID = 228; /*[11300]*/ 875 /***/ 876 public static final int KHOJKI_ID = 229; /*[11200]*/ 877 /***/ 878 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 879 /***/ 880 public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 881 /***/ 882 public static final int LINEAR_A_ID = 232; /*[10600]*/ 883 /***/ 884 public static final int MAHAJANI_ID = 233; /*[11150]*/ 885 /***/ 886 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 887 /***/ 888 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 889 /***/ 890 public static final int MODI_ID = 236; /*[11600]*/ 891 /***/ 892 public static final int MRO_ID = 237; /*[16A40]*/ 893 /***/ 894 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 895 /***/ 896 public static final int NABATAEAN_ID = 239; /*[10880]*/ 897 /***/ 898 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 899 /***/ 900 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 901 /***/ 902 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 903 /***/ 904 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 905 /***/ 906 public static final int PALMYRENE_ID = 244; /*[10860]*/ 907 /***/ 908 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 909 
/***/ 910 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 911 /***/ 912 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 913 /***/ 914 public static final int SIDDHAM_ID = 248; /*[11580]*/ 915 /***/ 916 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 917 /***/ 918 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 919 /***/ 920 public static final int TIRHUTA_ID = 251; /*[11480]*/ 921 /***/ 922 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 923 924 /* New blocks in Unicode 8.0 */ 925 926 /***/ 927 public static final int AHOM_ID = 253; /*[11700]*/ 928 /***/ 929 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 930 /***/ 931 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 932 /***/ 933 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 934 /***/ 935 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 936 /***/ 937 public static final int HATRAN_ID = 258; /*[108E0]*/ 938 /***/ 939 public static final int MULTANI_ID = 259; /*[11280]*/ 940 /***/ 941 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 942 /***/ 943 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 944 /***/ 945 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 946 947 /* New blocks in Unicode 9.0 */ 948 949 /***/ 950 public static final int ADLAM_ID = 263; /*[1E900]*/ 951 /***/ 952 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 953 /***/ 954 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 955 /***/ 956 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 957 /***/ 958 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 959 /***/ 960 public static final int MARCHEN_ID = 268; /*[11C70]*/ 961 /***/ 962 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 963 /***/ 964 public static final int NEWA_ID 
= 270; /*[11400]*/ 965 /***/ 966 public static final int OSAGE_ID = 271; /*[104B0]*/ 967 /***/ 968 public static final int TANGUT_ID = 272; /*[17000]*/ 969 /***/ 970 public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 971 972 // New blocks in Unicode 10.0 973 974 /***/ 975 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 976 /***/ 977 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 978 /***/ 979 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 980 /***/ 981 public static final int NUSHU_ID = 277; /*[1B170]*/ 982 /***/ 983 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 984 /***/ 985 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 986 /***/ 987 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 988 989 /** 990 * One more than the highest normal UnicodeBlock value. 991 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 992 * 993 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
* @hide unsupported on Android
*/
@Deprecated
public static final int COUNT = 281;

// blocks objects ---------------------------------------------------
// Every constant below is a UnicodeBlock built from a name string that
// repeats the field name and (with two exceptions, noted in place) a
// matching *_ID constant.

/**
 * Array of UnicodeBlocks, for easy access in getInstance(int).
 * Allocated with COUNT slots. It is declared ahead of every block constant
 * below; presumably the UnicodeBlock constructor stores each new instance
 * into it, in which case this declaration order must be kept - TODO confirm
 * against the constructor.
 */
private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];

/**
 * Block object created with the literal id 0; unlike the constants that
 * follow, no named *_ID constant is used here.
 */
public static final UnicodeBlock NO_BLOCK
    = new UnicodeBlock("NO_BLOCK", 0);

/** Block object for {@code BASIC_LATIN_ID}. */
public static final UnicodeBlock BASIC_LATIN
    = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
/** Block object for {@code LATIN_1_SUPPLEMENT_ID}. */
public static final UnicodeBlock LATIN_1_SUPPLEMENT
    = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
/** Block object for {@code LATIN_EXTENDED_A_ID}. */
public static final UnicodeBlock LATIN_EXTENDED_A
    = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
/** Block object for {@code LATIN_EXTENDED_B_ID}. */
public static final UnicodeBlock LATIN_EXTENDED_B
    = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
/** Block object for {@code IPA_EXTENSIONS_ID}. */
public static final UnicodeBlock IPA_EXTENSIONS
    = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
/** Block object for {@code SPACING_MODIFIER_LETTERS_ID}. */
public static final UnicodeBlock SPACING_MODIFIER_LETTERS
    = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
/** Block object for {@code COMBINING_DIACRITICAL_MARKS_ID}. */
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
    = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
/**
 * Block object for {@code GREEK_ID}.
 * Unicode 3.2 renames this block to "Greek and Coptic".
 */
public static final UnicodeBlock GREEK
    = new UnicodeBlock("GREEK", GREEK_ID);
/** Block object for {@code CYRILLIC_ID}. */
public static final UnicodeBlock CYRILLIC
    = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
/** Block object for {@code ARMENIAN_ID}. */
public static final UnicodeBlock ARMENIAN
    = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
/** Block object for {@code HEBREW_ID}. */
public static final UnicodeBlock HEBREW
    = new UnicodeBlock("HEBREW", HEBREW_ID);
/** Block object for {@code ARABIC_ID}. */
public static final UnicodeBlock ARABIC
    = new UnicodeBlock("ARABIC", ARABIC_ID);
/** Block object for {@code SYRIAC_ID}. */
public static final UnicodeBlock SYRIAC
    = new UnicodeBlock("SYRIAC", SYRIAC_ID);
/** Block object for {@code THAANA_ID}. */
public static final UnicodeBlock THAANA
    = new UnicodeBlock("THAANA", THAANA_ID);
/** Block object for {@code DEVANAGARI_ID}. */
public static final UnicodeBlock DEVANAGARI
    = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
/** Block object for {@code BENGALI_ID}. */
public static final UnicodeBlock BENGALI
    = new UnicodeBlock("BENGALI", BENGALI_ID);
/** Block object for {@code GURMUKHI_ID}. */
public static final UnicodeBlock GURMUKHI
    = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
/** Block object for {@code GUJARATI_ID}. */
public static final UnicodeBlock GUJARATI
    = new UnicodeBlock("GUJARATI", GUJARATI_ID);
/** Block object for {@code ORIYA_ID}. */
public static final UnicodeBlock ORIYA
    = new UnicodeBlock("ORIYA", ORIYA_ID);
/** Block object for {@code TAMIL_ID}. */
public static final UnicodeBlock TAMIL
    = new UnicodeBlock("TAMIL", TAMIL_ID);
/** Block object for {@code TELUGU_ID}. */
public static final UnicodeBlock TELUGU
    = new UnicodeBlock("TELUGU", TELUGU_ID);
/** Block object for {@code KANNADA_ID}. */
public static final UnicodeBlock KANNADA
    = new UnicodeBlock("KANNADA", KANNADA_ID);
/** Block object for {@code MALAYALAM_ID}. */
public static final UnicodeBlock MALAYALAM
    = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
/** Block object for {@code SINHALA_ID}. */
public static final UnicodeBlock SINHALA
    = new UnicodeBlock("SINHALA", SINHALA_ID);
/** Block object for {@code THAI_ID}. */
public static final UnicodeBlock THAI
    = new UnicodeBlock("THAI", THAI_ID);
/** Block object for {@code LAO_ID}. */
public static final UnicodeBlock LAO
    = new UnicodeBlock("LAO", LAO_ID);
/** Block object for {@code TIBETAN_ID}. */
public static final UnicodeBlock TIBETAN
    = new UnicodeBlock("TIBETAN", TIBETAN_ID);
/** Block object for {@code MYANMAR_ID}. */
public static final UnicodeBlock MYANMAR
    = new UnicodeBlock("MYANMAR", MYANMAR_ID);
/** Block object for {@code GEORGIAN_ID}. */
public static final UnicodeBlock GEORGIAN
    = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
/** Block object for {@code HANGUL_JAMO_ID}. */
public static final UnicodeBlock HANGUL_JAMO
    = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
/** Block object for {@code ETHIOPIC_ID}. */
public static final UnicodeBlock ETHIOPIC
    = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
/** Block object for {@code CHEROKEE_ID}. */
public static final UnicodeBlock CHEROKEE
    = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
/** Block object for {@code UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID}. */
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
    = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
/** Block object for {@code OGHAM_ID}. */
public static final UnicodeBlock OGHAM
    = new UnicodeBlock("OGHAM", OGHAM_ID);
/** Block object for {@code RUNIC_ID}. */
public static final UnicodeBlock RUNIC
    = new UnicodeBlock("RUNIC", RUNIC_ID);
/** Block object for {@code KHMER_ID}. */
public static final UnicodeBlock KHMER
    = new UnicodeBlock("KHMER", KHMER_ID);
/** Block object for {@code MONGOLIAN_ID}. */
public static final UnicodeBlock MONGOLIAN
    = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
/** Block object for {@code LATIN_EXTENDED_ADDITIONAL_ID}. */
public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
    = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
/** Block object for {@code GREEK_EXTENDED_ID}. */
public static final UnicodeBlock GREEK_EXTENDED
    = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
/** Block object for {@code GENERAL_PUNCTUATION_ID}. */
public static final UnicodeBlock GENERAL_PUNCTUATION
    = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
/** Block object for {@code SUPERSCRIPTS_AND_SUBSCRIPTS_ID}. */
public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
    = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
        SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
/** Block object for {@code CURRENCY_SYMBOLS_ID}. */
public static final UnicodeBlock CURRENCY_SYMBOLS
    = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
/**
 * Block object for {@code COMBINING_MARKS_FOR_SYMBOLS_ID}.
 * Unicode 3.2 renames this block to "Combining Diacritical Marks for
 * Symbols".
 */
public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
    = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
/** Block object for {@code LETTERLIKE_SYMBOLS_ID}. */
public static final UnicodeBlock LETTERLIKE_SYMBOLS
    = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
/** Block object for {@code NUMBER_FORMS_ID}. */
public static final UnicodeBlock NUMBER_FORMS
    = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
/** Block object for {@code ARROWS_ID}. */
public static final UnicodeBlock ARROWS
    = new UnicodeBlock("ARROWS", ARROWS_ID);
/** Block object for {@code MATHEMATICAL_OPERATORS_ID}. */
public static final UnicodeBlock MATHEMATICAL_OPERATORS
    = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
/** Block object for {@code MISCELLANEOUS_TECHNICAL_ID}. */
public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
    = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
/** Block object for {@code CONTROL_PICTURES_ID}. */
public static final UnicodeBlock CONTROL_PICTURES
    = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
/** Block object for {@code OPTICAL_CHARACTER_RECOGNITION_ID}. */
public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
    = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
/** Block object for {@code ENCLOSED_ALPHANUMERICS_ID}. */
public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
    = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
/** Block object for {@code BOX_DRAWING_ID}. */
public static final UnicodeBlock BOX_DRAWING
    = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
/** Block object for {@code BLOCK_ELEMENTS_ID}. */
public static final UnicodeBlock BLOCK_ELEMENTS
    = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
/** Block object for {@code GEOMETRIC_SHAPES_ID}. */
public static final UnicodeBlock GEOMETRIC_SHAPES
    = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
/** Block object for {@code MISCELLANEOUS_SYMBOLS_ID}. */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
    = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
/** Block object for {@code DINGBATS_ID}. */
public static final UnicodeBlock DINGBATS
    = new UnicodeBlock("DINGBATS", DINGBATS_ID);
/** Block object for {@code BRAILLE_PATTERNS_ID}. */
public static final UnicodeBlock BRAILLE_PATTERNS
    = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
/** Block object for {@code CJK_RADICALS_SUPPLEMENT_ID}. */
public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
    = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
/** Block object for {@code KANGXI_RADICALS_ID}. */
public static final UnicodeBlock KANGXI_RADICALS
    = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
/** Block object for {@code IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID}. */
public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
    = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
        IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
/** Block object for {@code CJK_SYMBOLS_AND_PUNCTUATION_ID}. */
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
    = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
/** Block object for {@code HIRAGANA_ID}. */
public static final UnicodeBlock HIRAGANA
    = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
/** Block object for {@code KATAKANA_ID}. */
public static final UnicodeBlock KATAKANA
    = new UnicodeBlock("KATAKANA", KATAKANA_ID);
/** Block object for {@code BOPOMOFO_ID}. */
public static final UnicodeBlock BOPOMOFO
    = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
/** Block object for {@code HANGUL_COMPATIBILITY_JAMO_ID}. */
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
    = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
/** Block object for {@code KANBUN_ID}. */
public static final UnicodeBlock KANBUN
    = new UnicodeBlock("KANBUN", KANBUN_ID);
/** Block object for {@code BOPOMOFO_EXTENDED_ID}. */
public static final UnicodeBlock BOPOMOFO_EXTENDED
    = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
/** Block object for {@code ENCLOSED_CJK_LETTERS_AND_MONTHS_ID}. */
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
    = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
        ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
/** Block object for {@code CJK_COMPATIBILITY_ID}. */
public static final UnicodeBlock CJK_COMPATIBILITY
    = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
/** Block object for {@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID}. */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
    = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
/** Block object for {@code CJK_UNIFIED_IDEOGRAPHS_ID}. */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
    = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
/** Block object for {@code YI_SYLLABLES_ID}. */
public static final UnicodeBlock YI_SYLLABLES
    = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
/** Block object for {@code YI_RADICALS_ID}. */
public static final UnicodeBlock YI_RADICALS
    = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
/** Block object for {@code HANGUL_SYLLABLES_ID}. */
public static final UnicodeBlock HANGUL_SYLLABLES
    = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
/** Block object for {@code HIGH_SURROGATES_ID}. */
public static final UnicodeBlock HIGH_SURROGATES
    = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
/** Block object for {@code HIGH_PRIVATE_USE_SURROGATES_ID}. */
public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
    = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
/** Block object for {@code LOW_SURROGATES_ID}. */
public static final UnicodeBlock LOW_SURROGATES
    = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
/**
 * Same object as {@link #PRIVATE_USE}.
 * Until Unicode 3.1.1, the corresponding block name was "Private Use",
 * and multiple code point ranges had this block.
 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
 * and adds separate blocks for the supplementary PUAs.
 * <p>NOTE(review): the id is passed as the literal 78 rather than a named
 * *_ID constant like the neighbouring declarations; presumably a matching
 * id constant exists elsewhere in this class - confirm before changing.
 */
public static final UnicodeBlock PRIVATE_USE_AREA
    = new UnicodeBlock("PRIVATE_USE_AREA", 78);
/**
 * Alias of {@link #PRIVATE_USE_AREA}: it refers to the very same object,
 * no second block instance is created.
 * Until Unicode 3.1.1, the corresponding block name was "Private Use",
 * and multiple code point ranges had this block.
 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
 * and adds separate blocks for the supplementary PUAs.
 */
public static final UnicodeBlock PRIVATE_USE
    = PRIVATE_USE_AREA;
/** Block object for {@code CJK_COMPATIBILITY_IDEOGRAPHS_ID}. */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
    = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
/** Block object for {@code ALPHABETIC_PRESENTATION_FORMS_ID}. */
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
    = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
/** Block object for {@code ARABIC_PRESENTATION_FORMS_A_ID}. */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
    = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
/** Block object for {@code COMBINING_HALF_MARKS_ID}. */
public static final UnicodeBlock COMBINING_HALF_MARKS
    = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
/** Block object for {@code CJK_COMPATIBILITY_FORMS_ID}. */
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
    = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
/** Block object for {@code SMALL_FORM_VARIANTS_ID}. */
public static final UnicodeBlock SMALL_FORM_VARIANTS
    = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
/** Block object for {@code ARABIC_PRESENTATION_FORMS_B_ID}. */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
    = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
/** Block object for {@code SPECIALS_ID}. */
public static final UnicodeBlock SPECIALS
    = new UnicodeBlock("SPECIALS", SPECIALS_ID);
/** Block object for {@code HALFWIDTH_AND_FULLWIDTH_FORMS_ID}. */
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
    = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
/** Block object for {@code OLD_ITALIC_ID}. */
public static final UnicodeBlock OLD_ITALIC
    = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
/** Block object for {@code GOTHIC_ID}. */
public static final UnicodeBlock GOTHIC
    = new UnicodeBlock("GOTHIC", GOTHIC_ID);
/** Block object for {@code DESERET_ID}. */
public static final UnicodeBlock DESERET
    = new UnicodeBlock("DESERET", DESERET_ID);
/** Block object for {@code BYZANTINE_MUSICAL_SYMBOLS_ID}. */
public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
    = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
/** Block object for {@code MUSICAL_SYMBOLS_ID}. */
public static final UnicodeBlock MUSICAL_SYMBOLS
    = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
/** Block object for {@code MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID}. */
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
    = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
        MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
/** Block object for {@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID}. */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
    = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
/** Block object for {@code CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID}. */
public static final UnicodeBlock
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
    = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
/** Block object for {@code TAGS_ID}. */
public static final UnicodeBlock TAGS
    = new UnicodeBlock("TAGS", TAGS_ID);

// New blocks in Unicode 3.2

/**
 * Block object for {@code CYRILLIC_SUPPLEMENTARY_ID}.
 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
 * Note that this is a separate instance from {@link #CYRILLIC_SUPPLEMENT},
 * not an alias of it.
 */
public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
    = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
/**
 * Block object for {@code CYRILLIC_SUPPLEMENT_ID}.
 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
 * Note that this is a separate instance from {@link #CYRILLIC_SUPPLEMENTARY},
 * not an alias of it.
 */
public static final UnicodeBlock CYRILLIC_SUPPLEMENT
    = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
/** Block object for {@code TAGALOG_ID}. */
public static final UnicodeBlock TAGALOG
    = new UnicodeBlock("TAGALOG", TAGALOG_ID);
/** Block object for {@code HANUNOO_ID}. */
public static final UnicodeBlock HANUNOO
    = new UnicodeBlock("HANUNOO", HANUNOO_ID);
/** Block object for {@code BUHID_ID}. */
public static final UnicodeBlock BUHID
    = new UnicodeBlock("BUHID", BUHID_ID);
/** Block object for {@code TAGBANWA_ID}. */
public static final UnicodeBlock TAGBANWA
    = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
/** Block object for {@code MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID}. */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
    = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
        MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
/** Block object for {@code SUPPLEMENTAL_ARROWS_A_ID}. */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
    = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
/** Block object for {@code SUPPLEMENTAL_ARROWS_B_ID}. */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
    = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
/** Block object for {@code MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID}. */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
    = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
        MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
/** Block object for {@code SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID}. */
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
    = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
        SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
/** Block object for {@code KATAKANA_PHONETIC_EXTENSIONS_ID}. */
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
    = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
/** Block object for {@code VARIATION_SELECTORS_ID}. */
public static final UnicodeBlock VARIATION_SELECTORS
    = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
/** Block object for {@code SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID}. */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
    = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
        SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
/** Block object for {@code SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID}. */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
    = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
        SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);

// NOTE(review): there is no version header for the constants from LIMBU
// through VARIATION_SELECTORS_SUPPLEMENT; presumably these are the
// Unicode 4.0 additions - confirm before adding a header.

/** Block object for {@code LIMBU_ID}. */
public static final UnicodeBlock LIMBU
    = new UnicodeBlock("LIMBU", LIMBU_ID);
/** Block object for {@code TAI_LE_ID}. */
public static final UnicodeBlock TAI_LE
    = new UnicodeBlock("TAI_LE", TAI_LE_ID);
/** Block object for {@code KHMER_SYMBOLS_ID}. */
public static final UnicodeBlock KHMER_SYMBOLS
    = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);

/** Block object for {@code PHONETIC_EXTENSIONS_ID}. */
public static final UnicodeBlock PHONETIC_EXTENSIONS
    = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);

/** Block object for {@code MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID}. */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
    = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
        MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
/** Block object for {@code YIJING_HEXAGRAM_SYMBOLS_ID}. */
public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
    = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
/** Block object for {@code LINEAR_B_SYLLABARY_ID}. */
public static final UnicodeBlock LINEAR_B_SYLLABARY
    = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
/** Block object for {@code LINEAR_B_IDEOGRAMS_ID}. */
public static final UnicodeBlock LINEAR_B_IDEOGRAMS
    = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
/** Block object for {@code AEGEAN_NUMBERS_ID}. */
public static final UnicodeBlock AEGEAN_NUMBERS
    = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
/** Block object for {@code UGARITIC_ID}. */
public static final UnicodeBlock UGARITIC
    = new UnicodeBlock("UGARITIC", UGARITIC_ID);
/** Block object for {@code SHAVIAN_ID}. */
public static final UnicodeBlock SHAVIAN
    = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
/** Block object for {@code OSMANYA_ID}. */
public static final UnicodeBlock OSMANYA
    = new UnicodeBlock("OSMANYA", OSMANYA_ID);
/** Block object for {@code CYPRIOT_SYLLABARY_ID}. */
public static final UnicodeBlock CYPRIOT_SYLLABARY
    = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
/** Block object for {@code TAI_XUAN_JING_SYMBOLS_ID}. */
public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
    = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);

/** Block object for {@code VARIATION_SELECTORS_SUPPLEMENT_ID}. */
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
    = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);

/* New blocks in Unicode 4.1 */
// From here on each declaration carries a trailing bracketed hex value;
// presumably the first code point of the block - confirm against Blocks.txt.

/** Block object for {@code ANCIENT_GREEK_MUSICAL_NOTATION_ID}. */
public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
    new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
        ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/

/** Block object for {@code ANCIENT_GREEK_NUMBERS_ID}. */
public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
    new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/

/** Block object for {@code ARABIC_SUPPLEMENT_ID}. */
public static final UnicodeBlock ARABIC_SUPPLEMENT =
    new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/

/** Block object for {@code BUGINESE_ID}. */
public static final UnicodeBlock BUGINESE =
    new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/

/** Block object for {@code CJK_STROKES_ID}. */
public static final UnicodeBlock CJK_STROKES =
    new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/

/** Block object for {@code COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID}. */
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
    new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
        COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/

/** Block object for {@code COPTIC_ID}. */
public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/

/** Block object for {@code ETHIOPIC_EXTENDED_ID}. */
public static final UnicodeBlock ETHIOPIC_EXTENDED =
    new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/

/** Block object for {@code ETHIOPIC_SUPPLEMENT_ID}. */
public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
    new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/

/** Block object for {@code GEORGIAN_SUPPLEMENT_ID}. */
public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
    new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/

// The bracketed hex value in each trailing comment below is presumably the
// first code point of the block - confirm against Blocks.txt.

/** Block object for {@code GLAGOLITIC_ID}. */
public static final UnicodeBlock GLAGOLITIC =
    new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/

/** Block object for {@code KHAROSHTHI_ID}. */
public static final UnicodeBlock KHAROSHTHI =
    new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/

/** Block object for {@code MODIFIER_TONE_LETTERS_ID}. */
public static final UnicodeBlock MODIFIER_TONE_LETTERS =
    new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/

/** Block object for {@code NEW_TAI_LUE_ID}. */
public static final UnicodeBlock NEW_TAI_LUE =
    new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/

/** Block object for {@code OLD_PERSIAN_ID}. */
public static final UnicodeBlock OLD_PERSIAN =
    new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/

/** Block object for {@code PHONETIC_EXTENSIONS_SUPPLEMENT_ID}. */
public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
    new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
        PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/

/** Block object for {@code SUPPLEMENTAL_PUNCTUATION_ID}. */
public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
    new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/

/** Block object for {@code SYLOTI_NAGRI_ID}. */
public static final UnicodeBlock SYLOTI_NAGRI =
    new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/

/** Block object for {@code TIFINAGH_ID}. */
public static final UnicodeBlock TIFINAGH =
    new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/

/** Block object for {@code VERTICAL_FORMS_ID}. */
public static final UnicodeBlock VERTICAL_FORMS =
    new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/

// NOTE(review): no version header for NKO through COUNTING_ROD_NUMERALS;
// presumably the Unicode 5.0 additions - confirm before adding a header.

/** Block object for {@code NKO_ID}. */
public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
/** Block object for {@code BALINESE_ID}. */
public static final UnicodeBlock BALINESE =
    new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
/** Block object for {@code LATIN_EXTENDED_C_ID}. */
public static final UnicodeBlock LATIN_EXTENDED_C =
    new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
/** Block object for {@code LATIN_EXTENDED_D_ID}. */
public static final UnicodeBlock LATIN_EXTENDED_D =
    new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
/** Block object for {@code PHAGS_PA_ID}. */
public static final UnicodeBlock PHAGS_PA =
    new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
/** Block object for {@code PHOENICIAN_ID}. */
public static final UnicodeBlock PHOENICIAN =
    new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
/** Block object for {@code CUNEIFORM_ID}. */
public static final UnicodeBlock CUNEIFORM =
    new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
/** Block object for {@code CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID}. */
public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
    new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
        CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
/** Block object for {@code COUNTING_ROD_NUMERALS_ID}. */
public static final UnicodeBlock COUNTING_ROD_NUMERALS =
    new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/

// NOTE(review): no version header for SUNDANESE through DOMINO_TILES;
// presumably the Unicode 5.1 additions - confirm before adding a header.

/** Block object for {@code SUNDANESE_ID}. */
public static final UnicodeBlock SUNDANESE =
    new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */

/** Block object for {@code LEPCHA_ID}. */
public static final UnicodeBlock LEPCHA =
    new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */

/** Block object for {@code OL_CHIKI_ID}. */
public static final UnicodeBlock OL_CHIKI =
    new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */

/** Block object for {@code CYRILLIC_EXTENDED_A_ID}. */
public static final UnicodeBlock CYRILLIC_EXTENDED_A =
    new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */

/** Block object for {@code VAI_ID}. */
public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */

/** Block object for {@code CYRILLIC_EXTENDED_B_ID}. */
public static final UnicodeBlock CYRILLIC_EXTENDED_B =
    new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */

/** Block object for {@code SAURASHTRA_ID}. */
public static final UnicodeBlock SAURASHTRA =
    new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */

/** Block object for {@code KAYAH_LI_ID}. */
public static final UnicodeBlock KAYAH_LI =
    new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */

/** Block object for {@code REJANG_ID}. */
public static final UnicodeBlock REJANG =
    new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */

/** Block object for {@code CHAM_ID}. */
public static final UnicodeBlock CHAM =
    new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */

/** Block object for {@code ANCIENT_SYMBOLS_ID}. */
public static final UnicodeBlock ANCIENT_SYMBOLS =
    new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */

/** Block object for {@code PHAISTOS_DISC_ID}. */
public static final UnicodeBlock PHAISTOS_DISC =
    new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */

/** Block object for {@code LYCIAN_ID}. */
public static final UnicodeBlock LYCIAN =
    new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */

/** Block object for {@code CARIAN_ID}. */
public static final UnicodeBlock CARIAN =
    new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */

/** Block object for {@code LYDIAN_ID}. */
public static final UnicodeBlock LYDIAN =
    new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */

/** Block object for {@code MAHJONG_TILES_ID}. */
public static final UnicodeBlock MAHJONG_TILES =
    new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */

/** Block object for {@code DOMINO_TILES_ID}. */
public static final UnicodeBlock DOMINO_TILES =
    new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */

/* New blocks in Unicode 5.2 */

/** Block object for {@code SAMARITAN_ID}. */
public static final UnicodeBlock SAMARITAN =
    new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
/** Block object for {@code UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID}. */
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
    new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
/** Block object for {@code TAI_THAM_ID}. */
public static final UnicodeBlock TAI_THAM =
    new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
/** Block object for {@code VEDIC_EXTENSIONS_ID}. */
public static final UnicodeBlock VEDIC_EXTENSIONS =
    new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
/** Block object for {@code LISU_ID}. */
public static final UnicodeBlock LISU =
    new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
/** Block object for {@code BAMUM_ID}. */
public static final UnicodeBlock BAMUM =
    new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
/** Block object for {@code COMMON_INDIC_NUMBER_FORMS_ID}. */
public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
    new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
/** Block object for {@code DEVANAGARI_EXTENDED_ID}. */
public static final UnicodeBlock DEVANAGARI_EXTENDED =
    new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
/** Block object for {@code HANGUL_JAMO_EXTENDED_A_ID}. */
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
    new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
/** Block object for {@code JAVANESE_ID}. */
public static final UnicodeBlock JAVANESE =
    new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
/** Block object for {@code MYANMAR_EXTENDED_A_ID}. */
public static final UnicodeBlock MYANMAR_EXTENDED_A =
    new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
/** Block object for {@code TAI_VIET_ID}. */
public static final UnicodeBlock TAI_VIET =
    new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
/** Block object for {@code MEETEI_MAYEK_ID}. */
public static final UnicodeBlock MEETEI_MAYEK =
    new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
/** Block object for {@code HANGUL_JAMO_EXTENDED_B_ID}. */
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
    new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
/** Block object for {@code IMPERIAL_ARAMAIC_ID}. */
public static final UnicodeBlock IMPERIAL_ARAMAIC =
    new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
/** Block object for {@code OLD_SOUTH_ARABIAN_ID}. */
public static final UnicodeBlock OLD_SOUTH_ARABIAN =
    new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
/** Block object for {@code AVESTAN_ID}. */
public static final UnicodeBlock AVESTAN =
    new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
/** Block object for {@code INSCRIPTIONAL_PARTHIAN_ID}. */
public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
    new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
/** Block object for {@code INSCRIPTIONAL_PAHLAVI_ID}. */
public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
    new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
/** Block object for {@code OLD_TURKIC_ID}. */
public static final UnicodeBlock OLD_TURKIC =
    new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
/** Block object for {@code RUMI_NUMERAL_SYMBOLS_ID}. */
public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
    new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
/** Block object for {@code KAITHI_ID}. */
public static final UnicodeBlock KAITHI =
    new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
/** Block object for {@code EGYPTIAN_HIEROGLYPHS_ID}. */
public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
    new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
/** Block object for {@code ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID}. */
public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
    new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
        ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
/** Block object for {@code ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID}. */
public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
    new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
        ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
/** Block object for {@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID}. */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/

/* New blocks in Unicode 6.0 */

/** Block object for {@code MANDAIC_ID}. */
public static final UnicodeBlock MANDAIC =
    new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
/** Block object for {@code BATAK_ID}. */
public static final UnicodeBlock BATAK =
    new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
/** Block object for {@code ETHIOPIC_EXTENDED_A_ID}. */
public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
    new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
/** Block object for {@code BRAHMI_ID}. */
public static final UnicodeBlock BRAHMI =
    new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
/** Block object for {@code BAMUM_SUPPLEMENT_ID}. */
public static final UnicodeBlock BAMUM_SUPPLEMENT =
    new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
/** Block object for {@code KANA_SUPPLEMENT_ID}. */
public static final UnicodeBlock KANA_SUPPLEMENT =
    new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
/** Block object for {@code PLAYING_CARDS_ID}. */
public static final UnicodeBlock PLAYING_CARDS =
    new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
/** Block object for {@code MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID}. */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
    new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
        MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
/** Block object for {@code EMOTICONS_ID}. */
public static final UnicodeBlock EMOTICONS =
    new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
/** Block object for {@code TRANSPORT_AND_MAP_SYMBOLS_ID}. */
public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
    new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
/** Block object for {@code ALCHEMICAL_SYMBOLS_ID}. */
public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
    new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
/** Block object for {@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID}. */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/

/* New blocks in Unicode 6.1 */

/** Block object for {@code ARABIC_EXTENDED_A_ID}. */
public static final UnicodeBlock ARABIC_EXTENDED_A =
    new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
/** Block object for {@code ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID}. */
public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
    new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
/** Block object for {@code CHAKMA_ID}. */
public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
/** Block object for {@code MEETEI_MAYEK_EXTENSIONS_ID}. */
public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
    new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
/** Block object for {@code MEROITIC_CURSIVE_ID}. */
public static final UnicodeBlock MEROITIC_CURSIVE =
    new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
/** Block object for {@code MEROITIC_HIEROGLYPHS_ID}. */
public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
    new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
/** Block object for {@code MIAO_ID}. */
public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
// The bracketed hex value in each trailing comment below is presumably the
// first code point of the block - confirm against Blocks.txt.

/** Block object for {@code SHARADA_ID}. */
public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
/** Block object for {@code SORA_SOMPENG_ID}. */
public static final UnicodeBlock SORA_SOMPENG =
    new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
/** Block object for {@code SUNDANESE_SUPPLEMENT_ID}. */
public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
    new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
/** Block object for {@code TAKRI_ID}. */
public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/

/* New blocks in Unicode 7.0 */

/** Block object for {@code BASSA_VAH_ID}. */
public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
/** Block object for {@code CAUCASIAN_ALBANIAN_ID}. */
public static final UnicodeBlock CAUCASIAN_ALBANIAN =
    new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
/** Block object for {@code COPTIC_EPACT_NUMBERS_ID}. */
public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
    new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
/** Block object for {@code COMBINING_DIACRITICAL_MARKS_EXTENDED_ID}. */
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
    new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
/** Block object for {@code DUPLOYAN_ID}. */
public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
/** Block object for {@code ELBASAN_ID}. */
public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
/** Block object for {@code GEOMETRIC_SHAPES_EXTENDED_ID}. */
public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
    new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
/** Block object for {@code GRANTHA_ID}. */
public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
/** Block object for {@code KHOJKI_ID}. */
public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
/** Block object for {@code KHUDAWADI_ID}. */
public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
/** Block object for {@code LATIN_EXTENDED_E_ID}. */
public static final UnicodeBlock LATIN_EXTENDED_E =
    new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
/** Block object for {@code LINEAR_A_ID}. */
public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
/** Block object for {@code MAHAJANI_ID}. */
public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
/** Block object for {@code MANICHAEAN_ID}. */
public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
/** Block object for {@code MENDE_KIKAKUI_ID}. */
public static final UnicodeBlock MENDE_KIKAKUI =
    new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
/** Block object for {@code MODI_ID}. */
public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
/** Block object for {@code MRO_ID}. */
public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
/** Block object for {@code MYANMAR_EXTENDED_B_ID}. */
public static final UnicodeBlock MYANMAR_EXTENDED_B =
    new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
/** Block object for {@code NABATAEAN_ID}. */
public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
/** Block object for {@code OLD_NORTH_ARABIAN_ID}. */
public static final UnicodeBlock OLD_NORTH_ARABIAN =
    new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
/** Block object for {@code OLD_PERMIC_ID}. */
public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
/** Block object for {@code ORNAMENTAL_DINGBATS_ID}. */
public static final UnicodeBlock ORNAMENTAL_DINGBATS =
    new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
/** Block object for {@code PAHAWH_HMONG_ID}. */
public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
/** Block object for {@code PALMYRENE_ID}. */
public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
/** Block object for {@code PAU_CIN_HAU_ID}. */
public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
/** Block object for {@code PSALTER_PAHLAVI_ID}. */
public static final UnicodeBlock PSALTER_PAHLAVI =
    new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
/** Block object for {@code SHORTHAND_FORMAT_CONTROLS_ID}. */
public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
    new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
/** Block object for {@code SIDDHAM_ID}. */
public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
/** Block object for {@code SINHALA_ARCHAIC_NUMBERS_ID}. */
public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
    new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
/** Block object for {@code SUPPLEMENTAL_ARROWS_C_ID}. */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
    new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
/** Block object for {@code TIRHUTA_ID}. */
public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
/** Block object for {@code WARANG_CITI_ID}. */
public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/

/* New blocks in Unicode 8.0 */

/** Block object for {@code AHOM_ID}. */
public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
/** Block object for {@code ANATOLIAN_HIEROGLYPHS_ID}. */
public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
    new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
/** Block object for {@code CHEROKEE_SUPPLEMENT_ID}. */
public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
    new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
/** Block object for {@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID}. */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
/** Block object for {@code EARLY_DYNASTIC_CUNEIFORM_ID}. */
public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
    new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
/** Block object for {@code HATRAN_ID}. */
public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
/** Block object for {@code MULTANI_ID}. */
public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
/** Block object for {@code OLD_HUNGARIAN_ID}. */
public static final UnicodeBlock OLD_HUNGARIAN =
    new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/

/***/ 2044 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2045 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2046 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2047 /***/ 2048 public static final UnicodeBlock SUTTON_SIGNWRITING = 2049 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2050 2051 /* New blocks in Unicode 9.0 */ 2052 2053 /***/ 2054 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2055 /***/ 2056 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2057 /***/ 2058 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2059 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2060 /***/ 2061 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2062 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2063 /***/ 2064 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2065 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2066 /***/ 2067 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2068 /***/ 2069 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2070 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2071 /***/ 2072 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2073 /***/ 2074 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2075 /***/ 2076 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2077 /***/ 2078 public static final UnicodeBlock TANGUT_COMPONENTS = 2079 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2080 2081 // New blocks in Unicode 10.0 2082 2083 /***/ 2084 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F 
= 2085 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2086 /***/ 2087 public static final UnicodeBlock KANA_EXTENDED_A = 2088 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/ 2089 /***/ 2090 public static final UnicodeBlock MASARAM_GONDI = 2091 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/ 2092 /***/ 2093 public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/ 2094 /***/ 2095 public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/ 2096 /***/ 2097 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2098 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/ 2099 /***/ 2100 public static final UnicodeBlock ZANABAZAR_SQUARE = 2101 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/ 2102 2103 /** 2104 */ 2105 public static final UnicodeBlock INVALID_CODE 2106 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2107 2108 static { 2109 for (int blockId = 0; blockId < COUNT; ++blockId) { 2110 if (BLOCKS_[blockId] == null) { 2111 throw new java.lang.IllegalStateException( 2112 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2113 } 2114 } 2115 } 2116 2117 // public methods -------------------------------------------------- 2118 2119 /** 2120 * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID. 2121 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2122 * @param id UnicodeBlock ID 2123 * @return the only instance of the UnicodeBlock with the argument ID 2124 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2125 * returned. 
2126 */ 2127 public static UnicodeBlock getInstance(int id) 2128 { 2129 if (id >= 0 && id < BLOCKS_.length) { 2130 return BLOCKS_[id]; 2131 } 2132 return INVALID_CODE; 2133 } 2134 2135 /** 2136 * Returns the Unicode allocation block that contains the code point, 2137 * or null if the code point is not a member of a defined block. 2138 * @param ch code point to be tested 2139 * @return the Unicode allocation block that contains the code point 2140 */ 2141 public static UnicodeBlock of(int ch) 2142 { 2143 if (ch > MAX_VALUE) { 2144 return INVALID_CODE; 2145 } 2146 2147 return UnicodeBlock.getInstance( 2148 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2149 } 2150 2151 /** 2152 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2153 * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike 2154 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2155 * against the official UCD name and the Java block name 2156 * (ignoring case). 
2157 * @param blockName the name of the block to match 2158 * @return the UnicodeBlock with that name 2159 * @throws IllegalArgumentException if the blockName could not be matched 2160 */ 2161 public static final UnicodeBlock forName(String blockName) { 2162 Map<String, UnicodeBlock> m = null; 2163 if (mref != null) { 2164 m = mref.get(); 2165 } 2166 if (m == null) { 2167 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2168 for (int i = 0; i < BLOCKS_.length; ++i) { 2169 UnicodeBlock b = BLOCKS_[i]; 2170 String name = trimBlockName( 2171 getPropertyValueName(UProperty.BLOCK, b.getID(), 2172 UProperty.NameChoice.LONG)); 2173 m.put(name, b); 2174 } 2175 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2176 } 2177 UnicodeBlock b = m.get(trimBlockName(blockName)); 2178 if (b == null) { 2179 throw new IllegalArgumentException(); 2180 } 2181 return b; 2182 } 2183 private static SoftReference<Map<String, UnicodeBlock>> mref; 2184 2185 private static String trimBlockName(String name) { 2186 String upper = name.toUpperCase(Locale.ENGLISH); 2187 StringBuilder result = new StringBuilder(upper.length()); 2188 for (int i = 0; i < upper.length(); i++) { 2189 char c = upper.charAt(i); 2190 if (c != ' ' && c != '_' && c != '-') { 2191 result.append(c); 2192 } 2193 } 2194 return result.toString(); 2195 } 2196 2197 /** 2198 * {icu} Returns the type ID of this Unicode block 2199 * @return integer type ID of this Unicode block 2200 */ 2201 public int getID() 2202 { 2203 return m_id_; 2204 } 2205 2206 // private data members --------------------------------------------- 2207 2208 /** 2209 * Identification code for this UnicodeBlock 2210 */ 2211 private int m_id_; 2212 2213 // private constructor ---------------------------------------------- 2214 2215 /** 2216 * UnicodeBlock constructor 2217 * @param name name of this UnicodeBlock 2218 * @param id unique id of this UnicodeBlock 2219 * @exception NullPointerException if name is <code>null</code> 2220 */ 2221 private 
/**
 * Integer constants for East Asian Width property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#EAST_ASIAN_WIDTH
 * @see UCharacter#getIntPropertyValue
 */
public interface EastAsianWidth {
    /***/ int NEUTRAL = 0;
    /***/ int AMBIGUOUS = 1;
    /***/ int HALFWIDTH = 2;
    /***/ int FULLWIDTH = 3;
    /***/ int NARROW = 4;
    /***/ int WIDE = 5;

    /**
     * One more than the highest normal EastAsianWidth value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 6;
}
/**
 * Integer constants for Decomposition Type property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#DECOMPOSITION_TYPE
 */
public interface DecompositionType {
    /***/ int NONE = 0;
    /***/ int CANONICAL = 1;
    /***/ int COMPAT = 2;
    /***/ int CIRCLE = 3;
    /***/ int FINAL = 4;
    /***/ int FONT = 5;
    /***/ int FRACTION = 6;
    /***/ int INITIAL = 7;
    /***/ int ISOLATED = 8;
    /***/ int MEDIAL = 9;
    /***/ int NARROW = 10;
    /***/ int NOBREAK = 11;
    /***/ int SMALL = 12;
    /***/ int SQUARE = 13;
    /***/ int SUB = 14;
    /***/ int SUPER = 15;
    /***/ int VERTICAL = 16;
    /***/ int WIDE = 17;

    /**
     * One more than the highest normal DecompositionType value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 18;
}
/**
 * Integer constants for Joining Group property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#JOINING_GROUP
 */
public interface JoiningGroup {
    /***/ int NO_JOINING_GROUP = 0;
    /***/ int AIN = 1;
    /***/ int ALAPH = 2;
    /***/ int ALEF = 3;
    /***/ int BEH = 4;
    /***/ int BETH = 5;
    /***/ int DAL = 6;
    /***/ int DALATH_RISH = 7;
    /***/ int E = 8;
    /***/ int FEH = 9;
    /***/ int FINAL_SEMKATH = 10;
    /***/ int GAF = 11;
    /***/ int GAMAL = 12;
    /***/ int HAH = 13;
    /***/ int TEH_MARBUTA_GOAL = 14;
    /** Same value as {@link #TEH_MARBUTA_GOAL}. */
    int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
    /***/ int HE = 15;
    /***/ int HEH = 16;
    /***/ int HEH_GOAL = 17;
    /***/ int HETH = 18;
    /***/ int KAF = 19;
    /***/ int KAPH = 20;
    /***/ int KNOTTED_HEH = 21;
    /***/ int LAM = 22;
    /***/ int LAMADH = 23;
    /***/ int MEEM = 24;
    /***/ int MIM = 25;
    /***/ int NOON = 26;
    /***/ int NUN = 27;
    /***/ int PE = 28;
    /***/ int QAF = 29;
    /***/ int QAPH = 30;
    /***/ int REH = 31;
    /***/ int REVERSED_PE = 32;
    /***/ int SAD = 33;
    /***/ int SADHE = 34;
    /***/ int SEEN = 35;
    /***/ int SEMKATH = 36;
    /***/ int SHIN = 37;
    /***/ int SWASH_KAF = 38;
    /***/ int SYRIAC_WAW = 39;
    /***/ int TAH = 40;
    /***/ int TAW = 41;
    /***/ int TEH_MARBUTA = 42;
    /***/ int TETH = 43;
    /***/ int WAW = 44;
    /***/ int YEH = 45;
    /***/ int YEH_BARREE = 46;
    /***/ int YEH_WITH_TAIL = 47;
    /***/ int YUDH = 48;
    /***/ int YUDH_HE = 49;
    /***/ int ZAIN = 50;
    /***/ int FE = 51;
    /***/ int KHAPH = 52;
    /***/ int ZHAIN = 53;
    /***/ int BURUSHASKI_YEH_BARREE = 54;
    /***/ int FARSI_YEH = 55;
    /***/ int NYA = 56;
    /***/ int ROHINGYA_YEH = 57;

    /***/ int MANICHAEAN_ALEPH = 58;
    /***/ int MANICHAEAN_AYIN = 59;
    /***/ int MANICHAEAN_BETH = 60;
    /***/ int MANICHAEAN_DALETH = 61;
    /***/ int MANICHAEAN_DHAMEDH = 62;
    /***/ int MANICHAEAN_FIVE = 63;
    /***/ int MANICHAEAN_GIMEL = 64;
    /***/ int MANICHAEAN_HETH = 65;
    /***/ int MANICHAEAN_HUNDRED = 66;
    /***/ int MANICHAEAN_KAPH = 67;
    /***/ int MANICHAEAN_LAMEDH = 68;
    /***/ int MANICHAEAN_MEM = 69;
    /***/ int MANICHAEAN_NUN = 70;
    /***/ int MANICHAEAN_ONE = 71;
    /***/ int MANICHAEAN_PE = 72;
    /***/ int MANICHAEAN_QOPH = 73;
    /***/ int MANICHAEAN_RESH = 74;
    /***/ int MANICHAEAN_SADHE = 75;
    /***/ int MANICHAEAN_SAMEKH = 76;
    /***/ int MANICHAEAN_TAW = 77;
    /***/ int MANICHAEAN_TEN = 78;
    /***/ int MANICHAEAN_TETH = 79;
    /***/ int MANICHAEAN_THAMEDH = 80;
    /***/ int MANICHAEAN_TWENTY = 81;
    /***/ int MANICHAEAN_WAW = 82;
    /***/ int MANICHAEAN_YODH = 83;
    /***/ int MANICHAEAN_ZAYIN = 84;
    /***/ int STRAIGHT_WAW = 85;

    /***/ int AFRICAN_FEH = 86;
    /***/ int AFRICAN_NOON = 87;
    /***/ int AFRICAN_QAF = 88;

    /***/ int MALAYALAM_BHA = 89;
    /***/ int MALAYALAM_JA = 90;
    /***/ int MALAYALAM_LLA = 91;
    /***/ int MALAYALAM_LLLA = 92;
    /***/ int MALAYALAM_NGA = 93;
    /***/ int MALAYALAM_NNA = 94;
    /***/ int MALAYALAM_NNNA = 95;
    /***/ int MALAYALAM_NYA = 96;
    /***/ int MALAYALAM_RA = 97;
    /***/ int MALAYALAM_SSA = 98;
    /***/ int MALAYALAM_TTA = 99;

    /**
     * One more than the highest normal JoiningGroup value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 100;
}
/**
 * Integer constants for Grapheme Cluster Break property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#GRAPHEME_CLUSTER_BREAK
 */
public interface GraphemeClusterBreak {
    /***/ int OTHER = 0;
    /***/ int CONTROL = 1;
    /***/ int CR = 2;
    /***/ int EXTEND = 3;
    /***/ int L = 4;
    /***/ int LF = 5;
    /***/ int LV = 6;
    /***/ int LVT = 7;
    /***/ int T = 8;
    /***/ int V = 9;
    /***/ int SPACING_MARK = 10;
    /***/ int PREPEND = 11;

    // New in Unicode 6.2/ICU 50.
    /***/ int REGIONAL_INDICATOR = 12;  // [RI]

    // From here on: new in Unicode 9.0/ICU 58.
    /***/ int E_BASE = 13;              // [EB]
    /***/ int E_BASE_GAZ = 14;          // [EBG]
    /***/ int E_MODIFIER = 15;          // [EM]
    /***/ int GLUE_AFTER_ZWJ = 16;      // [GAZ]
    /***/ int ZWJ = 17;                 // [ZWJ]

    /**
     * One more than the highest normal GraphemeClusterBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 18;
}
/**
 * Integer constants for Word Break property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#WORD_BREAK
 */
public interface WordBreak {
    /***/ int OTHER = 0;
    /***/ int ALETTER = 1;
    /***/ int FORMAT = 2;
    /***/ int KATAKANA = 3;
    /***/ int MIDLETTER = 4;
    /***/ int MIDNUM = 5;
    /***/ int NUMERIC = 6;
    /***/ int EXTENDNUMLET = 7;
    /***/ int CR = 8;
    /***/ int EXTEND = 9;
    /***/ int LF = 10;
    /***/ int MIDNUMLET = 11;
    /***/ int NEWLINE = 12;

    // New in Unicode 6.2/ICU 50.
    /***/ int REGIONAL_INDICATOR = 13;  // [RI]

    // From here on: new in Unicode 6.3/ICU 52.
    /***/ int HEBREW_LETTER = 14;       // [HL]
    /***/ int SINGLE_QUOTE = 15;        // [SQ]
    /***/ int DOUBLE_QUOTE = 16;        // [DQ]

    // From here on: new in Unicode 9.0/ICU 58.
    /***/ int E_BASE = 17;              // [EB]
    /***/ int E_BASE_GAZ = 18;          // [EBG]
    /***/ int E_MODIFIER = 19;          // [EM]
    /***/ int GLUE_AFTER_ZWJ = 20;      // [GAZ]
    /***/ int ZWJ = 21;                 // [ZWJ]

    /**
     * One more than the highest normal WordBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 22;
}
/**
 * Integer constants for Sentence Break property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#SENTENCE_BREAK
 */
public interface SentenceBreak {
    /***/ int OTHER = 0;
    /***/ int ATERM = 1;
    /***/ int CLOSE = 2;
    /***/ int FORMAT = 3;
    /***/ int LOWER = 4;
    /***/ int NUMERIC = 5;
    /***/ int OLETTER = 6;
    /***/ int SEP = 7;
    /***/ int SP = 8;
    /***/ int STERM = 9;
    /***/ int UPPER = 10;
    /***/ int CR = 11;
    /***/ int EXTEND = 12;
    /***/ int LF = 13;
    /***/ int SCONTINUE = 14;

    /**
     * One more than the highest normal SentenceBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 15;
}
/**
 * Integer constants for Line Break property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 * @see UProperty#LINE_BREAK
 */
public interface LineBreak {
    /***/ int UNKNOWN = 0;
    /***/ int AMBIGUOUS = 1;
    /***/ int ALPHABETIC = 2;
    /***/ int BREAK_BOTH = 3;
    /***/ int BREAK_AFTER = 4;
    /***/ int BREAK_BEFORE = 5;
    /***/ int MANDATORY_BREAK = 6;
    /***/ int CONTINGENT_BREAK = 7;
    /***/ int CLOSE_PUNCTUATION = 8;
    /***/ int COMBINING_MARK = 9;
    /***/ int CARRIAGE_RETURN = 10;
    /***/ int EXCLAMATION = 11;
    /***/ int GLUE = 12;
    /***/ int HYPHEN = 13;
    /***/ int IDEOGRAPHIC = 14;
    /**
     * Misspelled name that shares its value with {@link #INSEPARABLE}.
     * @see #INSEPARABLE
     */
    int INSEPERABLE = 15;
    /**
     * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
     */
    int INSEPARABLE = 15;
    /***/ int INFIX_NUMERIC = 16;
    /***/ int LINE_FEED = 17;
    /***/ int NONSTARTER = 18;
    /***/ int NUMERIC = 19;
    /***/ int OPEN_PUNCTUATION = 20;
    /***/ int POSTFIX_NUMERIC = 21;
    /***/ int PREFIX_NUMERIC = 22;
    /***/ int QUOTATION = 23;
    /***/ int COMPLEX_CONTEXT = 24;
    /***/ int SURROGATE = 25;
    /***/ int SPACE = 26;
    /***/ int BREAK_SYMBOLS = 27;
    /***/ int ZWSPACE = 28;

    // From here on: new in Unicode 4/ICU 2.6.
    /***/ int NEXT_LINE = 29;                     // [NL]
    /***/ int WORD_JOINER = 30;                   // [WJ]

    // From here on: new in Unicode 4.1/ICU 3.4.
    /***/ int H2 = 31;
    /***/ int H3 = 32;
    /***/ int JL = 33;
    /***/ int JT = 34;
    /***/ int JV = 35;

    /***/ int CLOSE_PARENTHESIS = 36;             // [CP] new in Unicode 5.2/ICU 4.4
    /***/ int CONDITIONAL_JAPANESE_STARTER = 37;  // [CJ] new in Unicode 6.1/ICU 49
    /***/ int HEBREW_LETTER = 38;                 // [HL] new in Unicode 6.1/ICU 49
    /***/ int REGIONAL_INDICATOR = 39;            // [RI] new in Unicode 6.2/ICU 50

    // From here on: new in Unicode 9.0/ICU 58.
    /***/ int E_BASE = 40;                        // [EB]
    /***/ int E_MODIFIER = 41;                    // [EM]
    /***/ int ZWJ = 42;                           // [ZWJ]

    /**
     * One more than the highest normal LineBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 43;
}
/**
 * Integer constants for Bidi Paired Bracket Type property values.
 * Members carry no explicit modifiers: fields of an interface are
 * implicitly {@code public static final}.
 *
 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
 */
public interface BidiPairedBracketType {
    /** Not a paired bracket. */
    int NONE = 0;

    /** Open paired bracket. */
    int OPEN = 1;

    /** Close paired bracket. */
    int CLOSE = 2;

    /**
     * One more than the highest normal BidiPairedBracketType value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 3;
}
/**
 * The minimum value for Supplementary code points, constant U+10000.
 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
 */
public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;

/**
 * Unicode value used when translating into Unicode encoding form and there
 * is no existing character. The constant is the code point U+FFFD.
 */
public static final int REPLACEMENT_CHAR = '\uFFFD';

/**
 * Special value that is returned by getUnicodeNumericValue(int) when no
 * numeric value is defined for a code point.
 * The sentinel is the double value -123456789.
 * @see #getUnicodeNumericValue
 */
public static final double NO_NUMERIC_VALUE = -123456789;

/**
 * Compatibility constant for Java Character's MIN_RADIX.
 * Same as {@link Character#MIN_RADIX}.
 */
public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;

/**
 * Compatibility constant for Java Character's MAX_RADIX.
 * Same as {@link Character#MAX_RADIX}.
 */
public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;

/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit (0x100) for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will titlecase the first cased character
 * of a word and lowercase all other characters.
 * With this option, the other characters will not be modified.
 *
 * @see #toTitleCase
 */
public static final int TITLECASE_NO_LOWERCASE = 0x100;
Between each pair of word boundaries, find the first 3158 * cased character F. If F exists, map F to default_title(F); then map each 3159 * subsequent character C to default_lower(C). 3160 * 3161 * @see #toTitleCase 3162 * @see #TITLECASE_NO_LOWERCASE 3163 */ 3164 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3165 3166 // public methods ---------------------------------------------------- 3167 3168 /** 3169 * Returnss the numeric value of a decimal digit code point. 3170 * <br>This method observes the semantics of 3171 * <code>java.lang.Character.digit()</code>. Note that this 3172 * will return positive values for code points for which isDigit 3173 * returns false, just like java.lang.Character. 3174 * <br><em>Semantic Change:</em> In release 1.3.1 and 3175 * prior, this did not treat the European letters as having a 3176 * digit value, and also treated numeric letters and other numbers as 3177 * digits. 3178 * This has been changed to conform to the java semantics. 3179 * <br>A code point is a valid digit if and only if: 3180 * <ul> 3181 * <li>ch is a decimal digit or one of the european letters, and 3182 * <li>the value of ch is less than the specified radix. 3183 * </ul> 3184 * @param ch the code point to query 3185 * @param radix the radix 3186 * @return the numeric value represented by the code point in the 3187 * specified radix, or -1 if the code point is not a decimal digit 3188 * or if its value is too large for the radix 3189 */ 3190 public static int digit(int ch, int radix) 3191 { 3192 if (2 <= radix && radix <= 36) { 3193 int value = digit(ch); 3194 if (value < 0) { 3195 // ch is not a decimal digit, try latin letters 3196 value = UCharacterProperty.getEuropeanDigit(ch); 3197 } 3198 return (value < radix) ? value : -1; 3199 } else { 3200 return -1; // invalid radix 3201 } 3202 } 3203 3204 /** 3205 * Returnss the numeric value of a decimal digit code point. 
3206 * <br>This is a convenience overload of <code>digit(int, int)</code> 3207 * that provides a decimal radix. 3208 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3209 * treated numeric letters and other numbers as digits. This has 3210 * been changed to conform to the java semantics. 3211 * @param ch the code point to query 3212 * @return the numeric value represented by the code point, 3213 * or -1 if the code point is not a decimal digit or if its 3214 * value is too large for a decimal radix 3215 */ 3216 public static int digit(int ch) 3217 { 3218 return UCharacterProperty.INSTANCE.digit(ch); 3219 } 3220 3221 /** 3222 * Returns the numeric value of the code point as a nonnegative 3223 * integer. 3224 * <br>If the code point does not have a numeric value, then -1 is returned. 3225 * <br> 3226 * If the code point has a numeric value that cannot be represented as a 3227 * nonnegative integer (for example, a fractional value), then -2 is 3228 * returned. 3229 * @param ch the code point to query 3230 * @return the numeric value of the code point, or -1 if it has no numeric 3231 * value, or -2 if it has a numeric value that cannot be represented as a 3232 * nonnegative integer 3233 */ 3234 public static int getNumericValue(int ch) 3235 { 3236 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3237 } 3238 3239 /** 3240 * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the 3241 * Unicode Character Database. 3242 * <p>A "double" return type is necessary because some numeric values are 3243 * fractions, negative, or too large for int. 3244 * <p>For characters without any numeric values in the Unicode Character 3245 * Database, this function will return NO_NUMERIC_VALUE. 3246 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 
3247 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3248 * return type int and returns -1 when the argument ch does not have a 3249 * corresponding numeric value. This has been changed to synch with ICU4C 3250 * 3251 * This corresponds to the ICU4C function u_getNumericValue. 3252 * @param ch Code point to get the numeric value for. 3253 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3254 */ 3255 public static double getUnicodeNumericValue(int ch) 3256 { 3257 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3258 } 3259 3260 /** 3261 * Compatibility override of Java deprecated method. This 3262 * method will always remain deprecated. 3263 * Same as java.lang.Character.isSpace(). 3264 * @param ch the code point 3265 * @return true if the code point is a space character as 3266 * defined by java.lang.Character.isSpace. 3267 * @deprecated ICU 3.4 (Java) 3268 * @hide original deprecated declaration 3269 */ 3270 @Deprecated 3271 public static boolean isSpace(int ch) { 3272 return ch <= 0x20 && 3273 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3274 } 3275 3276 /** 3277 * Returns a value indicating a code point's Unicode category. 3278 * Up-to-date Unicode implementation of java.lang.Character.getType() 3279 * except for the above mentioned code points that had their category 3280 * changed.<br> 3281 * Return results are constants from the interface 3282 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3283 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3284 * those returned by java.lang.Character.getType. UCharacterCategory values 3285 * match the ones used in ICU4C, while java.lang.Character type 3286 * values, though similar, skip the value 17. 
3287 * @param ch code point whose type is to be determined 3288 * @return category which is a value of UCharacterCategory 3289 */ 3290 public static int getType(int ch) 3291 { 3292 return UCharacterProperty.INSTANCE.getType(ch); 3293 } 3294 3295 /** 3296 * Determines if a code point has a defined meaning in the up-to-date 3297 * Unicode standard. 3298 * E.g. supplementary code points though allocated space are not defined in 3299 * Unicode yet.<br> 3300 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3301 * @param ch code point to be determined if it is defined in the most 3302 * current version of Unicode 3303 * @return true if this code point is defined in unicode 3304 */ 3305 public static boolean isDefined(int ch) 3306 { 3307 return getType(ch) != 0; 3308 } 3309 3310 /** 3311 * Determines if a code point is a Java digit. 3312 * <br>This method observes the semantics of 3313 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3314 * digits only. 3315 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3316 * numeric letters and other numbers as digits. 3317 * This has been changed to conform to the java semantics. 3318 * @param ch code point to query 3319 * @return true if this code point is a digit 3320 */ 3321 public static boolean isDigit(int ch) 3322 { 3323 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3324 } 3325 3326 /** 3327 * Determines if the specified code point is an ISO control character. 
3328 * A code point is considered to be an ISO control character if it is in 3329 * the range \u0000 through \u001F or in the range \u007F through 3330 * \u009F.<br> 3331 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3332 * @param ch code point to determine if it is an ISO control character 3333 * @return true if code point is a ISO control character 3334 */ 3335 public static boolean isISOControl(int ch) 3336 { 3337 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3338 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3339 } 3340 3341 /** 3342 * Determines if the specified code point is a letter. 3343 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3344 * @param ch code point to determine if it is a letter 3345 * @return true if code point is a letter 3346 */ 3347 public static boolean isLetter(int ch) 3348 { 3349 // if props == 0, it will just fall through and return false 3350 return ((1 << getType(ch)) 3351 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3352 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3353 | (1 << UCharacterCategory.TITLECASE_LETTER) 3354 | (1 << UCharacterCategory.MODIFIER_LETTER) 3355 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3356 } 3357 3358 /** 3359 * Determines if the specified code point is a letter or digit. 3360 * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii 3361 * characters 'A' - 'Z' and 'a' - 'z' as digits. 
3362 * @param ch code point to determine if it is a letter or a digit 3363 * @return true if code point is a letter or a digit 3364 */ 3365 public static boolean isLetterOrDigit(int ch) 3366 { 3367 return ((1 << getType(ch)) 3368 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3369 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3370 | (1 << UCharacterCategory.TITLECASE_LETTER) 3371 | (1 << UCharacterCategory.MODIFIER_LETTER) 3372 | (1 << UCharacterCategory.OTHER_LETTER) 3373 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3374 } 3375 3376 /** 3377 * Compatibility override of Java deprecated method. This 3378 * method will always remain deprecated. Delegates to 3379 * java.lang.Character.isJavaIdentifierStart. 3380 * @param cp the code point 3381 * @return true if the code point can start a java identifier. 3382 * @deprecated ICU 3.4 (Java) 3383 * @hide original deprecated declaration 3384 */ 3385 @Deprecated 3386 public static boolean isJavaLetter(int cp) { 3387 return isJavaIdentifierStart(cp); 3388 } 3389 3390 /** 3391 * Compatibility override of Java deprecated method. This 3392 * method will always remain deprecated. Delegates to 3393 * java.lang.Character.isJavaIdentifierPart. 3394 * @param cp the code point 3395 * @return true if the code point can continue a java identifier. 3396 * @deprecated ICU 3.4 (Java) 3397 * @hide original deprecated declaration 3398 */ 3399 @Deprecated 3400 public static boolean isJavaLetterOrDigit(int cp) { 3401 return isJavaIdentifierPart(cp); 3402 } 3403 3404 /** 3405 * Compatibility override of Java method, delegates to 3406 * java.lang.Character.isJavaIdentifierStart. 3407 * @param cp the code point 3408 * @return true if the code point can start a java identifier. 
3409 */ 3410 public static boolean isJavaIdentifierStart(int cp) { 3411 // note, downcast to char for jdk 1.4 compatibility 3412 return java.lang.Character.isJavaIdentifierStart((char)cp); 3413 } 3414 3415 /** 3416 * Compatibility override of Java method, delegates to 3417 * java.lang.Character.isJavaIdentifierPart. 3418 * @param cp the code point 3419 * @return true if the code point can continue a java identifier. 3420 */ 3421 public static boolean isJavaIdentifierPart(int cp) { 3422 // note, downcast to char for jdk 1.4 compatibility 3423 return java.lang.Character.isJavaIdentifierPart((char)cp); 3424 } 3425 3426 /** 3427 * Determines if the specified code point is a lowercase character. 3428 * UnicodeData only contains case mappings for code points where they are 3429 * one-to-one mappings; it also omits information about context-sensitive 3430 * case mappings.<br> For more information about Unicode case mapping 3431 * please refer to the 3432 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3433 * #21</a>.<br> 3434 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3435 * @param ch code point to determine if it is in lowercase 3436 * @return true if code point is a lowercase character 3437 */ 3438 public static boolean isLowerCase(int ch) 3439 { 3440 // if props == 0, it will just fall through and return false 3441 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3442 } 3443 3444 /** 3445 * Determines if the specified code point is a white space character. 3446 * A code point is considered to be an whitespace character if and only 3447 * if it satisfies one of the following criteria: 3448 * <ul> 3449 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3450 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3451 * <li> It is \u0009, HORIZONTAL TABULATION. 3452 * <li> It is \u000A, LINE FEED. 3453 * <li> It is \u000B, VERTICAL TABULATION. 
3454 * <li> It is \u000C, FORM FEED. 3455 * <li> It is \u000D, CARRIAGE RETURN. 3456 * <li> It is \u001C, FILE SEPARATOR. 3457 * <li> It is \u001D, GROUP SEPARATOR. 3458 * <li> It is \u001E, RECORD SEPARATOR. 3459 * <li> It is \u001F, UNIT SEPARATOR. 3460 * </ul> 3461 * 3462 * This API tries to sync with the semantics of Java's 3463 * java.lang.Character.isWhitespace(), but it may not return 3464 * the exact same results because of the Unicode version 3465 * difference. 3466 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3467 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 3468 * See http://www.unicode.org/versions/Unicode4.0.1/ 3469 * @param ch code point to determine if it is a white space 3470 * @return true if the specified code point is a white space character 3471 */ 3472 public static boolean isWhitespace(int ch) 3473 { 3474 // exclude no-break spaces 3475 // if props == 0, it will just fall through and return false 3476 return ((1 << getType(ch)) & 3477 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3478 | (1 << UCharacterCategory.LINE_SEPARATOR) 3479 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3480 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3481 // TAB VT LF FF CR FS GS RS US NL are all control characters 3482 // that are white spaces. 3483 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3484 } 3485 3486 /** 3487 * Determines if the specified code point is a Unicode specified space 3488 * character, i.e. if code point is in the category Zs, Zl and Zp. 3489 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
3490 * @param ch code point to determine if it is a space 3491 * @return true if the specified code point is a space character 3492 */ 3493 public static boolean isSpaceChar(int ch) 3494 { 3495 // if props == 0, it will just fall through and return false 3496 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3497 | (1 << UCharacterCategory.LINE_SEPARATOR) 3498 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3499 != 0; 3500 } 3501 3502 /** 3503 * Determines if the specified code point is a titlecase character. 3504 * UnicodeData only contains case mappings for code points where they are 3505 * one-to-one mappings; it also omits information about context-sensitive 3506 * case mappings.<br> 3507 * For more information about Unicode case mapping please refer to the 3508 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3509 * Technical report #21</a>.<br> 3510 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 3511 * @param ch code point to determine if it is in title case 3512 * @return true if the specified code point is a titlecase character 3513 */ 3514 public static boolean isTitleCase(int ch) 3515 { 3516 // if props == 0, it will just fall through and return false 3517 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3518 } 3519 3520 /** 3521 * Determines if the specified code point may be any part of a Unicode 3522 * identifier other than the starting character. 
3523 * A code point may be part of a Unicode identifier if and only if it is 3524 * one of the following: 3525 * <ul> 3526 * <li> Lu Uppercase letter 3527 * <li> Ll Lowercase letter 3528 * <li> Lt Titlecase letter 3529 * <li> Lm Modifier letter 3530 * <li> Lo Other letter 3531 * <li> Nl Letter number 3532 * <li> Pc Connecting punctuation character 3533 * <li> Nd decimal number 3534 * <li> Mc Spacing combining mark 3535 * <li> Mn Non-spacing mark 3536 * <li> Cf formatting code 3537 * </ul> 3538 * Up-to-date Unicode implementation of 3539 * java.lang.Character.isUnicodeIdentifierPart().<br> 3540 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3541 * @param ch code point to determine if is can be part of a Unicode 3542 * identifier 3543 * @return true if code point is any character belonging a unicode 3544 * identifier suffix after the first character 3545 */ 3546 public static boolean isUnicodeIdentifierPart(int ch) 3547 { 3548 // if props == 0, it will just fall through and return false 3549 // cat == format 3550 return ((1 << getType(ch)) 3551 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3552 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3553 | (1 << UCharacterCategory.TITLECASE_LETTER) 3554 | (1 << UCharacterCategory.MODIFIER_LETTER) 3555 | (1 << UCharacterCategory.OTHER_LETTER) 3556 | (1 << UCharacterCategory.LETTER_NUMBER) 3557 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3558 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3559 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3560 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3561 || isIdentifierIgnorable(ch); 3562 } 3563 3564 /** 3565 * Determines if the specified code point is permissible as the first 3566 * character in a Unicode identifier. 
3567 * A code point may start a Unicode identifier if it is of type either 3568 * <ul> 3569 * <li> Lu Uppercase letter 3570 * <li> Ll Lowercase letter 3571 * <li> Lt Titlecase letter 3572 * <li> Lm Modifier letter 3573 * <li> Lo Other letter 3574 * <li> Nl Letter number 3575 * </ul> 3576 * Up-to-date Unicode implementation of 3577 * java.lang.Character.isUnicodeIdentifierStart().<br> 3578 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3579 * @param ch code point to determine if it can start a Unicode identifier 3580 * @return true if code point is the first character belonging a unicode 3581 * identifier 3582 */ 3583 public static boolean isUnicodeIdentifierStart(int ch) 3584 { 3585 /*int cat = getType(ch);*/ 3586 // if props == 0, it will just fall through and return false 3587 return ((1 << getType(ch)) 3588 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3589 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3590 | (1 << UCharacterCategory.TITLECASE_LETTER) 3591 | (1 << UCharacterCategory.MODIFIER_LETTER) 3592 | (1 << UCharacterCategory.OTHER_LETTER) 3593 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3594 } 3595 3596 /** 3597 * Determines if the specified code point should be regarded as an 3598 * ignorable character in a Java identifier. 3599 * A character is Java-identifier-ignorable if it has the general category 3600 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 3601 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 3602 * Up-to-date Unicode implementation of 3603 * java.lang.Character.isIdentifierIgnorable().<br> 3604 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3605 * <p>Note that Unicode just recommends to ignore Cf (format controls). 3606 * @param ch code point to be determined if it can be ignored in a Unicode 3607 * identifier. 
3608 * @return true if the code point is ignorable 3609 */ 3610 public static boolean isIdentifierIgnorable(int ch) 3611 { 3612 // see java.lang.Character.isIdentifierIgnorable() on range of 3613 // ignorable characters. 3614 if (ch <= 0x9f) { 3615 return isISOControl(ch) 3616 && !((ch >= 0x9 && ch <= 0xd) 3617 || (ch >= 0x1c && ch <= 0x1f)); 3618 } 3619 return getType(ch) == UCharacterCategory.FORMAT; 3620 } 3621 3622 /** 3623 * Determines if the specified code point is an uppercase character. 3624 * UnicodeData only contains case mappings for code point where they are 3625 * one-to-one mappings; it also omits information about context-sensitive 3626 * case mappings.<br> 3627 * For language specific case conversion behavior, use 3628 * toUpperCase(locale, str). <br> 3629 * For example, the case conversion for dot-less i and dotted I in Turkish, 3630 * or for final sigma in Greek. 3631 * For more information about Unicode case mapping please refer to the 3632 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3633 * Technical report #21</a>.<br> 3634 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 3635 * @param ch code point to determine if it is in uppercase 3636 * @return true if the code point is an uppercase character 3637 */ 3638 public static boolean isUpperCase(int ch) 3639 { 3640 // if props == 0, it will just fall through and return false 3641 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 3642 } 3643 3644 /** 3645 * The given code point is mapped to its lowercase equivalent; if the code 3646 * point has no lowercase equivalent, the code point itself is returned. 3647 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 3648 * 3649 * <p>This function only returns the simple, single-code point case mapping. 3650 * Full case mappings should be used whenever possible because they produce 3651 * better results by working on whole strings. 
3652 * They take into account the string context and the language and can map 3653 * to a result string with a different length as appropriate. 3654 * Full case mappings are applied by the case mapping functions 3655 * that take String parameters rather than code points (int). 3656 * See also the User Guide chapter on C/POSIX migration: 3657 * http://www.icu-project.org/userguide/posix.html#case_mappings 3658 * 3659 * @param ch code point whose lowercase equivalent is to be retrieved 3660 * @return the lowercase equivalent code point 3661 */ 3662 public static int toLowerCase(int ch) { 3663 return UCaseProps.INSTANCE.tolower(ch); 3664 } 3665 3666 /** 3667 * Converts argument code point and returns a String object representing 3668 * the code point's value in UTF-16 format. 3669 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 3670 * 3671 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 3672 * 3673 * @param ch code point 3674 * @return string representation of the code point, null if code point is not 3675 * defined in unicode 3676 */ 3677 public static String toString(int ch) 3678 { 3679 if (ch < MIN_VALUE || ch > MAX_VALUE) { 3680 return null; 3681 } 3682 3683 if (ch < SUPPLEMENTARY_MIN_VALUE) { 3684 return String.valueOf((char)ch); 3685 } 3686 3687 return new String(Character.toChars(ch)); 3688 } 3689 3690 /** 3691 * Converts the code point argument to titlecase. 3692 * If no titlecase is available, the uppercase is returned. If no uppercase 3693 * is available, the code point itself is returned. 3694 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 3695 * 3696 * <p>This function only returns the simple, single-code point case mapping. 3697 * Full case mappings should be used whenever possible because they produce 3698 * better results by working on whole strings. 
3699 * They take into account the string context and the language and can map 3700 * to a result string with a different length as appropriate. 3701 * Full case mappings are applied by the case mapping functions 3702 * that take String parameters rather than code points (int). 3703 * See also the User Guide chapter on C/POSIX migration: 3704 * http://www.icu-project.org/userguide/posix.html#case_mappings 3705 * 3706 * @param ch code point whose title case is to be retrieved 3707 * @return titlecase code point 3708 */ 3709 public static int toTitleCase(int ch) { 3710 return UCaseProps.INSTANCE.totitle(ch); 3711 } 3712 3713 /** 3714 * Converts the character argument to uppercase. 3715 * If no uppercase is available, the character itself is returned. 3716 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 3717 * 3718 * <p>This function only returns the simple, single-code point case mapping. 3719 * Full case mappings should be used whenever possible because they produce 3720 * better results by working on whole strings. 3721 * They take into account the string context and the language and can map 3722 * to a result string with a different length as appropriate. 3723 * Full case mappings are applied by the case mapping functions 3724 * that take String parameters rather than code points (int). 3725 * See also the User Guide chapter on C/POSIX migration: 3726 * http://www.icu-project.org/userguide/posix.html#case_mappings 3727 * 3728 * @param ch code point whose uppercase is to be retrieved 3729 * @return uppercase code point 3730 */ 3731 public static int toUpperCase(int ch) { 3732 return UCaseProps.INSTANCE.toupper(ch); 3733 } 3734 3735 // extra methods not in java.lang.Character -------------------------- 3736 3737 /** 3738 * <strong>[icu]</strong> Determines if the code point is a supplementary character. 
3739 * A code point is a supplementary character if and only if it is greater 3740 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 3741 * @param ch code point to be determined if it is in the supplementary 3742 * plane 3743 * @return true if code point is a supplementary character 3744 */ 3745 public static boolean isSupplementary(int ch) 3746 { 3747 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 3748 ch <= UCharacter.MAX_VALUE; 3749 } 3750 3751 /** 3752 * <strong>[icu]</strong> Determines if the code point is in the BMP plane. 3753 * @param ch code point to be determined if it is not a supplementary 3754 * character 3755 * @return true if code point is not a supplementary character 3756 */ 3757 public static boolean isBMP(int ch) 3758 { 3759 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 3760 } 3761 3762 /** 3763 * <strong>[icu]</strong> Determines whether the specified code point is a printable character 3764 * according to the Unicode standard. 3765 * @param ch code point to be determined if it is printable 3766 * @return true if the code point is a printable character 3767 */ 3768 public static boolean isPrintable(int ch) 3769 { 3770 int cat = getType(ch); 3771 // if props == 0, it will just fall through and return false 3772 return (cat != UCharacterCategory.UNASSIGNED && 3773 cat != UCharacterCategory.CONTROL && 3774 cat != UCharacterCategory.FORMAT && 3775 cat != UCharacterCategory.PRIVATE_USE && 3776 cat != UCharacterCategory.SURROGATE && 3777 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 3778 } 3779 3780 /** 3781 * <strong>[icu]</strong> Determines whether the specified code point is of base form. 3782 * A code point of base form does not graphically combine with preceding 3783 * characters, and is neither a control nor a format character. 
3784 * @param ch code point to be determined if it is of base form 3785 * @return true if the code point is of base form 3786 */ 3787 public static boolean isBaseForm(int ch) 3788 { 3789 int cat = getType(ch); 3790 // if props == 0, it will just fall through and return false 3791 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 3792 cat == UCharacterCategory.OTHER_NUMBER || 3793 cat == UCharacterCategory.LETTER_NUMBER || 3794 cat == UCharacterCategory.UPPERCASE_LETTER || 3795 cat == UCharacterCategory.LOWERCASE_LETTER || 3796 cat == UCharacterCategory.TITLECASE_LETTER || 3797 cat == UCharacterCategory.MODIFIER_LETTER || 3798 cat == UCharacterCategory.OTHER_LETTER || 3799 cat == UCharacterCategory.NON_SPACING_MARK || 3800 cat == UCharacterCategory.ENCLOSING_MARK || 3801 cat == UCharacterCategory.COMBINING_SPACING_MARK; 3802 } 3803 3804 /** 3805 * <strong>[icu]</strong> Returns the Bidirection property of a code point. 3806 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 3807 * property.<br> 3808 * Result returned belongs to the interface 3809 * <a href=UCharacterDirection.html>UCharacterDirection</a> 3810 * @param ch the code point to be determined its direction 3811 * @return direction constant from UCharacterDirection. 3812 */ 3813 public static int getDirection(int ch) 3814 { 3815 return UBiDiProps.INSTANCE.getClass(ch); 3816 } 3817 3818 /** 3819 * Determines whether the code point has the "mirrored" property. 3820 * This property is set for characters that are commonly used in 3821 * Right-To-Left contexts and need to be displayed with a "mirrored" 3822 * glyph. 3823 * @param ch code point whose mirror is to be determined 3824 * @return true if the code point has the "mirrored" property 3825 */ 3826 public static boolean isMirrored(int ch) 3827 { 3828 return UBiDiProps.INSTANCE.isMirrored(ch); 3829 } 3830 3831 /** 3832 * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point. 
3833 * For code points with the "mirrored" property, implementations sometimes 3834 * need a "poor man's" mapping to another code point such that the default 3835 * glyph may serve as the mirror-image of the default glyph of the 3836 * specified code point.<br> 3837 * This is useful for text conversion to and from codepages with visual 3838 * order, and for displays without glyph selection capabilities. 3839 * @param ch code point whose mirror is to be retrieved 3840 * @return another code point that may serve as a mirror-image substitute, 3841 * or ch itself if there is no such mapping or ch does not have the 3842 * "mirrored" property 3843 */ 3844 public static int getMirror(int ch) 3845 { 3846 return UBiDiProps.INSTANCE.getMirror(ch); 3847 } 3848 3849 /** 3850 * <strong>[icu]</strong> Maps the specified character to its paired bracket character. 3851 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 3852 * Otherwise c itself is returned. 3853 * See http://www.unicode.org/reports/tr9/ 3854 * 3855 * @param c the code point to be mapped 3856 * @return the paired bracket code point, 3857 * or c itself if there is no such mapping 3858 * (Bidi_Paired_Bracket_Type=None) 3859 * 3860 * @see UProperty#BIDI_PAIRED_BRACKET 3861 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3862 * @see #getMirror(int) 3863 */ 3864 public static int getBidiPairedBracket(int c) { 3865 return UBiDiProps.INSTANCE.getPairedBracket(c); 3866 } 3867 3868 /** 3869 * <strong>[icu]</strong> Returns the combining class of the argument codepoint 3870 * @param ch code point whose combining is to be retrieved 3871 * @return the combining class of the codepoint 3872 */ 3873 public static int getCombiningClass(int ch) 3874 { 3875 return Normalizer2.getNFDInstance().getCombiningClass(ch); 3876 } 3877 3878 /** 3879 * <strong>[icu]</strong> A code point is illegal if and only if 3880 * <ul> 3881 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 3882 * <li> A surrogate 
value, 0xD800 to 0xDFFF 3883 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 3884 * </ul> 3885 * Note: legal does not mean that it is assigned in this version of Unicode. 3886 * @param ch code point to determine if it is a legal code point by itself 3887 * @return true if and only if legal. 3888 */ 3889 public static boolean isLegal(int ch) 3890 { 3891 if (ch < MIN_VALUE) { 3892 return false; 3893 } 3894 if (ch < Character.MIN_SURROGATE) { 3895 return true; 3896 } 3897 if (ch <= Character.MAX_SURROGATE) { 3898 return false; 3899 } 3900 if (UCharacterUtility.isNonCharacter(ch)) { 3901 return false; 3902 } 3903 return (ch <= MAX_VALUE); 3904 } 3905 3906 /** 3907 * <strong>[icu]</strong> A string is legal iff all its code points are legal. 3908 * A code point is illegal if and only if 3909 * <ul> 3910 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 3911 * <li> A surrogate value, 0xD800 to 0xDFFF 3912 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 3913 * </ul> 3914 * Note: legal does not mean that it is assigned in this version of Unicode. 3915 * @param str containing code points to examin 3916 * @return true if and only if legal. 3917 */ 3918 public static boolean isLegal(String str) 3919 { 3920 int size = str.length(); 3921 int codepoint; 3922 for (int i = 0; i < size; i += Character.charCount(codepoint)) 3923 { 3924 codepoint = str.codePointAt(i); 3925 if (!isLegal(codepoint)) { 3926 return false; 3927 } 3928 } 3929 return true; 3930 } 3931 3932 /** 3933 * <strong>[icu]</strong> Returns the version of Unicode data used. 
3934 * @return the unicode version number used 3935 */ 3936 public static VersionInfo getUnicodeVersion() 3937 { 3938 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 3939 } 3940 3941 /** 3942 * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or 3943 * null if the character is unassigned or outside the range 3944 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 3945 * <br> 3946 * Note calling any methods related to code point names, e.g. get*Name*() 3947 * incurs a one-time initialisation cost to construct the name tables. 3948 * @param ch the code point for which to get the name 3949 * @return most current Unicode name 3950 */ 3951 public static String getName(int ch) 3952 { 3953 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 3954 } 3955 3956 /** 3957 * <strong>[icu]</strong> Returns the names for each of the characters in a string 3958 * @param s string to format 3959 * @param separator string to go between names 3960 * @return string of names 3961 */ 3962 public static String getName(String s, String separator) { 3963 if (s.length() == 1) { // handle common case 3964 return getName(s.charAt(0)); 3965 } 3966 int cp; 3967 StringBuilder sb = new StringBuilder(); 3968 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 3969 cp = s.codePointAt(i); 3970 if (i != 0) sb.append(separator); 3971 sb.append(UCharacter.getName(cp)); 3972 } 3973 return sb.toString(); 3974 } 3975 3976 /** 3977 * <strong>[icu]</strong> Returns null. 3978 * Used to return the Unicode_1_Name property value which was of little practical value. 3979 * @param ch the code point for which to get the name 3980 * @return null 3981 * @deprecated ICU 49 3982 * @hide original deprecated declaration 3983 */ 3984 @Deprecated 3985 public static String getName1_0(int ch) 3986 { 3987 return null; 3988 } 3989 3990 /** 3991 * <strong>[icu]</strong> Returns a name for a valid codepoint. 
Unlike, getName(int) and 3992 * getName1_0(int), this method will return a name even for codepoints that 3993 * are not assigned a name in UnicodeData.txt. 3994 * 3995 * <p>The names are returned in the following order. 3996 * <ul> 3997 * <li> Most current Unicode name if there is any 3998 * <li> Unicode 1.0 name if there is any 3999 * <li> Extended name in the form of 4000 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4001 * </ul> 4002 * Note calling any methods related to code point names, e.g. get*Name*() 4003 * incurs a one-time initialisation cost to construct the name tables. 4004 * @param ch the code point for which to get the name 4005 * @return a name for the argument codepoint 4006 */ 4007 public static String getExtendedName(int ch) { 4008 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4009 } 4010 4011 /** 4012 * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one. 4013 * Returns null if the character is unassigned or outside the range 4014 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4015 * <br> 4016 * Note calling any methods related to code point names, e.g. get*Name*() 4017 * incurs a one-time initialisation cost to construct the name tables. 4018 * @param ch the code point for which to get the name alias 4019 * @return Unicode name alias, or null 4020 */ 4021 public static String getNameAlias(int ch) 4022 { 4023 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4024 } 4025 4026 /** 4027 * <strong>[icu]</strong> Returns null. 4028 * Used to return the ISO 10646 comment for a character. 4029 * The Unicode ISO_Comment property is deprecated and has no values. 4030 * 4031 * @param ch The code point for which to get the ISO comment. 4032 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 
4033 * @return null 4034 * @deprecated ICU 49 4035 * @hide original deprecated declaration 4036 */ 4037 @Deprecated 4038 public static String getISOComment(int ch) 4039 { 4040 return null; 4041 } 4042 4043 /** 4044 * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and 4045 * return its code point value. All Unicode names are in uppercase. 4046 * Note calling any methods related to code point names, e.g. get*Name*() 4047 * incurs a one-time initialisation cost to construct the name tables. 4048 * @param name most current Unicode character name whose code point is to 4049 * be returned 4050 * @return code point or -1 if name is not found 4051 */ 4052 public static int getCharFromName(String name){ 4053 return UCharacterName.INSTANCE.getCharFromName( 4054 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4055 } 4056 4057 /** 4058 * <strong>[icu]</strong> Returns -1. 4059 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4060 * its code point value. 4061 * @param name Unicode 1.0 code point name whose code point is to be 4062 * returned 4063 * @return -1 4064 * @deprecated ICU 49 4065 * @see #getName1_0(int) 4066 * @hide original deprecated declaration 4067 */ 4068 @Deprecated 4069 public static int getCharFromName1_0(String name){ 4070 return -1; 4071 } 4072 4073 /** 4074 * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code 4075 * point value. All Unicode names are in uppercase. 4076 * Extended names are all lowercase except for numbers and are contained 4077 * within angle brackets. 4078 * The names are searched in the following order 4079 * <ul> 4080 * <li> Most current Unicode name if there is any 4081 * <li> Unicode 1.0 name if there is any 4082 * <li> Extended name in the form of 4083 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4084 * </ul> 4085 * Note calling any methods related to code point names, e.g. 
get*Name*() 4086 * incurs a one-time initialisation cost to construct the name tables. 4087 * @param name codepoint name 4088 * @return code point associated with the name or -1 if the name is not 4089 * found. 4090 */ 4091 public static int getCharFromExtendedName(String name){ 4092 return UCharacterName.INSTANCE.getCharFromName( 4093 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4094 } 4095 4096 /** 4097 * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return 4098 * its code point value. All Unicode names are in uppercase. 4099 * Note calling any methods related to code point names, e.g. get*Name*() 4100 * incurs a one-time initialisation cost to construct the name tables. 4101 * @param name Unicode name alias whose code point is to be returned 4102 * @return code point or -1 if name is not found 4103 */ 4104 public static int getCharFromNameAlias(String name){ 4105 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4106 } 4107 4108 /** 4109 * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the 4110 * Unicode database file PropertyAliases.txt. Most properties 4111 * have more than one name. The nameChoice determines which one 4112 * is returned. 4113 * 4114 * In addition, this function maps the property 4115 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4116 * "General_Category_Mask". These names are not in 4117 * PropertyAliases.txt. 4118 * 4119 * @param property UProperty selector. 4120 * 4121 * @param nameChoice UProperty.NameChoice selector for which name 4122 * to get. All properties have a long name. Most have a short 4123 * name, but some do not. Unicode allows for additional names; if 4124 * present these will be returned by UProperty.NameChoice.LONG + i, 4125 * where i=1, 2,... 4126 * 4127 * @return a name, or null if Unicode explicitly defines no name 4128 * ("n/a") for a given property/nameChoice. 
If a given nameChoice 4129 * throws an exception, then all larger values of nameChoice will 4130 * throw an exception. If null is returned for a given 4131 * nameChoice, then other nameChoice values may return non-null 4132 * results. 4133 * 4134 * @exception IllegalArgumentException thrown if property or 4135 * nameChoice are invalid. 4136 * 4137 * @see UProperty 4138 * @see UProperty.NameChoice 4139 */ 4140 public static String getPropertyName(int property, 4141 int nameChoice) { 4142 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4143 } 4144 4145 /** 4146 * <strong>[icu]</strong> Return the UProperty selector for a given property name, as 4147 * specified in the Unicode database file PropertyAliases.txt. 4148 * Short, long, and any other variants are recognized. 4149 * 4150 * In addition, this function maps the synthetic names "gcm" / 4151 * "General_Category_Mask" to the property 4152 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4153 * PropertyAliases.txt. 4154 * 4155 * @param propertyAlias the property name to be matched. The name 4156 * is compared using "loose matching" as described in 4157 * PropertyAliases.txt. 4158 * 4159 * @return a UProperty enum. 4160 * 4161 * @exception IllegalArgumentException thrown if propertyAlias 4162 * is not recognized. 4163 * 4164 * @see UProperty 4165 */ 4166 public static int getPropertyEnum(CharSequence propertyAlias) { 4167 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4168 if (propEnum == UProperty.UNDEFINED) { 4169 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4170 } 4171 return propEnum; 4172 } 4173 4174 /** 4175 * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in 4176 * the Unicode database file PropertyValueAliases.txt. Most 4177 * values have more than one name. The nameChoice determines 4178 * which one is returned. 
4179 * 4180 * Note: Some of the names in PropertyValueAliases.txt can only be 4181 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4182 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4183 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4184 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4185 * 4186 * @param property UProperty selector constant. 4187 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4188 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4189 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4190 * If out of range, null is returned. 4191 * 4192 * @param value selector for a value for the given property. In 4193 * general, valid values range from 0 up to some maximum. There 4194 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 4195 * non-zero value BASIC_LATIN.getID(). (2.) 4196 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 4197 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 4198 * are mask values produced by left-shifting 1 by 4199 * UCharacter.getType(). This allows grouped categories such as 4200 * [:L:] to be represented. Mask values are non-contiguous. 4201 * 4202 * @param nameChoice UProperty.NameChoice selector for which name 4203 * to get. All values have a long name. Most have a short name, 4204 * but some do not. Unicode allows for additional names; if 4205 * present these will be returned by UProperty.NameChoice.LONG + i, 4206 * where i=1, 2,... 4207 * 4208 * @return a name, or null if Unicode explicitly defines no name 4209 * ("n/a") for a given property/value/nameChoice. If a given 4210 * nameChoice throws an exception, then all larger values of 4211 * nameChoice will throw an exception. If null is returned for a 4212 * given nameChoice, then other nameChoice values may return 4213 * non-null results. 
4214 * 4215 * @exception IllegalArgumentException thrown if property, value, 4216 * or nameChoice are invalid. 4217 * 4218 * @see UProperty 4219 * @see UProperty.NameChoice 4220 */ 4221 public static String getPropertyValueName(int property, 4222 int value, 4223 int nameChoice) 4224 { 4225 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4226 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4227 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4228 && value >= UCharacter.getIntPropertyMinValue( 4229 UProperty.CANONICAL_COMBINING_CLASS) 4230 && value <= UCharacter.getIntPropertyMaxValue( 4231 UProperty.CANONICAL_COMBINING_CLASS) 4232 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4233 // this is hard coded for the valid cc 4234 // because PropertyValueAliases.txt does not contain all of them 4235 try { 4236 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4237 nameChoice); 4238 } 4239 catch (IllegalArgumentException e) { 4240 return null; 4241 } 4242 } 4243 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4244 } 4245 4246 /** 4247 * <strong>[icu]</strong> Return the property value integer for a given value name, as 4248 * specified in the Unicode database file PropertyValueAliases.txt. 4249 * Short, long, and any other variants are recognized. 4250 * 4251 * Note: Some of the names in PropertyValueAliases.txt will only be 4252 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4253 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4254 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4255 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4256 * 4257 * @param property UProperty selector constant. 4258 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4259 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4260 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 
4261 * Only these properties can be enumerated. 4262 * 4263 * @param valueAlias the value name to be matched. The name is 4264 * compared using "loose matching" as described in 4265 * PropertyValueAliases.txt. 4266 * 4267 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4268 * values are mask values produced by left-shifting 1 by 4269 * UCharacter.getType(). This allows grouped categories such as 4270 * [:L:] to be represented. 4271 * 4272 * @see UProperty 4273 * @throws IllegalArgumentException if property is not a valid UProperty 4274 * selector or valueAlias is not a value of this property 4275 */ 4276 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4277 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4278 if (propEnum == UProperty.UNDEFINED) { 4279 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4280 } 4281 return propEnum; 4282 } 4283 4284 /** 4285 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4286 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4287 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4288 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4289 * @deprecated This API is ICU internal only. 4290 * @hide original deprecated declaration 4291 * @hide draft / provisional / internal are hidden on Android 4292 */ 4293 @Deprecated 4294 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4295 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4296 } 4297 4298 4299 /** 4300 * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units. 4301 * 4302 * @param lead the lead char 4303 * @param trail the trail char 4304 * @return code point if surrogate characters are valid. 
4305 * @exception IllegalArgumentException thrown when the code units do 4306 * not form a valid code point 4307 */ 4308 public static int getCodePoint(char lead, char trail) 4309 { 4310 if (Character.isSurrogatePair(lead, trail)) { 4311 return Character.toCodePoint(lead, trail); 4312 } 4313 throw new IllegalArgumentException("Illegal surrogate characters"); 4314 } 4315 4316 /** 4317 * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point. 4318 * 4319 * @param char16 the BMP code point 4320 * @return code point if argument is a valid character. 4321 * @exception IllegalArgumentException thrown when char16 is not a valid 4322 * code point 4323 */ 4324 public static int getCodePoint(char char16) 4325 { 4326 if (UCharacter.isLegal(char16)) { 4327 return char16; 4328 } 4329 throw new IllegalArgumentException("Illegal codepoint"); 4330 } 4331 4332 /** 4333 * Returns the uppercase version of the argument string. 4334 * Casing is dependent on the default locale and context-sensitive. 4335 * @param str source string to be performed on 4336 * @return uppercase version of the argument string 4337 */ 4338 public static String toUpperCase(String str) 4339 { 4340 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 4341 } 4342 4343 /** 4344 * Returns the lowercase version of the argument string. 4345 * Casing is dependent on the default locale and context-sensitive 4346 * @param str source string to be performed on 4347 * @return lowercase version of the argument string 4348 */ 4349 public static String toLowerCase(String str) 4350 { 4351 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 4352 } 4353 4354 /** 4355 * <p>Returns the titlecase version of the argument string. 4356 * <p>Position for titlecasing is determined by the argument break 4357 * iterator, hence the user can customize his break iterator for 4358 * a specialized titlecasing. In this case only the forward iteration 4359 * needs to be implemented. 
4360 * If the break iterator passed in is null, the default Unicode algorithm 4361 * will be used to determine the titlecase positions. 4362 * 4363 * <p>Only positions returned by the break iterator will be title cased, 4364 * character in between the positions will all be in lower case. 4365 * <p>Casing is dependent on the default locale and context-sensitive 4366 * @param str source string to be performed on 4367 * @param breakiter break iterator to determine the positions in which 4368 * the character should be title cased. 4369 * @return titlecase version of the argument string 4370 */ 4371 public static String toTitleCase(String str, BreakIterator breakiter) 4372 { 4373 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 4374 } 4375 4376 private static int getDefaultCaseLocale() { 4377 return UCaseProps.getCaseLocale(Locale.getDefault()); 4378 } 4379 4380 private static int getCaseLocale(Locale locale) { 4381 if (locale == null) { 4382 locale = Locale.getDefault(); 4383 } 4384 return UCaseProps.getCaseLocale(locale); 4385 } 4386 4387 private static int getCaseLocale(ULocale locale) { 4388 if (locale == null) { 4389 locale = ULocale.getDefault(); 4390 } 4391 return UCaseProps.getCaseLocale(locale); 4392 } 4393 4394 /** 4395 * Returns the uppercase version of the argument string. 4396 * Casing is dependent on the argument locale and context-sensitive. 4397 * @param locale which string is to be converted in 4398 * @param str source string to be performed on 4399 * @return uppercase version of the argument string 4400 */ 4401 public static String toUpperCase(Locale locale, String str) 4402 { 4403 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 4404 } 4405 4406 /** 4407 * Returns the uppercase version of the argument string. 4408 * Casing is dependent on the argument locale and context-sensitive. 
4409 * @param locale which string is to be converted in 4410 * @param str source string to be performed on 4411 * @return uppercase version of the argument string 4412 */ 4413 public static String toUpperCase(ULocale locale, String str) { 4414 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 4415 } 4416 4417 /** 4418 * Returns the lowercase version of the argument string. 4419 * Casing is dependent on the argument locale and context-sensitive 4420 * @param locale which string is to be converted in 4421 * @param str source string to be performed on 4422 * @return lowercase version of the argument string 4423 */ 4424 public static String toLowerCase(Locale locale, String str) 4425 { 4426 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 4427 } 4428 4429 /** 4430 * Returns the lowercase version of the argument string. 4431 * Casing is dependent on the argument locale and context-sensitive 4432 * @param locale which string is to be converted in 4433 * @param str source string to be performed on 4434 * @return lowercase version of the argument string 4435 */ 4436 public static String toLowerCase(ULocale locale, String str) { 4437 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 4438 } 4439 4440 /** 4441 * <p>Returns the titlecase version of the argument string. 4442 * <p>Position for titlecasing is determined by the argument break 4443 * iterator, hence the user can customize his break iterator for 4444 * a specialized titlecasing. In this case only the forward iteration 4445 * needs to be implemented. 4446 * If the break iterator passed in is null, the default Unicode algorithm 4447 * will be used to determine the titlecase positions. 4448 * 4449 * <p>Only positions returned by the break iterator will be title cased, 4450 * character in between the positions will all be in lower case. 
4451 * <p>Casing is dependent on the argument locale and context-sensitive 4452 * @param locale which string is to be converted in 4453 * @param str source string to be performed on 4454 * @param breakiter break iterator to determine the positions in which 4455 * the character should be title cased. 4456 * @return titlecase version of the argument string 4457 */ 4458 public static String toTitleCase(Locale locale, String str, 4459 BreakIterator breakiter) 4460 { 4461 return toTitleCase(locale, str, breakiter, 0); 4462 } 4463 4464 /** 4465 * <p>Returns the titlecase version of the argument string. 4466 * <p>Position for titlecasing is determined by the argument break 4467 * iterator, hence the user can customize his break iterator for 4468 * a specialized titlecasing. In this case only the forward iteration 4469 * needs to be implemented. 4470 * If the break iterator passed in is null, the default Unicode algorithm 4471 * will be used to determine the titlecase positions. 4472 * 4473 * <p>Only positions returned by the break iterator will be title cased, 4474 * character in between the positions will all be in lower case. 4475 * <p>Casing is dependent on the argument locale and context-sensitive 4476 * @param locale which string is to be converted in 4477 * @param str source string to be performed on 4478 * @param titleIter break iterator to determine the positions in which 4479 * the character should be title cased. 4480 * @return titlecase version of the argument string 4481 */ 4482 public static String toTitleCase(ULocale locale, String str, 4483 BreakIterator titleIter) { 4484 return toTitleCase(locale, str, titleIter, 0); 4485 } 4486 4487 /** 4488 * <p>Returns the titlecase version of the argument string. 4489 * <p>Position for titlecasing is determined by the argument break 4490 * iterator, hence the user can customize his break iterator for 4491 * a specialized titlecasing. In this case only the forward iteration 4492 * needs to be implemented. 
4493 * If the break iterator passed in is null, the default Unicode algorithm 4494 * will be used to determine the titlecase positions. 4495 * 4496 * <p>Only positions returned by the break iterator will be title cased, 4497 * character in between the positions will all be in lower case. 4498 * <p>Casing is dependent on the argument locale and context-sensitive 4499 * @param locale which string is to be converted in 4500 * @param str source string to be performed on 4501 * @param titleIter break iterator to determine the positions in which 4502 * the character should be title cased. 4503 * @param options bit set to modify the titlecasing operation 4504 * @return titlecase version of the argument string 4505 * @see #TITLECASE_NO_LOWERCASE 4506 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4507 */ 4508 public static String toTitleCase(ULocale locale, String str, 4509 BreakIterator titleIter, int options) { 4510 if (titleIter == null && locale == null) { 4511 locale = ULocale.getDefault(); 4512 } 4513 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 4514 titleIter.setText(str); 4515 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 4516 } 4517 4518 /** 4519 * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string, 4520 * and sometimes has no effect at all; the original string is returned whenever casing 4521 * would not be appropriate for the first word (such as for CJK characters or initial numbers). 4522 * Initial non-letters are skipped in order to find the character to change. 4523 * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE. 
4524 * <p>Examples: 4525 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 4526 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 4527 * <tr><td>contact us</td><td>Contact us</td></tr> 4528 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 4529 * <tr><td>(abc)</td><td>(abc)</td></tr> 4530 * <tr><td>ijs</td><td>Ijs</td></tr> 4531 * <tr><td>ijs</td><td>IJs</td><td>nl-BE</td></tr> 4532 * <tr><td>ijs</td><td>js</td><td>tr-DE</td></tr> 4533 * </table> 4534 * @param locale the locale for accessing exceptional behavior (eg for tr). 4535 * @param str the source string to change 4536 * @return the modified string, or the original if no modifications were necessary. 4537 * @deprecated ICU internal only 4538 * @hide original deprecated declaration 4539 * @hide draft / provisional / internal are hidden on Android 4540 */ 4541 @Deprecated 4542 public static String toTitleFirst(ULocale locale, String str) { 4543 // TODO: Remove this function. Inline it where it is called in CLDR. 4544 return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str); 4545 } 4546 4547 private static final android.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = 4548 android.icu.text.CaseMap.toTitle().wholeString().noLowercase(); 4549 4550 /** 4551 * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string. 4552 * <p>Position for titlecasing is determined by the argument break 4553 * iterator, hence the user can customize his break iterator for 4554 * a specialized titlecasing. In this case only the forward iteration 4555 * needs to be implemented. 4556 * If the break iterator passed in is null, the default Unicode algorithm 4557 * will be used to determine the titlecase positions. 4558 * 4559 * <p>Only positions returned by the break iterator will be title cased, 4560 * character in between the positions will all be in lower case. 
4561 * <p>Casing is dependent on the argument locale and context-sensitive 4562 * @param locale which string is to be converted in 4563 * @param str source string to be performed on 4564 * @param titleIter break iterator to determine the positions in which 4565 * the character should be title cased. 4566 * @param options bit set to modify the titlecasing operation 4567 * @return titlecase version of the argument string 4568 * @see #TITLECASE_NO_LOWERCASE 4569 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4570 */ 4571 public static String toTitleCase(Locale locale, String str, 4572 BreakIterator titleIter, 4573 int options) { 4574 if (titleIter == null && locale == null) { 4575 locale = Locale.getDefault(); 4576 } 4577 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 4578 titleIter.setText(str); 4579 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 4580 } 4581 4582 /** 4583 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4584 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4585 * folding equivalent, the character itself is returned. 4586 * 4587 * <p>This function only returns the simple, single-code point case mapping. 4588 * Full case mappings should be used whenever possible because they produce 4589 * better results by working on whole strings. 4590 * They can map to a result string with a different length as appropriate. 4591 * Full case mappings are applied by the case mapping functions 4592 * that take String parameters rather than code points (int). 
4593 * See also the User Guide chapter on C/POSIX migration: 4594 * http://www.icu-project.org/userguide/posix.html#case_mappings 4595 * 4596 * @param ch the character to be converted 4597 * @param defaultmapping Indicates whether the default mappings defined in 4598 * CaseFolding.txt are to be used, otherwise the 4599 * mappings for dotted I and dotless i marked with 4600 * 'T' in CaseFolding.txt are included. 4601 * @return the case folding equivalent of the character, if 4602 * any; otherwise the character itself. 4603 * @see #foldCase(String, boolean) 4604 */ 4605 public static int foldCase(int ch, boolean defaultmapping) { 4606 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4607 } 4608 4609 /** 4610 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4611 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4612 * folding equivalent, the character itself is returned. 4613 * "Full", multiple-code point case folding mappings are returned here. 4614 * For "simple" single-code point mappings use the API 4615 * foldCase(int ch, boolean defaultmapping). 4616 * @param str the String to be converted 4617 * @param defaultmapping Indicates whether the default mappings defined in 4618 * CaseFolding.txt are to be used, otherwise the 4619 * mappings for dotted I and dotless i marked with 4620 * 'T' in CaseFolding.txt are included. 4621 * @return the case folding equivalent of the character, if 4622 * any; otherwise the character itself. 4623 * @see #foldCase(int, boolean) 4624 */ 4625 public static String foldCase(String str, boolean defaultmapping) { 4626 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4627 } 4628 4629 /** 4630 * <strong>[icu]</strong> Option value for case folding: use default mappings defined in 4631 * CaseFolding.txt. 
4632 */ 4633 public static final int FOLD_CASE_DEFAULT = 0x0000; 4634 /** 4635 * <strong>[icu]</strong> Option value for case folding: 4636 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 4637 * and dotless i appropriately for Turkic languages (tr, az). 4638 * 4639 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 4640 * are to be included for default mappings and 4641 * excluded for the Turkic-specific mappings. 4642 * 4643 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 4644 * are to be excluded for default mappings and 4645 * included for the Turkic-specific mappings. 4646 */ 4647 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 4648 4649 /** 4650 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4651 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4652 * folding equivalent, the character itself is returned. 4653 * 4654 * <p>This function only returns the simple, single-code point case mapping. 4655 * Full case mappings should be used whenever possible because they produce 4656 * better results by working on whole strings. 4657 * They can map to a result string with a different length as appropriate. 4658 * Full case mappings are applied by the case mapping functions 4659 * that take String parameters rather than code points (int). 4660 * See also the User Guide chapter on C/POSIX migration: 4661 * http://www.icu-project.org/userguide/posix.html#case_mappings 4662 * 4663 * @param ch the character to be converted 4664 * @param options A bit set for special processing. Currently the recognised options 4665 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4666 * @return the case folding equivalent of the character, if any; otherwise the 4667 * character itself. 
4668 * @see #foldCase(String, boolean) 4669 */ 4670 public static int foldCase(int ch, int options) { 4671 return UCaseProps.INSTANCE.fold(ch, options); 4672 } 4673 4674 /** 4675 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4676 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4677 * folding equivalent, the character itself is returned. 4678 * "Full", multiple-code point case folding mappings are returned here. 4679 * For "simple" single-code point mappings use the API 4680 * foldCase(int ch, boolean defaultmapping). 4681 * @param str the String to be converted 4682 * @param options A bit set for special processing. Currently the recognised options 4683 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4684 * @return the case folding equivalent of the character, if any; otherwise the 4685 * character itself. 4686 * @see #foldCase(int, boolean) 4687 */ 4688 public static final String foldCase(String str, int options) { 4689 return CaseMapImpl.fold(options, str); 4690 } 4691 4692 /** 4693 * <strong>[icu]</strong> Returns the numeric value of a Han character. 4694 * 4695 * <p>This returns the value of Han 'numeric' code points, 4696 * including those for zero, ten, hundred, thousand, ten thousand, 4697 * and hundred million. 4698 * This includes both the standard and 'checkwriting' 4699 * characters, the 'big circle' zero character, and the standard 4700 * zero character. 4701 * 4702 * <p>Note: The Unicode Standard has numeric values for more 4703 * Han characters recognized by this method 4704 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 4705 * and a {@link android.icu.text.NumberFormat} can be used with 4706 * a Chinese {@link android.icu.text.NumberingSystem}. 4707 * 4708 * @param ch code point to query 4709 * @return value if it is a Han 'numeric character,' otherwise return -1. 
4710 */ 4711 public static int getHanNumericValue(int ch) 4712 { 4713 switch(ch) 4714 { 4715 case IDEOGRAPHIC_NUMBER_ZERO_ : 4716 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 4717 return 0; // Han Zero 4718 case CJK_IDEOGRAPH_FIRST_ : 4719 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 4720 return 1; // Han One 4721 case CJK_IDEOGRAPH_SECOND_ : 4722 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 4723 return 2; // Han Two 4724 case CJK_IDEOGRAPH_THIRD_ : 4725 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 4726 return 3; // Han Three 4727 case CJK_IDEOGRAPH_FOURTH_ : 4728 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 4729 return 4; // Han Four 4730 case CJK_IDEOGRAPH_FIFTH_ : 4731 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 4732 return 5; // Han Five 4733 case CJK_IDEOGRAPH_SIXTH_ : 4734 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 4735 return 6; // Han Six 4736 case CJK_IDEOGRAPH_SEVENTH_ : 4737 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 4738 return 7; // Han Seven 4739 case CJK_IDEOGRAPH_EIGHTH_ : 4740 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 4741 return 8; // Han Eight 4742 case CJK_IDEOGRAPH_NINETH_ : 4743 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 4744 return 9; // Han Nine 4745 case CJK_IDEOGRAPH_TEN_ : 4746 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 4747 return 10; 4748 case CJK_IDEOGRAPH_HUNDRED_ : 4749 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 4750 return 100; 4751 case CJK_IDEOGRAPH_THOUSAND_ : 4752 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 4753 return 1000; 4754 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 4755 return 10000; 4756 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 4757 return 100000000; 4758 } 4759 return -1; // no value 4760 } 4761 4762 /** 4763 * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints. 
4764 * <p>Example of use:<br> 4765 * <pre> 4766 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 4767 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 4768 * while (iterator.next(element)) { 4769 * System.out.println("Codepoint \\u" + 4770 * Integer.toHexString(element.start) + 4771 * " to codepoint \\u" + 4772 * Integer.toHexString(element.limit - 1) + 4773 * " has the character type " + 4774 * element.value); 4775 * } 4776 * </pre> 4777 * @return an iterator 4778 */ 4779 public static RangeValueIterator getTypeIterator() 4780 { 4781 return new UCharacterTypeIterator(); 4782 } 4783 4784 private static final class UCharacterTypeIterator implements RangeValueIterator { 4785 UCharacterTypeIterator() { 4786 reset(); 4787 } 4788 4789 // implements RangeValueIterator 4790 @Override 4791 public boolean next(Element element) { 4792 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 4793 element.start=range.startCodePoint; 4794 element.limit=range.endCodePoint+1; 4795 element.value=range.value; 4796 return true; 4797 } else { 4798 return false; 4799 } 4800 } 4801 4802 // implements RangeValueIterator 4803 @Override 4804 public void reset() { 4805 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 4806 } 4807 4808 private Iterator<Trie2.Range> trieIterator; 4809 private Trie2.Range range; 4810 4811 private static final class MaskType implements Trie2.ValueMapper { 4812 // Extracts the general category ("character type") from the trie value. 4813 @Override 4814 public int map(int value) { 4815 return value & UCharacterProperty.TYPE_MASK; 4816 } 4817 } 4818 private static final MaskType MASK_TYPE=new MaskType(); 4819 } 4820 4821 /** 4822 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 4823 * <p>This API only gets the iterator for the modern, most up-to-date 4824 * Unicode names. 
For older 1.0 Unicode names use get1_0NameIterator() or 4825 * for extended names use getExtendedNameIterator(). 4826 * <p>Example of use:<br> 4827 * <pre> 4828 * ValueIterator iterator = UCharacter.getNameIterator(); 4829 * ValueIterator.Element element = new ValueIterator.Element(); 4830 * while (iterator.next(element)) { 4831 * System.out.println("Codepoint \\u" + 4832 * Integer.toHexString(element.codepoint) + 4833 * " has the name " + (String)element.value); 4834 * } 4835 * </pre> 4836 * <p>The maximal range which the name iterator iterates is from 4837 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 4838 * @return an iterator 4839 */ 4840 public static ValueIterator getNameIterator(){ 4841 return new UCharacterNameIterator(UCharacterName.INSTANCE, 4842 UCharacterNameChoice.UNICODE_CHAR_NAME); 4843 } 4844 4845 /** 4846 * <strong>[icu]</strong> Returns an empty iterator. 4847 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 4848 * @return an empty iterator 4849 * @deprecated ICU 49 4850 * @see #getName1_0(int) 4851 * @hide original deprecated declaration 4852 */ 4853 @Deprecated 4854 public static ValueIterator getName1_0Iterator(){ 4855 return new DummyValueIterator(); 4856 } 4857 4858 private static final class DummyValueIterator implements ValueIterator { 4859 @Override 4860 public boolean next(Element element) { return false; } 4861 @Override 4862 public void reset() {} 4863 @Override 4864 public void setRange(int start, int limit) {} 4865 } 4866 4867 /** 4868 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 4869 * <p>This API only gets the iterator for the extended names. 4870 * For modern, most up-to-date Unicode names use getNameIterator() or 4871 * for older 1.0 Unicode names use get1_0NameIterator(). 
4872 * <p>Example of use:<br> 4873 * <pre> 4874 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 4875 * ValueIterator.Element element = new ValueIterator.Element(); 4876 * while (iterator.next(element)) { 4877 * System.out.println("Codepoint \\u" + 4878 * Integer.toHexString(element.codepoint) + 4879 * " has the name " + (String)element.value); 4880 * } 4881 * </pre> 4882 * <p>The maximal range which the name iterator iterates is from 4883 * @return an iterator 4884 */ 4885 public static ValueIterator getExtendedNameIterator(){ 4886 return new UCharacterNameIterator(UCharacterName.INSTANCE, 4887 UCharacterNameChoice.EXTENDED_CHAR_NAME); 4888 } 4889 4890 /** 4891 * <strong>[icu]</strong> Returns the "age" of the code point. 4892 * <p>The "age" is the Unicode version when the code point was first 4893 * designated (as a non-character or for Private Use) or assigned a 4894 * character. 4895 * <p>This can be useful to avoid emitting code points to receiving 4896 * processes that do not accept newer characters. 4897 * <p>The data is from the UCD file DerivedAge.txt. 4898 * @param ch The code point. 4899 * @return the Unicode version number 4900 */ 4901 public static VersionInfo getAge(int ch) 4902 { 4903 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4904 throw new IllegalArgumentException("Codepoint out of bounds"); 4905 } 4906 return UCharacterProperty.INSTANCE.getAge(ch); 4907 } 4908 4909 /** 4910 * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point. 4911 * <p>Unicode, especially in version 3.2, defines many more properties 4912 * than the original set in UnicodeData.txt. 4913 * <p>This API is intended to reflect Unicode properties as defined in 4914 * the Unicode Character Database (UCD) and Unicode Technical Reports 4915 * (UTR). 4916 * <p>For details about the properties see 4917 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 4918 * <p>For names of Unicode properties see the UCD file 4919 * PropertyAliases.txt. 
4920 * <p>This API does not check the validity of the codepoint. 4921 * <p>Important: If ICU is built with UCD files from Unicode versions 4922 * below 3.2, then properties marked with "new" are not or 4923 * not fully available. 4924 * @param ch code point to test. 4925 * @param property selector constant from android.icu.lang.UProperty, 4926 * identifies which binary property to check. 4927 * @return true or false according to the binary Unicode property value 4928 * for ch. Also false if property is out of bounds or if the 4929 * Unicode version does not have data for the property at all, or 4930 * not for this code point. 4931 * @see android.icu.lang.UProperty 4932 */ 4933 public static boolean hasBinaryProperty(int ch, int property) 4934 { 4935 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 4936 } 4937 4938 /** 4939 * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property. 4940 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 4941 * <p>Different from UCharacter.isLetter(ch)! 4942 * @param ch codepoint to be tested 4943 */ 4944 public static boolean isUAlphabetic(int ch) 4945 { 4946 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 4947 } 4948 4949 /** 4950 * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property. 4951 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 4952 * <p>This is different from UCharacter.isLowerCase(ch)! 4953 * @param ch codepoint to be tested 4954 */ 4955 public static boolean isULowercase(int ch) 4956 { 4957 return hasBinaryProperty(ch, UProperty.LOWERCASE); 4958 } 4959 4960 /** 4961 * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property. 4962 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 4963 * <p>This is different from UCharacter.isUpperCase(ch)! 
4964 * @param ch codepoint to be tested 4965 */ 4966 public static boolean isUUppercase(int ch) 4967 { 4968 return hasBinaryProperty(ch, UProperty.UPPERCASE); 4969 } 4970 4971 /** 4972 * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property. 4973 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 4974 * <p>This is different from both UCharacter.isSpace(ch) and 4975 * UCharacter.isWhitespace(ch)! 4976 * @param ch codepoint to be tested 4977 */ 4978 public static boolean isUWhiteSpace(int ch) 4979 { 4980 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 4981 } 4982 4983 /** 4984 * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point. 4985 * Also returns binary and mask property values. 4986 * <p>Unicode, especially in version 3.2, defines many more properties than 4987 * the original set in UnicodeData.txt. 4988 * <p>The properties APIs are intended to reflect Unicode properties as 4989 * defined in the Unicode Character Database (UCD) and Unicode Technical 4990 * Reports (UTR). For details about the properties see 4991 * http://www.unicode.org/. 4992 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 4993 * 4994 * <pre> 4995 * Sample usage: 4996 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 4997 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 4998 * boolean b = (ideo == 1) ? true : false; 4999 * </pre> 5000 * @param ch code point to test. 5001 * @param type UProperty selector constant, identifies which binary 5002 * property to check. Must be 5003 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5004 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5005 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
5006 * @return numeric value that is directly the property value or, 5007 * for enumerated properties, corresponds to the numeric value of 5008 * the enumerated constant of the respective property value 5009 * enumeration type (cast to enum type if necessary). 5010 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5011 * Returns a bit-mask for mask properties. 5012 * Returns 0 if 'type' is out of bounds or if the Unicode version 5013 * does not have data for the property at all, or not for this code 5014 * point. 5015 * @see UProperty 5016 * @see #hasBinaryProperty 5017 * @see #getIntPropertyMinValue 5018 * @see #getIntPropertyMaxValue 5019 * @see #getUnicodeVersion 5020 */ 5021 public static int getIntPropertyValue(int ch, int type) 5022 { 5023 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5024 } 5025 /** 5026 * <strong>[icu]</strong> Returns a string version of the property value. 5027 * @param propertyEnum The property enum value. 5028 * @param codepoint The codepoint value. 5029 * @param nameChoice The choice of the name. 5030 * @return value as string 5031 * @deprecated This API is ICU internal only. 
5032 * @hide original deprecated declaration 5033 * @hide draft / provisional / internal are hidden on Android 5034 */ 5035 @Deprecated 5036 ///CLOVER:OFF 5037 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5038 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5039 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5040 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5041 nameChoice); 5042 } 5043 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5044 return String.valueOf(getUnicodeNumericValue(codepoint)); 5045 } 5046 // otherwise must be string property 5047 switch (propertyEnum) { 5048 case UProperty.AGE: return getAge(codepoint).toString(); 5049 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5050 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5051 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5052 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5053 case UProperty.NAME: return getName(codepoint); 5054 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5055 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5056 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5057 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5058 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5059 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5060 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5061 } 5062 throw new IllegalArgumentException("Illegal Property Enum"); 5063 } 5064 ///CLOVER:ON 5065 5066 /** 5067 * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type. 
5068 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5069 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5070 * @param type UProperty selector constant, identifies which binary 5071 * property to check. Must be 5072 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5073 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5074 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5075 * for a Unicode property. 0 if the property 5076 * selector 'type' is out of range. 5077 * @see UProperty 5078 * @see #hasBinaryProperty 5079 * @see #getUnicodeVersion 5080 * @see #getIntPropertyMaxValue 5081 * @see #getIntPropertyValue 5082 */ 5083 public static int getIntPropertyMinValue(int type){ 5084 5085 return 0; // undefined; and: all other properties have a minimum value of 0 5086 } 5087 5088 5089 /** 5090 * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property. 5091 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5092 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5093 * Examples for min/max values (for Unicode 3.2): 5094 * <ul> 5095 * <li> UProperty.BIDI_CLASS: 0/18 5096 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5097 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5098 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5099 * </ul> 5100 * For undefined UProperty constant values, min/max values will be 0/-1. 5101 * @param type UProperty selector constant, identifies which binary 5102 * property to check. Must be 5103 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5104 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5105 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5106 * property. <= 0 if the property selector 'type' is out of range. 
5107 * @see UProperty 5108 * @see #hasBinaryProperty 5109 * @see #getUnicodeVersion 5110 * @see #getIntPropertyMaxValue 5111 * @see #getIntPropertyValue 5112 */ 5113 public static int getIntPropertyMaxValue(int type) 5114 { 5115 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5116 } 5117 5118 /** 5119 * Provide the java.lang.Character forDigit API, for convenience. 5120 */ 5121 public static char forDigit(int digit, int radix) { 5122 return java.lang.Character.forDigit(digit, radix); 5123 } 5124 5125 // JDK 1.5 API coverage 5126 5127 /** 5128 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5129 */ 5130 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5131 5132 /** 5133 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5134 */ 5135 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5136 5137 /** 5138 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5139 */ 5140 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5141 5142 /** 5143 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5144 */ 5145 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5146 5147 /** 5148 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5149 */ 5150 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5151 5152 /** 5153 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5154 */ 5155 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5156 5157 /** 5158 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 5159 */ 5160 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5161 5162 /** 5163 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 5164 */ 5165 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5166 5167 /** 5168 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 
5169 */ 5170 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5171 5172 /** 5173 * Equivalent to {@link Character#isValidCodePoint}. 5174 * 5175 * @param cp the code point to check 5176 * @return true if cp is a valid code point 5177 */ 5178 public static final boolean isValidCodePoint(int cp) { 5179 return cp >= 0 && cp <= MAX_CODE_POINT; 5180 } 5181 5182 /** 5183 * Same as {@link Character#isSupplementaryCodePoint}. 5184 * 5185 * @param cp the code point to check 5186 * @return true if cp is a supplementary code point 5187 */ 5188 public static final boolean isSupplementaryCodePoint(int cp) { 5189 return Character.isSupplementaryCodePoint(cp); 5190 } 5191 5192 /** 5193 * Same as {@link Character#isHighSurrogate}. 5194 * 5195 * @param ch the char to check 5196 * @return true if ch is a high (lead) surrogate 5197 */ 5198 public static boolean isHighSurrogate(char ch) { 5199 return Character.isHighSurrogate(ch); 5200 } 5201 5202 /** 5203 * Same as {@link Character#isLowSurrogate}. 5204 * 5205 * @param ch the char to check 5206 * @return true if ch is a low (trail) surrogate 5207 */ 5208 public static boolean isLowSurrogate(char ch) { 5209 return Character.isLowSurrogate(ch); 5210 } 5211 5212 /** 5213 * Same as {@link Character#isSurrogatePair}. 5214 * 5215 * @param high the high (lead) char 5216 * @param low the low (trail) char 5217 * @return true if high, low form a surrogate pair 5218 */ 5219 public static final boolean isSurrogatePair(char high, char low) { 5220 return Character.isSurrogatePair(high, low); 5221 } 5222 5223 /** 5224 * Same as {@link Character#charCount}. 5225 * Returns the number of chars needed to represent the code point (1 or 2). 5226 * This does not check the code point for validity. 
5227 * 5228 * @param cp the code point to check 5229 * @return the number of chars needed to represent the code point 5230 */ 5231 public static int charCount(int cp) { 5232 return Character.charCount(cp); 5233 } 5234 5235 /** 5236 * Same as {@link Character#toCodePoint}. 5237 * Returns the code point represented by the two surrogate code units. 5238 * This does not check the surrogate pair for validity. 5239 * 5240 * @param high the high (lead) surrogate 5241 * @param low the low (trail) surrogate 5242 * @return the code point formed by the surrogate pair 5243 */ 5244 public static final int toCodePoint(char high, char low) { 5245 return Character.toCodePoint(high, low); 5246 } 5247 5248 /** 5249 * Same as {@link Character#codePointAt(CharSequence, int)}. 5250 * Returns the code point at index. 5251 * This examines only the characters at index and index+1. 5252 * 5253 * @param seq the characters to check 5254 * @param index the index of the first or only char forming the code point 5255 * @return the code point at the index 5256 */ 5257 public static final int codePointAt(CharSequence seq, int index) { 5258 char c1 = seq.charAt(index++); 5259 if (isHighSurrogate(c1)) { 5260 if (index < seq.length()) { 5261 char c2 = seq.charAt(index); 5262 if (isLowSurrogate(c2)) { 5263 return toCodePoint(c1, c2); 5264 } 5265 } 5266 } 5267 return c1; 5268 } 5269 5270 /** 5271 * Same as {@link Character#codePointAt(char[], int)}. 5272 * Returns the code point at index. 5273 * This examines only the characters at index and index+1. 
5274 * 5275 * @param text the characters to check 5276 * @param index the index of the first or only char forming the code point 5277 * @return the code point at the index 5278 */ 5279 public static final int codePointAt(char[] text, int index) { 5280 char c1 = text[index++]; 5281 if (isHighSurrogate(c1)) { 5282 if (index < text.length) { 5283 char c2 = text[index]; 5284 if (isLowSurrogate(c2)) { 5285 return toCodePoint(c1, c2); 5286 } 5287 } 5288 } 5289 return c1; 5290 } 5291 5292 /** 5293 * Same as {@link Character#codePointAt(char[], int, int)}. 5294 * Returns the code point at index. 5295 * This examines only the characters at index and index+1. 5296 * 5297 * @param text the characters to check 5298 * @param index the index of the first or only char forming the code point 5299 * @param limit the limit of the valid text 5300 * @return the code point at the index 5301 */ 5302 public static final int codePointAt(char[] text, int index, int limit) { 5303 if (index >= limit || limit > text.length) { 5304 throw new IndexOutOfBoundsException(); 5305 } 5306 char c1 = text[index++]; 5307 if (isHighSurrogate(c1)) { 5308 if (index < limit) { 5309 char c2 = text[index]; 5310 if (isLowSurrogate(c2)) { 5311 return toCodePoint(c1, c2); 5312 } 5313 } 5314 } 5315 return c1; 5316 } 5317 5318 /** 5319 * Same as {@link Character#codePointBefore(CharSequence, int)}. 5320 * Return the code point before index. 5321 * This examines only the characters at index-1 and index-2. 
5322 * 5323 * @param seq the characters to check 5324 * @param index the index after the last or only char forming the code point 5325 * @return the code point before the index 5326 */ 5327 public static final int codePointBefore(CharSequence seq, int index) { 5328 char c2 = seq.charAt(--index); 5329 if (isLowSurrogate(c2)) { 5330 if (index > 0) { 5331 char c1 = seq.charAt(--index); 5332 if (isHighSurrogate(c1)) { 5333 return toCodePoint(c1, c2); 5334 } 5335 } 5336 } 5337 return c2; 5338 } 5339 5340 /** 5341 * Same as {@link Character#codePointBefore(char[], int)}. 5342 * Returns the code point before index. 5343 * This examines only the characters at index-1 and index-2. 5344 * 5345 * @param text the characters to check 5346 * @param index the index after the last or only char forming the code point 5347 * @return the code point before the index 5348 */ 5349 public static final int codePointBefore(char[] text, int index) { 5350 char c2 = text[--index]; 5351 if (isLowSurrogate(c2)) { 5352 if (index > 0) { 5353 char c1 = text[--index]; 5354 if (isHighSurrogate(c1)) { 5355 return toCodePoint(c1, c2); 5356 } 5357 } 5358 } 5359 return c2; 5360 } 5361 5362 /** 5363 * Same as {@link Character#codePointBefore(char[], int, int)}. 5364 * Return the code point before index. 5365 * This examines only the characters at index-1 and index-2. 
5366 * 5367 * @param text the characters to check 5368 * @param index the index after the last or only char forming the code point 5369 * @param limit the start of the valid text 5370 * @return the code point before the index 5371 */ 5372 public static final int codePointBefore(char[] text, int index, int limit) { 5373 if (index <= limit || limit < 0) { 5374 throw new IndexOutOfBoundsException(); 5375 } 5376 char c2 = text[--index]; 5377 if (isLowSurrogate(c2)) { 5378 if (index > limit) { 5379 char c1 = text[--index]; 5380 if (isHighSurrogate(c1)) { 5381 return toCodePoint(c1, c2); 5382 } 5383 } 5384 } 5385 return c2; 5386 } 5387 5388 /** 5389 * Same as {@link Character#toChars(int, char[], int)}. 5390 * Writes the chars representing the 5391 * code point into the destination at the given index. 5392 * 5393 * @param cp the code point to convert 5394 * @param dst the destination array into which to put the char(s) representing the code point 5395 * @param dstIndex the index at which to put the first (or only) char 5396 * @return the count of the number of chars written (1 or 2) 5397 * @throws IllegalArgumentException if cp is not a valid code point 5398 */ 5399 public static final int toChars(int cp, char[] dst, int dstIndex) { 5400 return Character.toChars(cp, dst, dstIndex); 5401 } 5402 5403 /** 5404 * Same as {@link Character#toChars(int)}. 5405 * Returns a char array representing the code point. 5406 * 5407 * @param cp the code point to convert 5408 * @return an array containing the char(s) representing the code point 5409 * @throws IllegalArgumentException if cp is not a valid code point 5410 */ 5411 public static final char[] toChars(int cp) { 5412 return Character.toChars(cp); 5413 } 5414 5415 /** 5416 * Equivalent to the {@link Character#getDirectionality(char)} method, for 5417 * convenience. Returns a byte representing the directionality of the 5418 * character. 
5419 * 5420 * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns 5421 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 5422 * 5423 * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link 5424 * UCharacterDirection} and its interface {@link 5425 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 5426 * defined by <code>java.lang.Character</code>. 5427 * @param cp the code point to check 5428 * @return the directionality of the code point 5429 * @see #getDirection 5430 */ 5431 public static byte getDirectionality(int cp) 5432 { 5433 return (byte)getDirection(cp); 5434 } 5435 5436 /** 5437 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 5438 * method, for convenience. Counts the number of code points in the range 5439 * of text. 5440 * @param text the characters to check 5441 * @param start the start of the range 5442 * @param limit the limit of the range 5443 * @return the number of code points in the range 5444 */ 5445 public static int codePointCount(CharSequence text, int start, int limit) { 5446 if (start < 0 || limit < start || limit > text.length()) { 5447 throw new IndexOutOfBoundsException("start (" + start + 5448 ") or limit (" + limit + 5449 ") invalid or out of range 0, " + text.length()); 5450 } 5451 5452 int len = limit - start; 5453 while (limit > start) { 5454 char ch = text.charAt(--limit); 5455 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5456 ch = text.charAt(--limit); 5457 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5458 --len; 5459 break; 5460 } 5461 } 5462 } 5463 return len; 5464 } 5465 5466 /** 5467 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 5468 * convenience. Counts the number of code points in the range of text. 
5469 * @param text the characters to check 5470 * @param start the start of the range 5471 * @param limit the limit of the range 5472 * @return the number of code points in the range 5473 */ 5474 public static int codePointCount(char[] text, int start, int limit) { 5475 if (start < 0 || limit < start || limit > text.length) { 5476 throw new IndexOutOfBoundsException("start (" + start + 5477 ") or limit (" + limit + 5478 ") invalid or out of range 0, " + text.length); 5479 } 5480 5481 int len = limit - start; 5482 while (limit > start) { 5483 char ch = text[--limit]; 5484 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5485 ch = text[--limit]; 5486 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5487 --len; 5488 break; 5489 } 5490 } 5491 } 5492 return len; 5493 } 5494 5495 /** 5496 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 5497 * method, for convenience. Adjusts the char index by a code point offset. 5498 * @param text the characters to check 5499 * @param index the index to adjust 5500 * @param codePointOffset the number of code points by which to offset the index 5501 * @return the adjusted index 5502 */ 5503 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 5504 if (index < 0 || index > text.length()) { 5505 throw new IndexOutOfBoundsException("index ( " + index + 5506 ") out of range 0, " + text.length()); 5507 } 5508 5509 if (codePointOffset < 0) { 5510 while (++codePointOffset <= 0) { 5511 char ch = text.charAt(--index); 5512 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 5513 ch = text.charAt(--index); 5514 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5515 if (++codePointOffset > 0) { 5516 return index+1; 5517 } 5518 } 5519 } 5520 } 5521 } else { 5522 int limit = text.length(); 5523 while (--codePointOffset >= 0) { 5524 char ch = text.charAt(index++); 5525 while (ch >= MIN_HIGH_SURROGATE && ch <= 
MAX_HIGH_SURROGATE && index < limit) { 5526 ch = text.charAt(index++); 5527 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5528 if (--codePointOffset < 0) { 5529 return index-1; 5530 } 5531 } 5532 } 5533 } 5534 } 5535 5536 return index; 5537 } 5538 5539 /** 5540 * Equivalent to the 5541 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 5542 * method, for convenience. Adjusts the char index by a code point offset. 5543 * @param text the characters to check 5544 * @param start the start of the range to check 5545 * @param count the length of the range to check 5546 * @param index the index to adjust 5547 * @param codePointOffset the number of code points by which to offset the index 5548 * @return the adjusted index 5549 */ 5550 public static int offsetByCodePoints(char[] text, int start, int count, int index, 5551 int codePointOffset) { 5552 int limit = start + count; 5553 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 5554 throw new IndexOutOfBoundsException("index ( " + index + 5555 ") out of range " + start + 5556 ", " + limit + 5557 " in array 0, " + text.length); 5558 } 5559 5560 if (codePointOffset < 0) { 5561 while (++codePointOffset <= 0) { 5562 char ch = text[--index]; 5563 if (index < start) { 5564 throw new IndexOutOfBoundsException("index ( " + index + 5565 ") < start (" + start + 5566 ")"); 5567 } 5568 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 5569 ch = text[--index]; 5570 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5571 if (++codePointOffset > 0) { 5572 return index+1; 5573 } 5574 } 5575 } 5576 } 5577 } else { 5578 while (--codePointOffset >= 0) { 5579 char ch = text[index++]; 5580 if (index > limit) { 5581 throw new IndexOutOfBoundsException("index ( " + index + 5582 ") > limit (" + limit + 5583 ")"); 5584 } 5585 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5586 ch = text[index++]; 5587 if (ch < 
MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5588 if (--codePointOffset < 0) { 5589 return index-1; 5590 } 5591 } 5592 } 5593 } 5594 } 5595 5596 return index; 5597 } 5598 5599 // private variables ------------------------------------------------- 5600 5601 /** 5602 * To get the last character out from a data type 5603 */ 5604 private static final int LAST_CHAR_MASK_ = 0xFFFF; 5605 5606 // /** 5607 // * To get the last byte out from a data type 5608 // */ 5609 // private static final int LAST_BYTE_MASK_ = 0xFF; 5610 // 5611 // /** 5612 // * Shift 16 bits 5613 // */ 5614 // private static final int SHIFT_16_ = 16; 5615 // 5616 // /** 5617 // * Shift 24 bits 5618 // */ 5619 // private static final int SHIFT_24_ = 24; 5620 // 5621 // /** 5622 // * Decimal radix 5623 // */ 5624 // private static final int DECIMAL_RADIX_ = 10; 5625 5626 /** 5627 * No break space code point 5628 */ 5629 private static final int NO_BREAK_SPACE_ = 0xA0; 5630 5631 /** 5632 * Figure space code point 5633 */ 5634 private static final int FIGURE_SPACE_ = 0x2007; 5635 5636 /** 5637 * Narrow no break space code point 5638 */ 5639 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 5640 5641 /** 5642 * Ideographic number zero code point 5643 */ 5644 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 5645 5646 /** 5647 * CJK Ideograph, First code point 5648 */ 5649 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 5650 5651 /** 5652 * CJK Ideograph, Second code point 5653 */ 5654 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 5655 5656 /** 5657 * CJK Ideograph, Third code point 5658 */ 5659 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 5660 5661 /** 5662 * CJK Ideograph, Fourth code point 5663 */ 5664 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 5665 5666 /** 5667 * CJK Ideograph, FIFTH code point 5668 */ 5669 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 5670 5671 /** 5672 * CJK Ideograph, Sixth code point 5673 */ 5674 private static 
final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 5675 5676 /** 5677 * CJK Ideograph, Seventh code point 5678 */ 5679 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 5680 5681 /** 5682 * CJK Ideograph, Eighth code point 5683 */ 5684 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 5685 5686 /** 5687 * CJK Ideograph, Nineth code point 5688 */ 5689 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 5690 5691 /** 5692 * Application Program command code point 5693 */ 5694 private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; 5695 5696 /** 5697 * Unit separator code point 5698 */ 5699 private static final int UNIT_SEPARATOR_ = 0x001F; 5700 5701 /** 5702 * Delete code point 5703 */ 5704 private static final int DELETE_ = 0x007F; 5705 5706 /** 5707 * Han digit characters 5708 */ 5709 private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; 5710 private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; 5711 private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; 5712 private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3; 5713 private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086; 5714 private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d; 5715 private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678; 5716 private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2; 5717 private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c; 5718 private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396; 5719 private static final int CJK_IDEOGRAPH_TEN_ = 0x5341; 5720 private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe; 5721 private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e; 5722 private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70; 5723 private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343; 5724 private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf; 5725 private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c; 5726 private static final int 
CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104; 5727 5728 // private constructor ----------------------------------------------- 5729 ///CLOVER:OFF 5730 /** 5731 * Private constructor to prevent instantiation 5732 */ 5733 private UCharacter() 5734 { 5735 } 5736 ///CLOVER:ON 5737 } 5738