1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.lang.ref.SoftReference; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.Locale; 16 import java.util.Map; 17 18 import com.ibm.icu.impl.CaseMapImpl; 19 import com.ibm.icu.impl.IllegalIcuArgumentException; 20 import com.ibm.icu.impl.Trie2; 21 import com.ibm.icu.impl.UBiDiProps; 22 import com.ibm.icu.impl.UCaseProps; 23 import com.ibm.icu.impl.UCharacterName; 24 import com.ibm.icu.impl.UCharacterNameChoice; 25 import com.ibm.icu.impl.UCharacterProperty; 26 import com.ibm.icu.impl.UCharacterUtility; 27 import com.ibm.icu.impl.UPropertyAliases; 28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 30 import com.ibm.icu.text.BreakIterator; 31 import com.ibm.icu.text.Edits; 32 import com.ibm.icu.text.Normalizer2; 33 import com.ibm.icu.util.RangeValueIterator; 34 import com.ibm.icu.util.ULocale; 35 import com.ibm.icu.util.ValueIterator; 36 import com.ibm.icu.util.VersionInfo; 37 38 /** 39 * {@icuenhanced java.lang.Character}.{@icu _usage_} 40 * 41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 42 * These extensions provide support for more Unicode properties. 43 * Each ICU release supports the latest version of Unicode available at that time. 44 * 45 * <p>For some time before Java 5 added support for supplementary Unicode code points, 46 * The ICU UCharacter class and many other ICU classes already supported them. 
47 * Some UCharacter methods and constants were widened slightly differently than 48 * how the Character class methods and constants were widened later. 49 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 50 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 51 * 52 * <p>Code points are represented in these APIs using ints. While it would be 53 * more convenient in Java to have a separate primitive datatype for them, 54 * ints suffice in the meantime. 55 * 56 * <p>To use this class please add the jar file name icu4j.jar to the 57 * class path, since it contains data files which supply the information used 58 * by this file.<br> 59 * E.g. In Windows <br> 60 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br> 61 * Otherwise, another method would be to copy the files uprops.dat and 62 * unames.icu from the icu4j source subdirectory 63 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 64 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 65 * 66 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 67 * properties, the main differences between UCharacter and Character are: 68 * <ul> 69 * <li> UCharacter is not designed to be a char wrapper and does not have 70 * APIs that involve management of that single char.<br> 71 * These include: 72 * <ul> 73 * <li> char charValue(), 74 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 75 * </ul> 76 * <li> UCharacter does not include Character APIs that are deprecated, nor 77 * does it include the Java-specific character information, such as 78 * boolean isJavaIdentifierPart(char ch). 79 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 80 * values '10' - '35'. UCharacter also does this in digit and 81 * getNumericValue, to adhere to the Java semantics of these 82 * methods. 
New methods unicodeDigit and 83 * getUnicodeNumericValue do not treat the above code points 84 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 85 * </ul> 86 * <p> 87 * Further detail on differences can be determined using the program 88 * <a href= 89 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 90 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 91 * <p> 92 * In addition to Java compatibility functions, which calculate derived properties, 93 * this API provides low-level access to the Unicode Character Database. 94 * <p> 95 * Unicode assigns each code point (not just assigned characters) values for 96 * many properties. 97 * Most of them are simple boolean flags, or constants from a small enumerated list. 98 * For some properties, values are strings or other relatively more complex types. 99 * <p> 100 * For more information see 101 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 102 * (http://www.unicode.org/ucd/) 103 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 104 * User Guide chapter on Properties</a> 105 * (http://www.icu-project.org/userguide/properties.html). 106 * <p> 107 * There are also functions that provide easy migration from C/POSIX functions 108 * like isblank(). Their use is generally discouraged because the C/POSIX 109 * standards do not define their semantics beyond the ASCII range, which means 110 * that different implementations exhibit very different behavior. 111 * Instead, Unicode properties should be used directly. 112 * <p> 113 * There are also only a few, broad C/POSIX character classes, and they tend 114 * to be used for conflicting purposes. 
For example, the "isalpha()" class 115 * is sometimes used to determine word boundaries, while a more sophisticated 116 * approach would at least distinguish initial letters from continuation 117 * characters (the latter including combining marks). 118 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 119 * Another example: There is no "istitle()" class for titlecase characters. 120 * <p> 121 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 122 * ICU implements them according to the Standard Recommendations in 123 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 124 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 125 * <p> 126 * API access for C/POSIX character classes is as follows: 127 * <pre>{@code 128 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 129 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 130 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 131 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 132 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 133 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 134 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 135 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 136 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 137 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 138 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 139 * - cntrl: getType(c)==CONTROL 140 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 141 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 142 * <p> 143 * The C/POSIX character classes are also available in UnicodeSet patterns, 144 * using patterns like [:graph:] or \p{graph}. 145 * 146 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
147 * Comparison:<ul> 148 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 149 * most of general categories "Z" (separators) + most whitespace ISO controls 150 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 151 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 152 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 153 * 154 * <p> 155 * This class is not subclassable. 156 * 157 * @author Syn Wee Quek 158 * @stable ICU 2.1 159 * @see com.ibm.icu.lang.UCharacterEnums 160 */ 161 162 public final class UCharacter implements ECharacterCategory, ECharacterDirection 163 { 164 // public inner classes ---------------------------------------------- 165 166 /** 167 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 168 * 169 * A family of character subsets representing the character blocks in the 170 * Unicode specification, generated from Unicode Data file Blocks.txt. 171 * Character blocks generally define characters used for a specific script 172 * or purpose. A character is contained by at most one Unicode block. 173 * 174 * {@icunote} All fields named XXX_ID are specific to ICU. 
175 * 176 * @stable ICU 2.4 177 */ 178 public static final class UnicodeBlock extends Character.Subset 179 { 180 // block id corresponding to icu4c ----------------------------------- 181 182 /** 183 * @stable ICU 2.4 184 */ 185 public static final int INVALID_CODE_ID = -1; 186 /** 187 * @stable ICU 2.4 188 */ 189 public static final int BASIC_LATIN_ID = 1; 190 /** 191 * @stable ICU 2.4 192 */ 193 public static final int LATIN_1_SUPPLEMENT_ID = 2; 194 /** 195 * @stable ICU 2.4 196 */ 197 public static final int LATIN_EXTENDED_A_ID = 3; 198 /** 199 * @stable ICU 2.4 200 */ 201 public static final int LATIN_EXTENDED_B_ID = 4; 202 /** 203 * @stable ICU 2.4 204 */ 205 public static final int IPA_EXTENSIONS_ID = 5; 206 /** 207 * @stable ICU 2.4 208 */ 209 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 210 /** 211 * @stable ICU 2.4 212 */ 213 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 214 /** 215 * Unicode 3.2 renames this block to "Greek and Coptic". 216 * @stable ICU 2.4 217 */ 218 public static final int GREEK_ID = 8; 219 /** 220 * @stable ICU 2.4 221 */ 222 public static final int CYRILLIC_ID = 9; 223 /** 224 * @stable ICU 2.4 225 */ 226 public static final int ARMENIAN_ID = 10; 227 /** 228 * @stable ICU 2.4 229 */ 230 public static final int HEBREW_ID = 11; 231 /** 232 * @stable ICU 2.4 233 */ 234 public static final int ARABIC_ID = 12; 235 /** 236 * @stable ICU 2.4 237 */ 238 public static final int SYRIAC_ID = 13; 239 /** 240 * @stable ICU 2.4 241 */ 242 public static final int THAANA_ID = 14; 243 /** 244 * @stable ICU 2.4 245 */ 246 public static final int DEVANAGARI_ID = 15; 247 /** 248 * @stable ICU 2.4 249 */ 250 public static final int BENGALI_ID = 16; 251 /** 252 * @stable ICU 2.4 253 */ 254 public static final int GURMUKHI_ID = 17; 255 /** 256 * @stable ICU 2.4 257 */ 258 public static final int GUJARATI_ID = 18; 259 /** 260 * @stable ICU 2.4 261 */ 262 public static final int ORIYA_ID = 19; 263 /** 264 * @stable ICU 2.4 265 */ 
266 public static final int TAMIL_ID = 20; 267 /** 268 * @stable ICU 2.4 269 */ 270 public static final int TELUGU_ID = 21; 271 /** 272 * @stable ICU 2.4 273 */ 274 public static final int KANNADA_ID = 22; 275 /** 276 * @stable ICU 2.4 277 */ 278 public static final int MALAYALAM_ID = 23; 279 /** 280 * @stable ICU 2.4 281 */ 282 public static final int SINHALA_ID = 24; 283 /** 284 * @stable ICU 2.4 285 */ 286 public static final int THAI_ID = 25; 287 /** 288 * @stable ICU 2.4 289 */ 290 public static final int LAO_ID = 26; 291 /** 292 * @stable ICU 2.4 293 */ 294 public static final int TIBETAN_ID = 27; 295 /** 296 * @stable ICU 2.4 297 */ 298 public static final int MYANMAR_ID = 28; 299 /** 300 * @stable ICU 2.4 301 */ 302 public static final int GEORGIAN_ID = 29; 303 /** 304 * @stable ICU 2.4 305 */ 306 public static final int HANGUL_JAMO_ID = 30; 307 /** 308 * @stable ICU 2.4 309 */ 310 public static final int ETHIOPIC_ID = 31; 311 /** 312 * @stable ICU 2.4 313 */ 314 public static final int CHEROKEE_ID = 32; 315 /** 316 * @stable ICU 2.4 317 */ 318 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 319 /** 320 * @stable ICU 2.4 321 */ 322 public static final int OGHAM_ID = 34; 323 /** 324 * @stable ICU 2.4 325 */ 326 public static final int RUNIC_ID = 35; 327 /** 328 * @stable ICU 2.4 329 */ 330 public static final int KHMER_ID = 36; 331 /** 332 * @stable ICU 2.4 333 */ 334 public static final int MONGOLIAN_ID = 37; 335 /** 336 * @stable ICU 2.4 337 */ 338 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 339 /** 340 * @stable ICU 2.4 341 */ 342 public static final int GREEK_EXTENDED_ID = 39; 343 /** 344 * @stable ICU 2.4 345 */ 346 public static final int GENERAL_PUNCTUATION_ID = 40; 347 /** 348 * @stable ICU 2.4 349 */ 350 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 351 /** 352 * @stable ICU 2.4 353 */ 354 public static final int CURRENCY_SYMBOLS_ID = 42; 355 /** 356 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 357 * Symbols". 358 * @stable ICU 2.4 359 */ 360 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 361 /** 362 * @stable ICU 2.4 363 */ 364 public static final int LETTERLIKE_SYMBOLS_ID = 44; 365 /** 366 * @stable ICU 2.4 367 */ 368 public static final int NUMBER_FORMS_ID = 45; 369 /** 370 * @stable ICU 2.4 371 */ 372 public static final int ARROWS_ID = 46; 373 /** 374 * @stable ICU 2.4 375 */ 376 public static final int MATHEMATICAL_OPERATORS_ID = 47; 377 /** 378 * @stable ICU 2.4 379 */ 380 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 381 /** 382 * @stable ICU 2.4 383 */ 384 public static final int CONTROL_PICTURES_ID = 49; 385 /** 386 * @stable ICU 2.4 387 */ 388 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 389 /** 390 * @stable ICU 2.4 391 */ 392 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 393 /** 394 * @stable ICU 2.4 395 */ 396 public static final int BOX_DRAWING_ID = 52; 397 /** 398 * @stable ICU 2.4 399 */ 400 public static final int BLOCK_ELEMENTS_ID = 53; 401 /** 402 * @stable ICU 2.4 403 */ 404 public static final int GEOMETRIC_SHAPES_ID = 54; 405 /** 406 * @stable ICU 2.4 407 */ 408 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 409 /** 410 * @stable ICU 2.4 411 */ 412 public static final int DINGBATS_ID = 56; 413 /** 414 * @stable ICU 2.4 415 */ 416 public static final int BRAILLE_PATTERNS_ID = 57; 417 /** 418 * @stable ICU 2.4 419 */ 420 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 421 /** 422 * @stable ICU 2.4 423 */ 424 public static final int KANGXI_RADICALS_ID = 59; 425 /** 426 * @stable ICU 2.4 427 */ 428 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 429 /** 430 * @stable ICU 2.4 431 */ 432 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 433 /** 434 * @stable ICU 2.4 435 */ 436 public static final int HIRAGANA_ID = 62; 437 /** 438 * @stable ICU 2.4 439 */ 440 public static final int KATAKANA_ID = 63; 441 /** 442 * 
@stable ICU 2.4 443 */ 444 public static final int BOPOMOFO_ID = 64; 445 /** 446 * @stable ICU 2.4 447 */ 448 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 449 /** 450 * @stable ICU 2.4 451 */ 452 public static final int KANBUN_ID = 66; 453 /** 454 * @stable ICU 2.4 455 */ 456 public static final int BOPOMOFO_EXTENDED_ID = 67; 457 /** 458 * @stable ICU 2.4 459 */ 460 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 461 /** 462 * @stable ICU 2.4 463 */ 464 public static final int CJK_COMPATIBILITY_ID = 69; 465 /** 466 * @stable ICU 2.4 467 */ 468 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 469 /** 470 * @stable ICU 2.4 471 */ 472 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 473 /** 474 * @stable ICU 2.4 475 */ 476 public static final int YI_SYLLABLES_ID = 72; 477 /** 478 * @stable ICU 2.4 479 */ 480 public static final int YI_RADICALS_ID = 73; 481 /** 482 * @stable ICU 2.4 483 */ 484 public static final int HANGUL_SYLLABLES_ID = 74; 485 /** 486 * @stable ICU 2.4 487 */ 488 public static final int HIGH_SURROGATES_ID = 75; 489 /** 490 * @stable ICU 2.4 491 */ 492 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 493 /** 494 * @stable ICU 2.4 495 */ 496 public static final int LOW_SURROGATES_ID = 77; 497 /** 498 * Same as public static final int PRIVATE_USE. 499 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 500 * and multiple code point ranges had this block. 501 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 502 * and adds separate blocks for the supplementary PUAs. 503 * @stable ICU 2.4 504 */ 505 public static final int PRIVATE_USE_AREA_ID = 78; 506 /** 507 * Same as public static final int PRIVATE_USE_AREA. 508 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 509 * and multiple code point ranges had this block. 
510 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 511 * and adds separate blocks for the supplementary PUAs. 512 * @stable ICU 2.4 513 */ 514 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 515 /** 516 * @stable ICU 2.4 517 */ 518 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 519 /** 520 * @stable ICU 2.4 521 */ 522 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 523 /** 524 * @stable ICU 2.4 525 */ 526 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 527 /** 528 * @stable ICU 2.4 529 */ 530 public static final int COMBINING_HALF_MARKS_ID = 82; 531 /** 532 * @stable ICU 2.4 533 */ 534 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 535 /** 536 * @stable ICU 2.4 537 */ 538 public static final int SMALL_FORM_VARIANTS_ID = 84; 539 /** 540 * @stable ICU 2.4 541 */ 542 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 543 /** 544 * @stable ICU 2.4 545 */ 546 public static final int SPECIALS_ID = 86; 547 /** 548 * @stable ICU 2.4 549 */ 550 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 551 /** 552 * @stable ICU 2.4 553 */ 554 public static final int OLD_ITALIC_ID = 88; 555 /** 556 * @stable ICU 2.4 557 */ 558 public static final int GOTHIC_ID = 89; 559 /** 560 * @stable ICU 2.4 561 */ 562 public static final int DESERET_ID = 90; 563 /** 564 * @stable ICU 2.4 565 */ 566 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 567 /** 568 * @stable ICU 2.4 569 */ 570 public static final int MUSICAL_SYMBOLS_ID = 92; 571 /** 572 * @stable ICU 2.4 573 */ 574 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 575 /** 576 * @stable ICU 2.4 577 */ 578 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 579 /** 580 * @stable ICU 2.4 581 */ 582 public static final int 583 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 584 /** 585 * @stable ICU 2.4 586 */ 587 public static final int TAGS_ID = 96; 588 589 // New blocks in Unicode 
3.2 590 591 /** 592 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 593 * @stable ICU 2.4 594 */ 595 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 596 /** 597 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 598 * @stable ICU 3.0 599 */ 600 601 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 602 /** 603 * @stable ICU 2.4 604 */ 605 public static final int TAGALOG_ID = 98; 606 /** 607 * @stable ICU 2.4 608 */ 609 public static final int HANUNOO_ID = 99; 610 /** 611 * @stable ICU 2.4 612 */ 613 public static final int BUHID_ID = 100; 614 /** 615 * @stable ICU 2.4 616 */ 617 public static final int TAGBANWA_ID = 101; 618 /** 619 * @stable ICU 2.4 620 */ 621 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 622 /** 623 * @stable ICU 2.4 624 */ 625 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 626 /** 627 * @stable ICU 2.4 628 */ 629 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 630 /** 631 * @stable ICU 2.4 632 */ 633 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 634 /** 635 * @stable ICU 2.4 636 */ 637 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 638 /** 639 * @stable ICU 2.4 640 */ 641 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 642 /** 643 * @stable ICU 2.4 644 */ 645 public static final int VARIATION_SELECTORS_ID = 108; 646 /** 647 * @stable ICU 2.4 648 */ 649 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 650 /** 651 * @stable ICU 2.4 652 */ 653 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 654 655 /** 656 * @stable ICU 2.6 657 */ 658 public static final int LIMBU_ID = 111; /*[1900]*/ 659 /** 660 * @stable ICU 2.6 661 */ 662 public static final int TAI_LE_ID = 112; /*[1950]*/ 663 /** 664 * @stable ICU 2.6 665 */ 666 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 667 /** 668 * @stable ICU 2.6 669 */ 670 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 671 /** 672 * @stable ICU 2.6 673 */ 674 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 675 /** 676 * @stable ICU 2.6 677 */ 678 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 679 /** 680 * @stable ICU 2.6 681 */ 682 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 683 /** 684 * @stable ICU 2.6 685 */ 686 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 687 /** 688 * @stable ICU 2.6 689 */ 690 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 691 /** 692 * @stable ICU 2.6 693 */ 694 public static final int UGARITIC_ID = 120; /*[10380]*/ 695 /** 696 * @stable ICU 2.6 697 */ 698 public static final int SHAVIAN_ID = 121; /*[10450]*/ 699 /** 700 * @stable ICU 2.6 701 */ 702 public static final int OSMANYA_ID = 122; /*[10480]*/ 703 /** 704 * @stable ICU 2.6 705 */ 706 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 707 /** 708 * @stable ICU 2.6 709 */ 710 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 711 /** 712 * @stable ICU 2.6 713 */ 714 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 715 716 /* New blocks in Unicode 4.1 */ 717 718 /** 719 * @stable ICU 3.4 720 */ 721 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 722 723 /** 724 * @stable ICU 3.4 725 */ 726 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 727 728 /** 729 * @stable ICU 3.4 730 */ 731 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 732 733 /** 734 * @stable ICU 3.4 735 */ 736 public static final int BUGINESE_ID = 129; /*[1A00]*/ 737 738 /** 739 * @stable ICU 3.4 740 */ 741 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 742 743 /** 744 * @stable ICU 3.4 745 */ 746 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 747 748 /** 749 * @stable ICU 3.4 750 */ 751 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 752 753 /** 754 * @stable ICU 3.4 755 */ 756 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 757 758 /** 759 * @stable ICU 3.4 760 */ 761 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 762 763 /** 764 * @stable ICU 3.4 765 */ 766 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 767 768 /** 769 * @stable ICU 3.4 770 */ 771 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 772 773 /** 774 * @stable ICU 3.4 775 */ 776 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 777 778 /** 779 * @stable ICU 3.4 780 */ 781 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 782 783 /** 784 * @stable ICU 3.4 785 */ 786 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 787 788 /** 789 * @stable ICU 3.4 790 */ 791 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 792 793 /** 794 * @stable ICU 3.4 795 */ 796 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 797 798 /** 799 * @stable ICU 3.4 800 */ 801 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 802 803 /** 804 * @stable ICU 3.4 805 */ 806 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 807 808 /** 809 * @stable ICU 3.4 810 */ 811 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 812 813 /** 814 * @stable ICU 3.4 815 */ 816 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 817 818 /* New blocks in Unicode 5.0 */ 819 820 /** 821 * @stable ICU 3.6 822 */ 823 public static final int NKO_ID = 146; /*[07C0]*/ 824 /** 825 * @stable ICU 3.6 826 */ 827 public static final int BALINESE_ID = 147; /*[1B00]*/ 828 /** 829 * @stable ICU 3.6 830 */ 831 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 832 /** 833 * @stable ICU 3.6 834 */ 835 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 836 /** 837 * @stable ICU 3.6 838 */ 839 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 840 /** 841 * @stable ICU 3.6 842 */ 843 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 844 /** 845 * @stable ICU 3.6 846 */ 847 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 848 /** 849 * @stable ICU 3.6 850 */ 851 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 852 /** 853 * @stable ICU 3.6 854 */ 855 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 856 857 /** 858 * @stable ICU 4.0 859 */ 860 public static final int SUNDANESE_ID = 155; /* [1B80] */ 861 862 /** 863 * @stable ICU 4.0 864 */ 865 public static final int LEPCHA_ID = 156; /* [1C00] */ 866 867 /** 868 * @stable ICU 4.0 869 */ 870 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 871 872 /** 873 * @stable ICU 4.0 874 */ 875 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 876 877 /** 878 * @stable ICU 4.0 879 */ 880 public static final int VAI_ID = 159; /* [A500] */ 881 882 /** 883 * @stable ICU 4.0 884 */ 885 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 886 887 /** 888 * @stable ICU 4.0 889 */ 890 public static final int SAURASHTRA_ID = 161; /* [A880] */ 891 892 /** 893 * @stable ICU 4.0 894 */ 895 public static final int KAYAH_LI_ID = 162; /* [A900] */ 896 897 /** 898 * @stable ICU 4.0 899 */ 900 public static final int REJANG_ID = 163; /* [A930] */ 901 902 /** 903 * @stable ICU 4.0 904 */ 905 public static final int CHAM_ID = 164; /* [AA00] */ 906 907 /** 908 * @stable ICU 4.0 909 */ 910 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 911 912 /** 913 * @stable ICU 4.0 914 */ 915 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 916 917 /** 918 * @stable ICU 4.0 919 */ 920 public static final int LYCIAN_ID = 167; /* [10280] */ 921 922 /** 923 * @stable ICU 4.0 924 */ 925 public static final int CARIAN_ID = 168; /* [102A0] */ 926 927 /** 928 * @stable ICU 4.0 929 */ 930 public static final int LYDIAN_ID = 169; /* [10920] */ 931 932 /** 933 * @stable ICU 4.0 934 */ 935 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 936 937 /** 938 * @stable ICU 4.0 939 */ 940 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 941 942 /* New blocks in Unicode 5.2 */ 943 944 /** @stable ICU 4.4 */ 945 public static final int SAMARITAN_ID = 172; /*[0800]*/ 946 /** @stable ICU 4.4 */ 947 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 948 /** @stable ICU 4.4 */ 949 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 950 /** @stable ICU 4.4 */ 951 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 952 /** @stable ICU 4.4 */ 953 public static final int LISU_ID = 176; /*[A4D0]*/ 954 /** @stable ICU 4.4 */ 955 public static final int BAMUM_ID = 177; /*[A6A0]*/ 956 /** @stable ICU 4.4 */ 957 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 958 /** @stable ICU 4.4 */ 959 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 960 /** @stable ICU 4.4 */ 961 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 962 /** @stable ICU 4.4 */ 963 public static final int JAVANESE_ID = 181; /*[A980]*/ 964 /** @stable ICU 4.4 */ 965 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 966 /** @stable ICU 4.4 */ 967 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 968 /** @stable ICU 4.4 */ 969 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 970 /** @stable ICU 4.4 */ 971 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 972 /** @stable ICU 4.4 */ 973 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 974 /** @stable ICU 4.4 */ 975 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 976 /** @stable ICU 4.4 */ 977 public static final int AVESTAN_ID = 188; /*[10B00]*/ 978 /** @stable ICU 4.4 */ 979 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 980 /** @stable ICU 4.4 */ 981 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 982 /** @stable ICU 4.4 */ 983 public 
static final int OLD_TURKIC_ID = 191; /*[10C00]*/
        /** @stable ICU 4.4 */
        public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
        /** @stable ICU 4.4 */
        public static final int KAITHI_ID = 193; /*[11080]*/
        /** @stable ICU 4.4 */
        public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
        /** @stable ICU 4.4 */
        public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
        /** @stable ICU 4.4 */
        public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
        /** @stable ICU 4.4 */
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/

        /*
         * New blocks in Unicode 6.0
         *
         * NOTE(review): the bracketed hex value trailing each constant looks like
         * the first code point of the block -- confirm against Blocks.txt.
         */

        /** @stable ICU 4.6 */
        public static final int MANDAIC_ID = 198; /*[0840]*/
        /** @stable ICU 4.6 */
        public static final int BATAK_ID = 199; /*[1BC0]*/
        /** @stable ICU 4.6 */
        public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
        /** @stable ICU 4.6 */
        public static final int BRAHMI_ID = 201; /*[11000]*/
        /** @stable ICU 4.6 */
        public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
        /** @stable ICU 4.6 */
        public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
        /** @stable ICU 4.6 */
        public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
        /** @stable ICU 4.6 */
        public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
        /** @stable ICU 4.6 */
        public static final int EMOTICONS_ID = 206; /*[1F600]*/
        /** @stable ICU 4.6 */
        public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
        /** @stable ICU 4.6 */
        public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
        /** @stable ICU 4.6 */
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/

        /* New blocks in Unicode 6.1 */

        /** @stable ICU 49 */
        public static final int ARABIC_EXTENDED_A_ID = 210;
/*[08A0]*/
        /** @stable ICU 49 */
        public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
        /** @stable ICU 49 */
        public static final int CHAKMA_ID = 212; /*[11100]*/
        /** @stable ICU 49 */
        public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
        /** @stable ICU 49 */
        public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
        /** @stable ICU 49 */
        public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
        /** @stable ICU 49 */
        public static final int MIAO_ID = 216; /*[16F00]*/
        /** @stable ICU 49 */
        public static final int SHARADA_ID = 217; /*[11180]*/
        /** @stable ICU 49 */
        public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
        /** @stable ICU 49 */
        public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
        /** @stable ICU 49 */
        public static final int TAKRI_ID = 220; /*[11680]*/

        /*
         * New blocks in Unicode 7.0
         *
         * NOTE(review): the bracketed hex value trailing each constant looks like
         * the first code point of the block -- confirm against Blocks.txt.
         */

        /** @stable ICU 54 */
        public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
        /** @stable ICU 54 */
        public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
        /** @stable ICU 54 */
        public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
        /** @stable ICU 54 */
        public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
        /** @stable ICU 54 */
        public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
        /** @stable ICU 54 */
        public static final int ELBASAN_ID = 226; /*[10500]*/
        /** @stable ICU 54 */
        public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
        /** @stable ICU 54 */
        public static final int GRANTHA_ID = 228; /*[11300]*/
        /** @stable ICU 54 */
        public static final int KHOJKI_ID = 229; /*[11200]*/
        /** @stable ICU 54 */
        public static final int KHUDAWADI_ID = 230; /*[112B0]*/
        /** @stable ICU 54 */
        public static final int
LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
        // The constants in this run are all tagged @stable ICU 54 and are numbered
        // consecutively (232..252) after LATIN_EXTENDED_E_ID.
        /** @stable ICU 54 */
        public static final int LINEAR_A_ID = 232; /*[10600]*/
        /** @stable ICU 54 */
        public static final int MAHAJANI_ID = 233; /*[11150]*/
        /** @stable ICU 54 */
        public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
        /** @stable ICU 54 */
        public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
        /** @stable ICU 54 */
        public static final int MODI_ID = 236; /*[11600]*/
        /** @stable ICU 54 */
        public static final int MRO_ID = 237; /*[16A40]*/
        /** @stable ICU 54 */
        public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
        /** @stable ICU 54 */
        public static final int NABATAEAN_ID = 239; /*[10880]*/
        /** @stable ICU 54 */
        public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
        /** @stable ICU 54 */
        public static final int OLD_PERMIC_ID = 241; /*[10350]*/
        /** @stable ICU 54 */
        public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
        /** @stable ICU 54 */
        public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
        /** @stable ICU 54 */
        public static final int PALMYRENE_ID = 244; /*[10860]*/
        /** @stable ICU 54 */
        public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
        /** @stable ICU 54 */
        public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
        /** @stable ICU 54 */
        public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
        /** @stable ICU 54 */
        public static final int SIDDHAM_ID = 248; /*[11580]*/
        /** @stable ICU 54 */
        public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
        /** @stable ICU 54 */
        public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
        /** @stable ICU 54 */
        public static final int TIRHUTA_ID = 251; /*[11480]*/
        /** @stable ICU 54 */
        public static final int WARANG_CITI_ID = 252; /*[118A0]*/

        /* New blocks in Unicode 8.0 */
/** @stable ICU 56 */
        public static final int AHOM_ID = 253; /*[11700]*/
        /** @stable ICU 56 */
        public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
        /** @stable ICU 56 */
        public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
        /** @stable ICU 56 */
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
        /** @stable ICU 56 */
        public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
        /** @stable ICU 56 */
        public static final int HATRAN_ID = 258; /*[108E0]*/
        /** @stable ICU 56 */
        public static final int MULTANI_ID = 259; /*[11280]*/
        /** @stable ICU 56 */
        public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
        /** @stable ICU 56 */
        public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
        /** @stable ICU 56 */
        public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/

        /*
         * New blocks in Unicode 9.0
         *
         * NOTE(review): the bracketed hex value trailing each constant looks like
         * the first code point of the block -- confirm against Blocks.txt.
         */

        /** @stable ICU 58 */
        public static final int ADLAM_ID = 263; /*[1E900]*/
        /** @stable ICU 58 */
        public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
        /** @stable ICU 58 */
        public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
        /** @stable ICU 58 */
        public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
        /** @stable ICU 58 */
        public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
        /** @stable ICU 58 */
        public static final int MARCHEN_ID = 268; /*[11C70]*/
        /** @stable ICU 58 */
        public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
        /** @stable ICU 58 */
        public static final int NEWA_ID = 270; /*[11400]*/
        /** @stable ICU 58 */
        public static final int OSAGE_ID = 271; /*[104B0]*/
        /** @stable ICU 58 */
        public static final int TANGUT_ID = 272; /*[17000]*/
        /** @stable ICU 58 */
        public static final int
TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 1163 1164 /** 1165 * One more than the highest normal UnicodeBlock value. 1166 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1167 * 1168 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 1169 */ 1170 @Deprecated 1171 public static final int COUNT = 274; 1172 1173 // blocks objects --------------------------------------------------- 1174 1175 /** 1176 * Array of UnicodeBlocks, for easy access in getInstance(int) 1177 */ 1178 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1179 1180 /** 1181 * @stable ICU 2.6 1182 */ 1183 public static final UnicodeBlock NO_BLOCK 1184 = new UnicodeBlock("NO_BLOCK", 0); 1185 1186 /** 1187 * @stable ICU 2.4 1188 */ 1189 public static final UnicodeBlock BASIC_LATIN 1190 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1191 /** 1192 * @stable ICU 2.4 1193 */ 1194 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1195 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1196 /** 1197 * @stable ICU 2.4 1198 */ 1199 public static final UnicodeBlock LATIN_EXTENDED_A 1200 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1201 /** 1202 * @stable ICU 2.4 1203 */ 1204 public static final UnicodeBlock LATIN_EXTENDED_B 1205 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1206 /** 1207 * @stable ICU 2.4 1208 */ 1209 public static final UnicodeBlock IPA_EXTENSIONS 1210 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1211 /** 1212 * @stable ICU 2.4 1213 */ 1214 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1215 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1216 /** 1217 * @stable ICU 2.4 1218 */ 1219 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1220 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1221 /** 1222 * Unicode 3.2 renames this block to "Greek and Coptic". 
* @stable ICU 2.4
         */
        public static final UnicodeBlock GREEK
            = new UnicodeBlock("GREEK", GREEK_ID);

        // Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.

        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CYRILLIC
            = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ARMENIAN
            = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HEBREW
            = new UnicodeBlock("HEBREW", HEBREW_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ARABIC
            = new UnicodeBlock("ARABIC", ARABIC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SYRIAC
            = new UnicodeBlock("SYRIAC", SYRIAC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock THAANA
            = new UnicodeBlock("THAANA", THAANA_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock DEVANAGARI
            = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BENGALI
            = new UnicodeBlock("BENGALI", BENGALI_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GURMUKHI
            = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GUJARATI
            = new UnicodeBlock("GUJARATI", GUJARATI_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ORIYA
            = new UnicodeBlock("ORIYA", ORIYA_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock TAMIL
            = new UnicodeBlock("TAMIL", TAMIL_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock TELUGU
            = new UnicodeBlock("TELUGU", TELUGU_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock KANNADA
            = new UnicodeBlock("KANNADA", KANNADA_ID);
        /**
* @stable ICU 2.4
         */
        public static final UnicodeBlock MALAYALAM
            = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);

        // Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.

        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SINHALA
            = new UnicodeBlock("SINHALA", SINHALA_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock THAI
            = new UnicodeBlock("THAI", THAI_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock LAO
            = new UnicodeBlock("LAO", LAO_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock TIBETAN
            = new UnicodeBlock("TIBETAN", TIBETAN_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MYANMAR
            = new UnicodeBlock("MYANMAR", MYANMAR_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GEORGIAN
            = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HANGUL_JAMO
            = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ETHIOPIC
            = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CHEROKEE
            = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
            = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
                               UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock OGHAM
            = new UnicodeBlock("OGHAM", OGHAM_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock RUNIC
            = new UnicodeBlock("RUNIC", RUNIC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock KHMER
            = new UnicodeBlock("KHMER", KHMER_ID);
        /**
         * @stable ICU 2.4
         */
public static final UnicodeBlock MONGOLIAN
            = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);

        // Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.

        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
            = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GREEK_EXTENDED
            = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GENERAL_PUNCTUATION
            = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
            = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CURRENCY_SYMBOLS
            = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
        /**
         * Unicode 3.2 renames this block to "Combining Diacritical Marks for
         * Symbols".
* @stable ICU 2.4
         */
        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
            = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);

        // Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.

        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock LETTERLIKE_SYMBOLS
            = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock NUMBER_FORMS
            = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ARROWS
            = new UnicodeBlock("ARROWS", ARROWS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MATHEMATICAL_OPERATORS
            = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
            = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CONTROL_PICTURES
            = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
            = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
            = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BOX_DRAWING
            = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BLOCK_ELEMENTS
            = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GEOMETRIC_SHAPES
            = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
// Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
            = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock DINGBATS
            = new UnicodeBlock("DINGBATS", DINGBATS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BRAILLE_PATTERNS
            = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
            = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock KANGXI_RADICALS
            = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
            = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
                               IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
            = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HIRAGANA
            = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock KATAKANA
            = new UnicodeBlock("KATAKANA", KATAKANA_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BOPOMOFO
            = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
            = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock KANBUN
            = new UnicodeBlock("KANBUN", KANBUN_ID);
        /**
         * @stable ICU 2.4
         */
public static final UnicodeBlock BOPOMOFO_EXTENDED
            = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);

        // Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.

        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
            = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
                               ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_COMPATIBILITY
            = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
                               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
            = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock YI_SYLLABLES
            = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock YI_RADICALS
            = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HANGUL_SYLLABLES
            = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HIGH_SURROGATES
            = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
            = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock LOW_SURROGATES
            = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
        /**
         * Same UnicodeBlock object as the PRIVATE_USE constant.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
         * and multiple code point ranges had this block.
         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
         * and adds separate blocks for the supplementary PUAs.
         * @stable ICU 2.4
         */
        // NOTE(review): the ID is the literal 78 here, whereas the sibling
        // declarations pass a named *_ID constant -- presumably this equals the
        // Private Use Area block ID; confirm and consider using the named constant.
        public static final UnicodeBlock PRIVATE_USE_AREA
            = new UnicodeBlock("PRIVATE_USE_AREA", 78);
        /**
         * Alias of PRIVATE_USE_AREA: this field refers to the same UnicodeBlock object.
         * Until Unicode 3.1.1, the corresponding block name was "Private Use",
         * and multiple code point ranges had this block.
         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
         * and adds separate blocks for the supplementary PUAs.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock PRIVATE_USE
            = PRIVATE_USE_AREA;
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
            = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
            = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
            = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock COMBINING_HALF_MARKS
            = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
            = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SMALL_FORM_VARIANTS
            = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
        /**
         * @stable ICU 2.4
         */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
            = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);

        // Each UnicodeBlock constant below is constructed from its own name (as a
        // string) and the matching *_ID integer constant.

        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SPECIALS
            = new UnicodeBlock("SPECIALS", SPECIALS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
            = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock OLD_ITALIC
            = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock GOTHIC
            = new UnicodeBlock("GOTHIC", GOTHIC_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock DESERET
            = new UnicodeBlock("DESERET", DESERET_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
            = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MUSICAL_SYMBOLS
            = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
            = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
                               MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
            = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
                               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
            = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
                               CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock TAGS
= new UnicodeBlock("TAGS", TAGS_ID);

        // New blocks in Unicode 3.2

        // NOTE(review): CYRILLIC_SUPPLEMENTARY and CYRILLIC_SUPPLEMENT below are two
        // distinct objects (each is created with its own `new`), unlike PRIVATE_USE,
        // which is assigned the PRIVATE_USE_AREA object. Confirm whether the two *_ID
        // constants have the same value and, if so, which instance an ID lookup returns.

        /**
         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
         * @stable ICU 2.4
         */
        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
            = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
        /**
         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
         * @stable ICU 3.0
         */
        public static final UnicodeBlock CYRILLIC_SUPPLEMENT
            = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock TAGALOG
            = new UnicodeBlock("TAGALOG", TAGALOG_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock HANUNOO
            = new UnicodeBlock("HANUNOO", HANUNOO_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock BUHID
            = new UnicodeBlock("BUHID", BUHID_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock TAGBANWA
            = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
            = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
                               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
            = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
            = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
        /**
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
            = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
                               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
        /**
         * @stable ICU 2.4
         */
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1753 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1754 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1755 /** 1756 * @stable ICU 2.4 1757 */ 1758 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1759 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1760 /** 1761 * @stable ICU 2.4 1762 */ 1763 public static final UnicodeBlock VARIATION_SELECTORS 1764 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1765 /** 1766 * @stable ICU 2.4 1767 */ 1768 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1769 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1770 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1771 /** 1772 * @stable ICU 2.4 1773 */ 1774 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1775 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1776 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1777 1778 /** 1779 * @stable ICU 2.6 1780 */ 1781 public static final UnicodeBlock LIMBU 1782 = new UnicodeBlock("LIMBU", LIMBU_ID); 1783 /** 1784 * @stable ICU 2.6 1785 */ 1786 public static final UnicodeBlock TAI_LE 1787 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1788 /** 1789 * @stable ICU 2.6 1790 */ 1791 public static final UnicodeBlock KHMER_SYMBOLS 1792 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1793 1794 /** 1795 * @stable ICU 2.6 1796 */ 1797 public static final UnicodeBlock PHONETIC_EXTENSIONS 1798 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1799 1800 /** 1801 * @stable ICU 2.6 1802 */ 1803 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1804 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1805 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1806 /** 1807 * @stable ICU 2.6 1808 */ 1809 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1810 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1811 /** 1812 * @stable ICU 2.6 
1813 */ 1814 public static final UnicodeBlock LINEAR_B_SYLLABARY 1815 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1816 /** 1817 * @stable ICU 2.6 1818 */ 1819 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1820 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1821 /** 1822 * @stable ICU 2.6 1823 */ 1824 public static final UnicodeBlock AEGEAN_NUMBERS 1825 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1826 /** 1827 * @stable ICU 2.6 1828 */ 1829 public static final UnicodeBlock UGARITIC 1830 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1831 /** 1832 * @stable ICU 2.6 1833 */ 1834 public static final UnicodeBlock SHAVIAN 1835 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1836 /** 1837 * @stable ICU 2.6 1838 */ 1839 public static final UnicodeBlock OSMANYA 1840 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1841 /** 1842 * @stable ICU 2.6 1843 */ 1844 public static final UnicodeBlock CYPRIOT_SYLLABARY 1845 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1846 /** 1847 * @stable ICU 2.6 1848 */ 1849 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1850 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1851 1852 /** 1853 * @stable ICU 2.6 1854 */ 1855 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1856 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1857 1858 /* New blocks in Unicode 4.1 */ 1859 1860 /** 1861 * @stable ICU 3.4 1862 */ 1863 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1864 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1865 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1866 1867 /** 1868 * @stable ICU 3.4 1869 */ 1870 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1871 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1872 1873 /** 1874 * @stable ICU 3.4 1875 */ 1876 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1877 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock BUGINESE =
                new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock CJK_STROKES =
                new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
                        COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock COPTIC =
                new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock ETHIOPIC_EXTENDED =
                new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
                new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
                new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock GLAGOLITIC =
                new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock KHAROSHTHI =
                new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
                new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/

        /** @stable ICU 3.4 */
        public static final UnicodeBlock NEW_TAI_LUE =
                new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1944 1945 /** 1946 * @stable ICU 3.4 1947 */ 1948 public static final UnicodeBlock OLD_PERSIAN = 1949 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1950 1951 /** 1952 * @stable ICU 3.4 1953 */ 1954 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1955 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1956 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1957 1958 /** 1959 * @stable ICU 3.4 1960 */ 1961 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1962 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1963 1964 /** 1965 * @stable ICU 3.4 1966 */ 1967 public static final UnicodeBlock SYLOTI_NAGRI = 1968 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1969 1970 /** 1971 * @stable ICU 3.4 1972 */ 1973 public static final UnicodeBlock TIFINAGH = 1974 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1975 1976 /** 1977 * @stable ICU 3.4 1978 */ 1979 public static final UnicodeBlock VERTICAL_FORMS = 1980 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1981 1982 /** 1983 * @stable ICU 3.6 1984 */ 1985 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1986 /** 1987 * @stable ICU 3.6 1988 */ 1989 public static final UnicodeBlock BALINESE = 1990 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1991 /** 1992 * @stable ICU 3.6 1993 */ 1994 public static final UnicodeBlock LATIN_EXTENDED_C = 1995 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1996 /** 1997 * @stable ICU 3.6 1998 */ 1999 public static final UnicodeBlock LATIN_EXTENDED_D = 2000 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2001 /** 2002 * @stable ICU 3.6 2003 */ 2004 public static final UnicodeBlock PHAGS_PA = 2005 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2006 /** 2007 * @stable ICU 3.6 2008 */ 2009 public static final UnicodeBlock PHOENICIAN = 2010 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2011 /** 2012 * @stable ICU 3.6 2013 */ 2014 public static final UnicodeBlock CUNEIFORM = 2015 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2016 /** 2017 * @stable ICU 3.6 2018 */ 2019 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2020 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2021 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2022 /** 2023 * @stable ICU 3.6 2024 */ 2025 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2026 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2027 2028 /** 2029 * @stable ICU 4.0 2030 */ 2031 public static final UnicodeBlock SUNDANESE = 2032 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2033 2034 /** 2035 * @stable ICU 4.0 2036 */ 2037 public static final UnicodeBlock LEPCHA = 2038 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2039 2040 /** 2041 * @stable ICU 4.0 2042 */ 2043 public static final UnicodeBlock OL_CHIKI = 2044 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2045 2046 /** 2047 * @stable ICU 4.0 2048 */ 2049 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2050 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2051 2052 /** 2053 * @stable ICU 4.0 2054 */ 2055 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2056 2057 /** 2058 * @stable ICU 4.0 2059 */ 2060 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2061 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2062 2063 /** 2064 * @stable ICU 4.0 2065 */ 2066 public static final UnicodeBlock SAURASHTRA = 2067 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2068 2069 /** 2070 * @stable ICU 4.0 2071 */ 2072 public static final UnicodeBlock KAYAH_LI = 2073 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2074 2075 /** 2076 * @stable ICU 4.0 2077 */ 2078 public static final UnicodeBlock REJANG = 2079 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2080 2081 /** 2082 * @stable ICU 4.0 2083 */ 2084 public static final UnicodeBlock CHAM = 2085 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2086 2087 /** 2088 * @stable ICU 4.0 2089 */ 2090 public static final UnicodeBlock ANCIENT_SYMBOLS = 2091 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2092 2093 /** 2094 * @stable ICU 4.0 2095 */ 2096 public static final UnicodeBlock PHAISTOS_DISC = 2097 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2098 2099 /** 2100 * @stable ICU 4.0 2101 */ 2102 public static final UnicodeBlock LYCIAN = 2103 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2104 2105 /** 2106 * @stable ICU 4.0 2107 */ 2108 public static final UnicodeBlock CARIAN = 2109 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2110 2111 /** 2112 * @stable ICU 4.0 2113 */ 2114 public static final UnicodeBlock LYDIAN = 2115 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2116 2117 /** 2118 * @stable ICU 4.0 2119 */ 2120 public static final UnicodeBlock MAHJONG_TILES = 2121 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2122 2123 /** 2124 * @stable ICU 4.0 2125 */ 2126 public static final UnicodeBlock DOMINO_TILES = 2127 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2128 2129 /* New blocks in Unicode 5.2 */ 2130 2131 /** @stable ICU 4.4 */ 2132 public static final UnicodeBlock SAMARITAN = 2133 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2134 /** @stable ICU 4.4 */ 2135 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2136 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2137 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2138 /** @stable ICU 4.4 */ 2139 public static final UnicodeBlock TAI_THAM = 2140 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2141 /** @stable ICU 4.4 */ 2142 public static final UnicodeBlock VEDIC_EXTENSIONS = 2143 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2144 /** @stable ICU 4.4 */ 2145 public static final UnicodeBlock LISU = 2146 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2147 /** @stable ICU 4.4 */ 2148 public static final UnicodeBlock BAMUM = 2149 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2150 /** @stable ICU 4.4 */ 2151 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2152 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2153 /** @stable ICU 4.4 */ 2154 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2155 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2156 /** @stable ICU 4.4 */ 2157 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2158 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2159 /** @stable ICU 4.4 */ 2160 public static final UnicodeBlock JAVANESE = 2161 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2162 /** @stable ICU 4.4 */ 2163 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2164 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2165 /** @stable ICU 4.4 */ 2166 public static final UnicodeBlock TAI_VIET = 2167 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2168 /** @stable ICU 4.4 */ 2169 public static final UnicodeBlock MEETEI_MAYEK = 2170 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2171 /** @stable ICU 4.4 */ 2172 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2173 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2174 /** @stable ICU 4.4 */ 2175 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2176 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2177 /** @stable ICU 4.4 */ 2178 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2179 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2180 /** @stable ICU 4.4 */ 2181 public static final 
UnicodeBlock AVESTAN = 2182 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2183 /** @stable ICU 4.4 */ 2184 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2185 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2186 /** @stable ICU 4.4 */ 2187 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2188 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2189 /** @stable ICU 4.4 */ 2190 public static final UnicodeBlock OLD_TURKIC = 2191 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2192 /** @stable ICU 4.4 */ 2193 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2194 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2195 /** @stable ICU 4.4 */ 2196 public static final UnicodeBlock KAITHI = 2197 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2198 /** @stable ICU 4.4 */ 2199 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2200 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2201 /** @stable ICU 4.4 */ 2202 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2203 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2204 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2205 /** @stable ICU 4.4 */ 2206 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2207 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2208 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2209 /** @stable ICU 4.4 */ 2210 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2211 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2212 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2213 2214 /* New blocks in Unicode 6.0 */ 2215 2216 /** @stable ICU 4.6 */ 2217 public static final UnicodeBlock MANDAIC = 2218 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2219 /** @stable ICU 4.6 */ 2220 public static final UnicodeBlock BATAK = 2221 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2222 /** @stable ICU 4.6 */ 2223 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2224 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2225 /** @stable ICU 4.6 */ 2226 public static final UnicodeBlock BRAHMI = 2227 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2228 /** @stable ICU 4.6 */ 2229 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2230 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2231 /** @stable ICU 4.6 */ 2232 public static final UnicodeBlock KANA_SUPPLEMENT = 2233 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2234 /** @stable ICU 4.6 */ 2235 public static final UnicodeBlock PLAYING_CARDS = 2236 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2237 /** @stable ICU 4.6 */ 2238 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2239 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2240 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2241 /** @stable ICU 4.6 */ 2242 public static final UnicodeBlock EMOTICONS = 2243 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2244 /** @stable ICU 4.6 */ 2245 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2246 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2247 /** @stable ICU 4.6 */ 2248 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2249 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2250 /** @stable ICU 4.6 */ 2251 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2252 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2253 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2254 2255 /* New blocks in Unicode 6.1 */ 2256 2257 /** @stable ICU 49 */ 2258 public static final UnicodeBlock ARABIC_EXTENDED_A = 2259 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2260 /** @stable ICU 49 */ 2261 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
                new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
                        ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock CHAKMA =
                new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
                new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock MEROITIC_CURSIVE =
                new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
                new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock MIAO =
                new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock SHARADA =
                new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock SORA_SOMPENG =
                new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
                new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
        /** @stable ICU 49 */
        public static final UnicodeBlock TAKRI =
                new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/

        /* New blocks in Unicode 7.0 */

        /** @stable ICU 54 */
        public static final UnicodeBlock BASSA_VAH =
                new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock CAUCASIAN_ALBANIAN =
                new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
                new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID);
/*[102E0]*/ 2297 /** @stable ICU 54 */ 2298 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2299 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2300 /** @stable ICU 54 */ 2301 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2302 /** @stable ICU 54 */ 2303 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2304 /** @stable ICU 54 */ 2305 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2306 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2307 /** @stable ICU 54 */ 2308 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2309 /** @stable ICU 54 */ 2310 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2311 /** @stable ICU 54 */ 2312 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2313 /** @stable ICU 54 */ 2314 public static final UnicodeBlock LATIN_EXTENDED_E = 2315 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2316 /** @stable ICU 54 */ 2317 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2318 /** @stable ICU 54 */ 2319 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2320 /** @stable ICU 54 */ 2321 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2322 /** @stable ICU 54 */ 2323 public static final UnicodeBlock MENDE_KIKAKUI = 2324 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2325 /** @stable ICU 54 */ 2326 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2327 /** @stable ICU 54 */ 2328 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2329 /** @stable ICU 54 */ 2330 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2331 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2332 /** @stable ICU 54 */ 2333 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2334 /** @stable ICU 54 */ 2335 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2336 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2337 /** @stable ICU 54 */ 2338 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2339 /** @stable ICU 54 */ 2340 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2341 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2342 /** @stable ICU 54 */ 2343 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2344 /** @stable ICU 54 */ 2345 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2346 /** @stable ICU 54 */ 2347 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2348 /** @stable ICU 54 */ 2349 public static final UnicodeBlock PSALTER_PAHLAVI = 2350 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2351 /** @stable ICU 54 */ 2352 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2353 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2354 /** @stable ICU 54 */ 2355 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2356 /** @stable ICU 54 */ 2357 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2358 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2359 /** @stable ICU 54 */ 2360 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2361 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2362 /** @stable ICU 54 */ 2363 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2364 /** @stable ICU 54 */ 2365 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2366 2367 /* New blocks in Unicode 8.0 */ 2368 2369 /** @stable ICU 56 */ 2370 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2371 /** @stable ICU 56 */ 2372 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2373 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2374 /** @stable ICU 56 */ 2375 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2376 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2377 /** @stable ICU 56 */ 2378 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2379 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2380 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2381 /** @stable ICU 56 */ 2382 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2383 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2384 /** @stable ICU 56 */ 2385 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2386 /** @stable ICU 56 */ 2387 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2388 /** @stable ICU 56 */ 2389 public static final UnicodeBlock OLD_HUNGARIAN = 2390 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2391 /** @stable ICU 56 */ 2392 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2393 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2394 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2395 /** @stable ICU 56 */ 2396 public static final UnicodeBlock SUTTON_SIGNWRITING = 2397 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2398 2399 /* New blocks in Unicode 9.0 */ 2400 2401 /** @stable ICU 58 */ 2402 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2403 /** @stable ICU 58 */ 2404 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2405 /** @stable ICU 58 */ 2406 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2407 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2408 /** @stable ICU 58 */ 2409 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2410 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2411 /** @stable ICU 58 */ 2412 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2413 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2414 /** @stable ICU 58 */ 2415 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2416 /** @stable ICU 58 */ 2417 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2418 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2419 /** @stable ICU 58 */ 2420 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2421 /** @stable ICU 58 */ 2422 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2423 /** @stable ICU 58 */ 2424 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2425 /** @stable ICU 58 */ 2426 public static final UnicodeBlock TANGUT_COMPONENTS = 2427 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2428 2429 /** 2430 * @stable ICU 2.4 2431 */ 2432 public static final UnicodeBlock INVALID_CODE 2433 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2434 2435 static { 2436 for (int blockId = 0; blockId < COUNT; ++blockId) { 2437 if (BLOCKS_[blockId] == null) { 2438 
throw new java.lang.IllegalStateException( 2439 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2440 } 2441 } 2442 } 2443 2444 // public methods -------------------------------------------------- 2445 2446 /** 2447 * {@icu} Returns the only instance of the UnicodeBlock with the argument ID. 2448 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2449 * @param id UnicodeBlock ID 2450 * @return the only instance of the UnicodeBlock with the argument ID 2451 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2452 * returned. 2453 * @stable ICU 2.4 2454 */ 2455 public static UnicodeBlock getInstance(int id) 2456 { 2457 if (id >= 0 && id < BLOCKS_.length) { 2458 return BLOCKS_[id]; 2459 } 2460 return INVALID_CODE; 2461 } 2462 2463 /** 2464 * Returns the Unicode allocation block that contains the code point, 2465 * or null if the code point is not a member of a defined block. 2466 * @param ch code point to be tested 2467 * @return the Unicode allocation block that contains the code point 2468 * @stable ICU 2.4 2469 */ 2470 public static UnicodeBlock of(int ch) 2471 { 2472 if (ch > MAX_VALUE) { 2473 return INVALID_CODE; 2474 } 2475 2476 return UnicodeBlock.getInstance( 2477 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2478 } 2479 2480 /** 2481 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2482 * Returns the Unicode block with the given name. {@icunote} Unlike 2483 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2484 * against the official UCD name and the Java block name 2485 * (ignoring case). 
2486 * @param blockName the name of the block to match 2487 * @return the UnicodeBlock with that name 2488 * @throws IllegalArgumentException if the blockName could not be matched 2489 * @stable ICU 3.0 2490 */ 2491 public static final UnicodeBlock forName(String blockName) { 2492 Map<String, UnicodeBlock> m = null; 2493 if (mref != null) { 2494 m = mref.get(); 2495 } 2496 if (m == null) { 2497 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2498 for (int i = 0; i < BLOCKS_.length; ++i) { 2499 UnicodeBlock b = BLOCKS_[i]; 2500 String name = trimBlockName( 2501 getPropertyValueName(UProperty.BLOCK, b.getID(), 2502 UProperty.NameChoice.LONG)); 2503 m.put(name, b); 2504 } 2505 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2506 } 2507 UnicodeBlock b = m.get(trimBlockName(blockName)); 2508 if (b == null) { 2509 throw new IllegalArgumentException(); 2510 } 2511 return b; 2512 } 2513 private static SoftReference<Map<String, UnicodeBlock>> mref; 2514 2515 private static String trimBlockName(String name) { 2516 String upper = name.toUpperCase(Locale.ENGLISH); 2517 StringBuilder result = new StringBuilder(upper.length()); 2518 for (int i = 0; i < upper.length(); i++) { 2519 char c = upper.charAt(i); 2520 if (c != ' ' && c != '_' && c != '-') { 2521 result.append(c); 2522 } 2523 } 2524 return result.toString(); 2525 } 2526 2527 /** 2528 * {icu} Returns the type ID of this Unicode block 2529 * @return integer type ID of this Unicode block 2530 * @stable ICU 2.4 2531 */ 2532 public int getID() 2533 { 2534 return m_id_; 2535 } 2536 2537 // private data members --------------------------------------------- 2538 2539 /** 2540 * Identification code for this UnicodeBlock 2541 */ 2542 private int m_id_; 2543 2544 // private constructor ---------------------------------------------- 2545 2546 /** 2547 * UnicodeBlock constructor 2548 * @param name name of this UnicodeBlock 2549 * @param id unique id of this UnicodeBlock 2550 * @exception NullPointerException if name is 
<code>null</code> 2551 */ 2552 private UnicodeBlock(String name, int id) 2553 { 2554 super(name); 2555 m_id_ = id; 2556 if (id >= 0) { 2557 BLOCKS_[id] = this; 2558 } 2559 } 2560 } 2561 2562 /** 2563 * East Asian Width constants. 2564 * @see UProperty#EAST_ASIAN_WIDTH 2565 * @see UCharacter#getIntPropertyValue 2566 * @stable ICU 2.4 2567 */ 2568 public static interface EastAsianWidth 2569 { 2570 /** 2571 * @stable ICU 2.4 2572 */ 2573 public static final int NEUTRAL = 0; 2574 /** 2575 * @stable ICU 2.4 2576 */ 2577 public static final int AMBIGUOUS = 1; 2578 /** 2579 * @stable ICU 2.4 2580 */ 2581 public static final int HALFWIDTH = 2; 2582 /** 2583 * @stable ICU 2.4 2584 */ 2585 public static final int FULLWIDTH = 3; 2586 /** 2587 * @stable ICU 2.4 2588 */ 2589 public static final int NARROW = 4; 2590 /** 2591 * @stable ICU 2.4 2592 */ 2593 public static final int WIDE = 5; 2594 /** 2595 * One more than the highest normal EastAsianWidth value. 2596 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2597 * 2598 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2599 */ 2600 @Deprecated 2601 public static final int COUNT = 6; 2602 } 2603 2604 /** 2605 * Decomposition Type constants. 
2606 * @see UProperty#DECOMPOSITION_TYPE 2607 * @stable ICU 2.4 2608 */ 2609 public static interface DecompositionType 2610 { 2611 /** 2612 * @stable ICU 2.4 2613 */ 2614 public static final int NONE = 0; 2615 /** 2616 * @stable ICU 2.4 2617 */ 2618 public static final int CANONICAL = 1; 2619 /** 2620 * @stable ICU 2.4 2621 */ 2622 public static final int COMPAT = 2; 2623 /** 2624 * @stable ICU 2.4 2625 */ 2626 public static final int CIRCLE = 3; 2627 /** 2628 * @stable ICU 2.4 2629 */ 2630 public static final int FINAL = 4; 2631 /** 2632 * @stable ICU 2.4 2633 */ 2634 public static final int FONT = 5; 2635 /** 2636 * @stable ICU 2.4 2637 */ 2638 public static final int FRACTION = 6; 2639 /** 2640 * @stable ICU 2.4 2641 */ 2642 public static final int INITIAL = 7; 2643 /** 2644 * @stable ICU 2.4 2645 */ 2646 public static final int ISOLATED = 8; 2647 /** 2648 * @stable ICU 2.4 2649 */ 2650 public static final int MEDIAL = 9; 2651 /** 2652 * @stable ICU 2.4 2653 */ 2654 public static final int NARROW = 10; 2655 /** 2656 * @stable ICU 2.4 2657 */ 2658 public static final int NOBREAK = 11; 2659 /** 2660 * @stable ICU 2.4 2661 */ 2662 public static final int SMALL = 12; 2663 /** 2664 * @stable ICU 2.4 2665 */ 2666 public static final int SQUARE = 13; 2667 /** 2668 * @stable ICU 2.4 2669 */ 2670 public static final int SUB = 14; 2671 /** 2672 * @stable ICU 2.4 2673 */ 2674 public static final int SUPER = 15; 2675 /** 2676 * @stable ICU 2.4 2677 */ 2678 public static final int VERTICAL = 16; 2679 /** 2680 * @stable ICU 2.4 2681 */ 2682 public static final int WIDE = 17; 2683 /** 2684 * One more than the highest normal DecompositionType value. 2685 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 2686 * 2687 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2688 */ 2689 @Deprecated 2690 public static final int COUNT = 18; 2691 } 2692 2693 /** 2694 * Joining Type constants. 
2695 * @see UProperty#JOINING_TYPE 2696 * @stable ICU 2.4 2697 */ 2698 public static interface JoiningType 2699 { 2700 /** 2701 * @stable ICU 2.4 2702 */ 2703 public static final int NON_JOINING = 0; 2704 /** 2705 * @stable ICU 2.4 2706 */ 2707 public static final int JOIN_CAUSING = 1; 2708 /** 2709 * @stable ICU 2.4 2710 */ 2711 public static final int DUAL_JOINING = 2; 2712 /** 2713 * @stable ICU 2.4 2714 */ 2715 public static final int LEFT_JOINING = 3; 2716 /** 2717 * @stable ICU 2.4 2718 */ 2719 public static final int RIGHT_JOINING = 4; 2720 /** 2721 * @stable ICU 2.4 2722 */ 2723 public static final int TRANSPARENT = 5; 2724 /** 2725 * One more than the highest normal JoiningType value. 2726 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 2727 * 2728 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2729 */ 2730 @Deprecated 2731 public static final int COUNT = 6; 2732 } 2733 2734 /** 2735 * Joining Group constants. 
2736 * @see UProperty#JOINING_GROUP 2737 * @stable ICU 2.4 2738 */ 2739 public static interface JoiningGroup 2740 { 2741 /** 2742 * @stable ICU 2.4 2743 */ 2744 public static final int NO_JOINING_GROUP = 0; 2745 /** 2746 * @stable ICU 2.4 2747 */ 2748 public static final int AIN = 1; 2749 /** 2750 * @stable ICU 2.4 2751 */ 2752 public static final int ALAPH = 2; 2753 /** 2754 * @stable ICU 2.4 2755 */ 2756 public static final int ALEF = 3; 2757 /** 2758 * @stable ICU 2.4 2759 */ 2760 public static final int BEH = 4; 2761 /** 2762 * @stable ICU 2.4 2763 */ 2764 public static final int BETH = 5; 2765 /** 2766 * @stable ICU 2.4 2767 */ 2768 public static final int DAL = 6; 2769 /** 2770 * @stable ICU 2.4 2771 */ 2772 public static final int DALATH_RISH = 7; 2773 /** 2774 * @stable ICU 2.4 2775 */ 2776 public static final int E = 8; 2777 /** 2778 * @stable ICU 2.4 2779 */ 2780 public static final int FEH = 9; 2781 /** 2782 * @stable ICU 2.4 2783 */ 2784 public static final int FINAL_SEMKATH = 10; 2785 /** 2786 * @stable ICU 2.4 2787 */ 2788 public static final int GAF = 11; 2789 /** 2790 * @stable ICU 2.4 2791 */ 2792 public static final int GAMAL = 12; 2793 /** 2794 * @stable ICU 2.4 2795 */ 2796 public static final int HAH = 13; 2797 /** @stable ICU 4.6 */ 2798 public static final int TEH_MARBUTA_GOAL = 14; 2799 /** 2800 * @stable ICU 2.4 2801 */ 2802 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 2803 /** 2804 * @stable ICU 2.4 2805 */ 2806 public static final int HE = 15; 2807 /** 2808 * @stable ICU 2.4 2809 */ 2810 public static final int HEH = 16; 2811 /** 2812 * @stable ICU 2.4 2813 */ 2814 public static final int HEH_GOAL = 17; 2815 /** 2816 * @stable ICU 2.4 2817 */ 2818 public static final int HETH = 18; 2819 /** 2820 * @stable ICU 2.4 2821 */ 2822 public static final int KAF = 19; 2823 /** 2824 * @stable ICU 2.4 2825 */ 2826 public static final int KAPH = 20; 2827 /** 2828 * @stable ICU 2.4 2829 */ 2830 public static final int KNOTTED_HEH = 
21; 2831 /** 2832 * @stable ICU 2.4 2833 */ 2834 public static final int LAM = 22; 2835 /** 2836 * @stable ICU 2.4 2837 */ 2838 public static final int LAMADH = 23; 2839 /** 2840 * @stable ICU 2.4 2841 */ 2842 public static final int MEEM = 24; 2843 /** 2844 * @stable ICU 2.4 2845 */ 2846 public static final int MIM = 25; 2847 /** 2848 * @stable ICU 2.4 2849 */ 2850 public static final int NOON = 26; 2851 /** 2852 * @stable ICU 2.4 2853 */ 2854 public static final int NUN = 27; 2855 /** 2856 * @stable ICU 2.4 2857 */ 2858 public static final int PE = 28; 2859 /** 2860 * @stable ICU 2.4 2861 */ 2862 public static final int QAF = 29; 2863 /** 2864 * @stable ICU 2.4 2865 */ 2866 public static final int QAPH = 30; 2867 /** 2868 * @stable ICU 2.4 2869 */ 2870 public static final int REH = 31; 2871 /** 2872 * @stable ICU 2.4 2873 */ 2874 public static final int REVERSED_PE = 32; 2875 /** 2876 * @stable ICU 2.4 2877 */ 2878 public static final int SAD = 33; 2879 /** 2880 * @stable ICU 2.4 2881 */ 2882 public static final int SADHE = 34; 2883 /** 2884 * @stable ICU 2.4 2885 */ 2886 public static final int SEEN = 35; 2887 /** 2888 * @stable ICU 2.4 2889 */ 2890 public static final int SEMKATH = 36; 2891 /** 2892 * @stable ICU 2.4 2893 */ 2894 public static final int SHIN = 37; 2895 /** 2896 * @stable ICU 2.4 2897 */ 2898 public static final int SWASH_KAF = 38; 2899 /** 2900 * @stable ICU 2.4 2901 */ 2902 public static final int SYRIAC_WAW = 39; 2903 /** 2904 * @stable ICU 2.4 2905 */ 2906 public static final int TAH = 40; 2907 /** 2908 * @stable ICU 2.4 2909 */ 2910 public static final int TAW = 41; 2911 /** 2912 * @stable ICU 2.4 2913 */ 2914 public static final int TEH_MARBUTA = 42; 2915 /** 2916 * @stable ICU 2.4 2917 */ 2918 public static final int TETH = 43; 2919 /** 2920 * @stable ICU 2.4 2921 */ 2922 public static final int WAW = 44; 2923 /** 2924 * @stable ICU 2.4 2925 */ 2926 public static final int YEH = 45; 2927 /** 2928 * @stable ICU 2.4 2929 */ 2930 public 
static final int YEH_BARREE = 46; 2931 /** 2932 * @stable ICU 2.4 2933 */ 2934 public static final int YEH_WITH_TAIL = 47; 2935 /** 2936 * @stable ICU 2.4 2937 */ 2938 public static final int YUDH = 48; 2939 /** 2940 * @stable ICU 2.4 2941 */ 2942 public static final int YUDH_HE = 49; 2943 /** 2944 * @stable ICU 2.4 2945 */ 2946 public static final int ZAIN = 50; 2947 /** 2948 * @stable ICU 2.6 2949 */ 2950 public static final int FE = 51; 2951 /** 2952 * @stable ICU 2.6 2953 */ 2954 public static final int KHAPH = 52; 2955 /** 2956 * @stable ICU 2.6 2957 */ 2958 public static final int ZHAIN = 53; 2959 /** 2960 * @stable ICU 4.0 2961 */ 2962 public static final int BURUSHASKI_YEH_BARREE = 54; 2963 /** @stable ICU 4.4 */ 2964 public static final int FARSI_YEH = 55; 2965 /** @stable ICU 4.4 */ 2966 public static final int NYA = 56; 2967 /** @stable ICU 49 */ 2968 public static final int ROHINGYA_YEH = 57; 2969 2970 /** @stable ICU 54 */ 2971 public static final int MANICHAEAN_ALEPH = 58; 2972 /** @stable ICU 54 */ 2973 public static final int MANICHAEAN_AYIN = 59; 2974 /** @stable ICU 54 */ 2975 public static final int MANICHAEAN_BETH = 60; 2976 /** @stable ICU 54 */ 2977 public static final int MANICHAEAN_DALETH = 61; 2978 /** @stable ICU 54 */ 2979 public static final int MANICHAEAN_DHAMEDH = 62; 2980 /** @stable ICU 54 */ 2981 public static final int MANICHAEAN_FIVE = 63; 2982 /** @stable ICU 54 */ 2983 public static final int MANICHAEAN_GIMEL = 64; 2984 /** @stable ICU 54 */ 2985 public static final int MANICHAEAN_HETH = 65; 2986 /** @stable ICU 54 */ 2987 public static final int MANICHAEAN_HUNDRED = 66; 2988 /** @stable ICU 54 */ 2989 public static final int MANICHAEAN_KAPH = 67; 2990 /** @stable ICU 54 */ 2991 public static final int MANICHAEAN_LAMEDH = 68; 2992 /** @stable ICU 54 */ 2993 public static final int MANICHAEAN_MEM = 69; 2994 /** @stable ICU 54 */ 2995 public static final int MANICHAEAN_NUN = 70; 2996 /** @stable ICU 54 */ 2997 public static final 
int MANICHAEAN_ONE = 71; 2998 /** @stable ICU 54 */ 2999 public static final int MANICHAEAN_PE = 72; 3000 /** @stable ICU 54 */ 3001 public static final int MANICHAEAN_QOPH = 73; 3002 /** @stable ICU 54 */ 3003 public static final int MANICHAEAN_RESH = 74; 3004 /** @stable ICU 54 */ 3005 public static final int MANICHAEAN_SADHE = 75; 3006 /** @stable ICU 54 */ 3007 public static final int MANICHAEAN_SAMEKH = 76; 3008 /** @stable ICU 54 */ 3009 public static final int MANICHAEAN_TAW = 77; 3010 /** @stable ICU 54 */ 3011 public static final int MANICHAEAN_TEN = 78; 3012 /** @stable ICU 54 */ 3013 public static final int MANICHAEAN_TETH = 79; 3014 /** @stable ICU 54 */ 3015 public static final int MANICHAEAN_THAMEDH = 80; 3016 /** @stable ICU 54 */ 3017 public static final int MANICHAEAN_TWENTY = 81; 3018 /** @stable ICU 54 */ 3019 public static final int MANICHAEAN_WAW = 82; 3020 /** @stable ICU 54 */ 3021 public static final int MANICHAEAN_YODH = 83; 3022 /** @stable ICU 54 */ 3023 public static final int MANICHAEAN_ZAYIN = 84; 3024 /** @stable ICU 54 */ 3025 public static final int STRAIGHT_WAW = 85; 3026 3027 /** @stable ICU 58 */ 3028 public static final int AFRICAN_FEH = 86; 3029 /** @stable ICU 58 */ 3030 public static final int AFRICAN_NOON = 87; 3031 /** @stable ICU 58 */ 3032 public static final int AFRICAN_QAF = 88; 3033 3034 /** 3035 * One more than the highest normal JoiningGroup value. 3036 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 3037 * 3038 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3039 */ 3040 @Deprecated 3041 public static final int COUNT = 89; 3042 } 3043 3044 /** 3045 * Grapheme Cluster Break constants. 
3046 * @see UProperty#GRAPHEME_CLUSTER_BREAK 3047 * @stable ICU 3.4 3048 */ 3049 public static interface GraphemeClusterBreak { 3050 /** 3051 * @stable ICU 3.4 3052 */ 3053 public static final int OTHER = 0; 3054 /** 3055 * @stable ICU 3.4 3056 */ 3057 public static final int CONTROL = 1; 3058 /** 3059 * @stable ICU 3.4 3060 */ 3061 public static final int CR = 2; 3062 /** 3063 * @stable ICU 3.4 3064 */ 3065 public static final int EXTEND = 3; 3066 /** 3067 * @stable ICU 3.4 3068 */ 3069 public static final int L = 4; 3070 /** 3071 * @stable ICU 3.4 3072 */ 3073 public static final int LF = 5; 3074 /** 3075 * @stable ICU 3.4 3076 */ 3077 public static final int LV = 6; 3078 /** 3079 * @stable ICU 3.4 3080 */ 3081 public static final int LVT = 7; 3082 /** 3083 * @stable ICU 3.4 3084 */ 3085 public static final int T = 8; 3086 /** 3087 * @stable ICU 3.4 3088 */ 3089 public static final int V = 9; 3090 /** 3091 * @stable ICU 4.0 3092 */ 3093 public static final int SPACING_MARK = 10; 3094 /** 3095 * @stable ICU 4.0 3096 */ 3097 public static final int PREPEND = 11; 3098 /** @stable ICU 50 */ 3099 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3100 /** @stable ICU 58 */ 3101 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3102 /** @stable ICU 58 */ 3103 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 3104 /** @stable ICU 58 */ 3105 public static final int E_MODIFIER = 15; /*[EM]*/ 3106 /** @stable ICU 58 */ 3107 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 3108 /** @stable ICU 58 */ 3109 public static final int ZWJ = 17; /*[ZWJ]*/ 3110 /** 3111 * One more than the highest normal GraphemeClusterBreak value. 3112 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 3113 * 3114 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
3115 */ 3116 @Deprecated 3117 public static final int COUNT = 18; 3118 } 3119 3120 /** 3121 * Word Break constants. 3122 * @see UProperty#WORD_BREAK 3123 * @stable ICU 3.4 3124 */ 3125 public static interface WordBreak { 3126 /** 3127 * @stable ICU 3.8 3128 */ 3129 public static final int OTHER = 0; 3130 /** 3131 * @stable ICU 3.8 3132 */ 3133 public static final int ALETTER = 1; 3134 /** 3135 * @stable ICU 3.8 3136 */ 3137 public static final int FORMAT = 2; 3138 /** 3139 * @stable ICU 3.8 3140 */ 3141 public static final int KATAKANA = 3; 3142 /** 3143 * @stable ICU 3.8 3144 */ 3145 public static final int MIDLETTER = 4; 3146 /** 3147 * @stable ICU 3.8 3148 */ 3149 public static final int MIDNUM = 5; 3150 /** 3151 * @stable ICU 3.8 3152 */ 3153 public static final int NUMERIC = 6; 3154 /** 3155 * @stable ICU 3.8 3156 */ 3157 public static final int EXTENDNUMLET = 7; 3158 /** 3159 * @stable ICU 4.0 3160 */ 3161 public static final int CR = 8; 3162 /** 3163 * @stable ICU 4.0 3164 */ 3165 public static final int EXTEND = 9; 3166 /** 3167 * @stable ICU 4.0 3168 */ 3169 public static final int LF = 10; 3170 /** 3171 * @stable ICU 4.0 3172 */ 3173 public static final int MIDNUMLET = 11; 3174 /** 3175 * @stable ICU 4.0 3176 */ 3177 public static final int NEWLINE = 12; 3178 /** @stable ICU 50 */ 3179 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3180 /** @stable ICU 52 */ 3181 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3182 /** @stable ICU 52 */ 3183 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3184 /** @stable ICU 52 */ 3185 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3186 /** @stable ICU 58 */ 3187 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3188 /** @stable ICU 58 */ 3189 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 3190 /** @stable ICU 58 */ 3191 public static final int E_MODIFIER = 19; /*[EM]*/ 3192 
/** @stable ICU 58 */ 3193 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 3194 /** @stable ICU 58 */ 3195 public static final int ZWJ = 21; /*[ZWJ]*/ 3196 /** 3197 * One more than the highest normal WordBreak value. 3198 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 3199 * 3200 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3201 */ 3202 @Deprecated 3203 public static final int COUNT = 22; 3204 } 3205 3206 /** 3207 * Sentence Break constants. 3208 * @see UProperty#SENTENCE_BREAK 3209 * @stable ICU 3.4 3210 */ 3211 public static interface SentenceBreak { 3212 /** 3213 * @stable ICU 3.8 3214 */ 3215 public static final int OTHER = 0; 3216 /** 3217 * @stable ICU 3.8 3218 */ 3219 public static final int ATERM = 1; 3220 /** 3221 * @stable ICU 3.8 3222 */ 3223 public static final int CLOSE = 2; 3224 /** 3225 * @stable ICU 3.8 3226 */ 3227 public static final int FORMAT = 3; 3228 /** 3229 * @stable ICU 3.8 3230 */ 3231 public static final int LOWER = 4; 3232 /** 3233 * @stable ICU 3.8 3234 */ 3235 public static final int NUMERIC = 5; 3236 /** 3237 * @stable ICU 3.8 3238 */ 3239 public static final int OLETTER = 6; 3240 /** 3241 * @stable ICU 3.8 3242 */ 3243 public static final int SEP = 7; 3244 /** 3245 * @stable ICU 3.8 3246 */ 3247 public static final int SP = 8; 3248 /** 3249 * @stable ICU 3.8 3250 */ 3251 public static final int STERM = 9; 3252 /** 3253 * @stable ICU 3.8 3254 */ 3255 public static final int UPPER = 10; 3256 /** 3257 * @stable ICU 4.0 3258 */ 3259 public static final int CR = 11; 3260 /** 3261 * @stable ICU 4.0 3262 */ 3263 public static final int EXTEND = 12; 3264 /** 3265 * @stable ICU 4.0 3266 */ 3267 public static final int LF = 13; 3268 /** 3269 * @stable ICU 4.0 3270 */ 3271 public static final int SCONTINUE = 14; 3272 /** 3273 * One more than the highest normal SentenceBreak value. 
3274 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3275 * 3276 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3277 */ 3278 @Deprecated 3279 public static final int COUNT = 15; 3280 } 3281 3282 /** 3283 * Line Break constants. 3284 * @see UProperty#LINE_BREAK 3285 * @stable ICU 2.4 3286 */ 3287 public static interface LineBreak 3288 { 3289 /** 3290 * @stable ICU 2.4 3291 */ 3292 public static final int UNKNOWN = 0; 3293 /** 3294 * @stable ICU 2.4 3295 */ 3296 public static final int AMBIGUOUS = 1; 3297 /** 3298 * @stable ICU 2.4 3299 */ 3300 public static final int ALPHABETIC = 2; 3301 /** 3302 * @stable ICU 2.4 3303 */ 3304 public static final int BREAK_BOTH = 3; 3305 /** 3306 * @stable ICU 2.4 3307 */ 3308 public static final int BREAK_AFTER = 4; 3309 /** 3310 * @stable ICU 2.4 3311 */ 3312 public static final int BREAK_BEFORE = 5; 3313 /** 3314 * @stable ICU 2.4 3315 */ 3316 public static final int MANDATORY_BREAK = 6; 3317 /** 3318 * @stable ICU 2.4 3319 */ 3320 public static final int CONTINGENT_BREAK = 7; 3321 /** 3322 * @stable ICU 2.4 3323 */ 3324 public static final int CLOSE_PUNCTUATION = 8; 3325 /** 3326 * @stable ICU 2.4 3327 */ 3328 public static final int COMBINING_MARK = 9; 3329 /** 3330 * @stable ICU 2.4 3331 */ 3332 public static final int CARRIAGE_RETURN = 10; 3333 /** 3334 * @stable ICU 2.4 3335 */ 3336 public static final int EXCLAMATION = 11; 3337 /** 3338 * @stable ICU 2.4 3339 */ 3340 public static final int GLUE = 12; 3341 /** 3342 * @stable ICU 2.4 3343 */ 3344 public static final int HYPHEN = 13; 3345 /** 3346 * @stable ICU 2.4 3347 */ 3348 public static final int IDEOGRAPHIC = 14; 3349 /** 3350 * @see #INSEPARABLE 3351 * @stable ICU 2.4 3352 */ 3353 public static final int INSEPERABLE = 15; 3354 /** 3355 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3356 * @stable ICU 3.0 3357 */ 3358 public static final int INSEPARABLE = 15; 3359 /** 3360 * @stable ICU 2.4 3361 */ 3362 public static final int INFIX_NUMERIC = 16; 3363 /** 3364 * @stable ICU 2.4 3365 */ 3366 public static final int LINE_FEED = 17; 3367 /** 3368 * @stable ICU 2.4 3369 */ 3370 public static final int NONSTARTER = 18; 3371 /** 3372 * @stable ICU 2.4 3373 */ 3374 public static final int NUMERIC = 19; 3375 /** 3376 * @stable ICU 2.4 3377 */ 3378 public static final int OPEN_PUNCTUATION = 20; 3379 /** 3380 * @stable ICU 2.4 3381 */ 3382 public static final int POSTFIX_NUMERIC = 21; 3383 /** 3384 * @stable ICU 2.4 3385 */ 3386 public static final int PREFIX_NUMERIC = 22; 3387 /** 3388 * @stable ICU 2.4 3389 */ 3390 public static final int QUOTATION = 23; 3391 /** 3392 * @stable ICU 2.4 3393 */ 3394 public static final int COMPLEX_CONTEXT = 24; 3395 /** 3396 * @stable ICU 2.4 3397 */ 3398 public static final int SURROGATE = 25; 3399 /** 3400 * @stable ICU 2.4 3401 */ 3402 public static final int SPACE = 26; 3403 /** 3404 * @stable ICU 2.4 3405 */ 3406 public static final int BREAK_SYMBOLS = 27; 3407 /** 3408 * @stable ICU 2.4 3409 */ 3410 public static final int ZWSPACE = 28; 3411 /** 3412 * @stable ICU 2.6 3413 */ 3414 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3415 /** 3416 * @stable ICU 2.6 3417 */ 3418 public static final int WORD_JOINER = 30; /*[WJ]*/ 3419 /** 3420 * @stable ICU 3.4 3421 */ 3422 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3423 /** 3424 * @stable ICU 3.4 3425 */ 3426 public static final int H3 = 32; 3427 /** 3428 * @stable ICU 3.4 3429 */ 3430 public static final int JL = 33; 3431 /** 3432 * @stable ICU 3.4 3433 */ 3434 public static final int JT = 34; 3435 /** 3436 * @stable ICU 3.4 3437 */ 3438 public static final int JV = 35; 3439 /** @stable ICU 4.4 */ 3440 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/ 3441 /** @stable ICU 49 */ 3442 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3443 /** @stable ICU 49 */ 3444 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3445 /** @stable ICU 50 */ 3446 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3447 /** @stable ICU 58 */ 3448 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3449 /** @stable ICU 58 */ 3450 public static final int E_MODIFIER = 41; /*[EM]*/ 3451 /** @stable ICU 58 */ 3452 public static final int ZWJ = 42; /*[ZWJ]*/ 3453 /** 3454 * One more than the highest normal LineBreak value. 3455 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3456 * 3457 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3458 */ 3459 @Deprecated 3460 public static final int COUNT = 43; 3461 } 3462 3463 /** 3464 * Numeric Type constants. 3465 * @see UProperty#NUMERIC_TYPE 3466 * @stable ICU 2.4 3467 */ 3468 public static interface NumericType 3469 { 3470 /** 3471 * @stable ICU 2.4 3472 */ 3473 public static final int NONE = 0; 3474 /** 3475 * @stable ICU 2.4 3476 */ 3477 public static final int DECIMAL = 1; 3478 /** 3479 * @stable ICU 2.4 3480 */ 3481 public static final int DIGIT = 2; 3482 /** 3483 * @stable ICU 2.4 3484 */ 3485 public static final int NUMERIC = 3; 3486 /** 3487 * One more than the highest normal NumericType value. 3488 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3489 * 3490 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3491 */ 3492 @Deprecated 3493 public static final int COUNT = 4; 3494 } 3495 3496 /** 3497 * Hangul Syllable Type constants. 
3498 * 3499 * @see UProperty#HANGUL_SYLLABLE_TYPE 3500 * @stable ICU 2.6 3501 */ 3502 public static interface HangulSyllableType 3503 { 3504 /** 3505 * @stable ICU 2.6 3506 */ 3507 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3508 /** 3509 * @stable ICU 2.6 3510 */ 3511 public static final int LEADING_JAMO = 1; /*[L]*/ 3512 /** 3513 * @stable ICU 2.6 3514 */ 3515 public static final int VOWEL_JAMO = 2; /*[V]*/ 3516 /** 3517 * @stable ICU 2.6 3518 */ 3519 public static final int TRAILING_JAMO = 3; /*[T]*/ 3520 /** 3521 * @stable ICU 2.6 3522 */ 3523 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3524 /** 3525 * @stable ICU 2.6 3526 */ 3527 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3528 /** 3529 * One more than the highest normal HangulSyllableType value. 3530 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 3531 * 3532 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3533 */ 3534 @Deprecated 3535 public static final int COUNT = 6; 3536 } 3537 3538 /** 3539 * Bidi Paired Bracket Type constants. 3540 * 3541 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3542 * @stable ICU 52 3543 */ 3544 public static interface BidiPairedBracketType { 3545 /** 3546 * Not a paired bracket. 3547 * @stable ICU 52 3548 */ 3549 public static final int NONE = 0; 3550 /** 3551 * Open paired bracket. 3552 * @stable ICU 52 3553 */ 3554 public static final int OPEN = 1; 3555 /** 3556 * Close paired bracket. 3557 * @stable ICU 52 3558 */ 3559 public static final int CLOSE = 2; 3560 /** 3561 * One more than the highest normal BidiPairedBracketType value. 3562 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3563 * 3564 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
3565 */ 3566 @Deprecated 3567 public static final int COUNT = 3; 3568 } 3569 3570 // public data members ----------------------------------------------- 3571 3572 /** 3573 * The lowest Unicode code point value, constant 0. 3574 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 3575 * 3576 * @stable ICU 2.1 3577 */ 3578 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 3579 3580 /** 3581 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 3582 * Same as {@link Character#MAX_CODE_POINT}. 3583 * 3584 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 3585 * which is still a char with the value U+FFFF. 3586 * 3587 * @stable ICU 2.1 3588 */ 3589 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 3590 3591 /** 3592 * The minimum value for Supplementary code points, constant U+10000. 3593 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 3594 * 3595 * @stable ICU 2.1 3596 */ 3597 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 3598 3599 /** 3600 * Unicode value used when translating into Unicode encoding form and there 3601 * is no existing character. 3602 * @stable ICU 2.1 3603 */ 3604 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3605 3606 /** 3607 * Special value that is returned by getUnicodeNumericValue(int) when no 3608 * numeric value is defined for a code point. 3609 * @stable ICU 2.4 3610 * @see #getUnicodeNumericValue 3611 */ 3612 public static final double NO_NUMERIC_VALUE = -123456789; 3613 3614 /** 3615 * Compatibility constant for Java Character's MIN_RADIX. 3616 * @stable ICU 3.4 3617 */ 3618 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3619 3620 /** 3621 * Compatibility constant for Java Character's MAX_RADIX. 
3622 * @stable ICU 3.4 3623 */ 3624 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3625 3626 /** 3627 * Do not lowercase non-initial parts of words when titlecasing. 3628 * Option bit for titlecasing APIs that take an options bit set. 3629 * 3630 * By default, titlecasing will titlecase the first cased character 3631 * of a word and lowercase all other characters. 3632 * With this option, the other characters will not be modified. 3633 * 3634 * @see #toTitleCase 3635 * @stable ICU 3.8 3636 */ 3637 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3638 3639 /** 3640 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 3641 * titlecase exactly the characters at breaks from the iterator. 3642 * Option bit for titlecasing APIs that take an options bit set. 3643 * 3644 * By default, titlecasing will take each break iterator index, 3645 * adjust it by looking for the next cased character, and titlecase that one. 3646 * Other characters are lowercased. 3647 * 3648 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 3649 * 3650 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 3651 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 3652 * cased character F. If F exists, map F to default_title(F); then map each 3653 * subsequent character C to default_lower(C). 3654 * 3655 * @see #toTitleCase 3656 * @see #TITLECASE_NO_LOWERCASE 3657 * @stable ICU 3.8 3658 */ 3659 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3660 3661 // public methods ---------------------------------------------------- 3662 3663 /** 3664 * Returnss the numeric value of a decimal digit code point. 3665 * <br>This method observes the semantics of 3666 * <code>java.lang.Character.digit()</code>. Note that this 3667 * will return positive values for code points for which isDigit 3668 * returns false, just like java.lang.Character. 
3669 * <br><em>Semantic Change:</em> In release 1.3.1 and 3670 * prior, this did not treat the European letters as having a 3671 * digit value, and also treated numeric letters and other numbers as 3672 * digits. 3673 * This has been changed to conform to the java semantics. 3674 * <br>A code point is a valid digit if and only if: 3675 * <ul> 3676 * <li>ch is a decimal digit or one of the european letters, and 3677 * <li>the value of ch is less than the specified radix. 3678 * </ul> 3679 * @param ch the code point to query 3680 * @param radix the radix 3681 * @return the numeric value represented by the code point in the 3682 * specified radix, or -1 if the code point is not a decimal digit 3683 * or if its value is too large for the radix 3684 * @stable ICU 2.1 3685 */ 3686 public static int digit(int ch, int radix) 3687 { 3688 if (2 <= radix && radix <= 36) { 3689 int value = digit(ch); 3690 if (value < 0) { 3691 // ch is not a decimal digit, try latin letters 3692 value = UCharacterProperty.getEuropeanDigit(ch); 3693 } 3694 return (value < radix) ? value : -1; 3695 } else { 3696 return -1; // invalid radix 3697 } 3698 } 3699 3700 /** 3701 * Returnss the numeric value of a decimal digit code point. 3702 * <br>This is a convenience overload of <code>digit(int, int)</code> 3703 * that provides a decimal radix. 3704 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3705 * treated numeric letters and other numbers as digits. This has 3706 * been changed to conform to the java semantics. 3707 * @param ch the code point to query 3708 * @return the numeric value represented by the code point, 3709 * or -1 if the code point is not a decimal digit or if its 3710 * value is too large for a decimal radix 3711 * @stable ICU 2.1 3712 */ 3713 public static int digit(int ch) 3714 { 3715 return UCharacterProperty.INSTANCE.digit(ch); 3716 } 3717 3718 /** 3719 * Returns the numeric value of the code point as a nonnegative 3720 * integer. 
3721 * <br>If the code point does not have a numeric value, then -1 is returned. 3722 * <br> 3723 * If the code point has a numeric value that cannot be represented as a 3724 * nonnegative integer (for example, a fractional value), then -2 is 3725 * returned. 3726 * @param ch the code point to query 3727 * @return the numeric value of the code point, or -1 if it has no numeric 3728 * value, or -2 if it has a numeric value that cannot be represented as a 3729 * nonnegative integer 3730 * @stable ICU 2.1 3731 */ 3732 public static int getNumericValue(int ch) 3733 { 3734 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3735 } 3736 3737 /** 3738 * {@icu} Returns the numeric value for a Unicode code point as defined in the 3739 * Unicode Character Database. 3740 * <p>A "double" return type is necessary because some numeric values are 3741 * fractions, negative, or too large for int. 3742 * <p>For characters without any numeric values in the Unicode Character 3743 * Database, this function will return NO_NUMERIC_VALUE. 3744 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 3745 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3746 * return type int and returns -1 when the argument ch does not have a 3747 * corresponding numeric value. This has been changed to synch with ICU4C 3748 * 3749 * This corresponds to the ICU4C function u_getNumericValue. 3750 * @param ch Code point to get the numeric value for. 3751 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3752 * @stable ICU 2.4 3753 */ 3754 public static double getUnicodeNumericValue(int ch) 3755 { 3756 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3757 } 3758 3759 /** 3760 * Compatibility override of Java deprecated method. This 3761 * method will always remain deprecated. 3762 * Same as java.lang.Character.isSpace(). 
3763 * @param ch the code point 3764 * @return true if the code point is a space character as 3765 * defined by java.lang.Character.isSpace. 3766 * @deprecated ICU 3.4 (Java) 3767 */ 3768 @Deprecated 3769 public static boolean isSpace(int ch) { 3770 return ch <= 0x20 && 3771 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3772 } 3773 3774 /** 3775 * Returns a value indicating a code point's Unicode category. 3776 * Up-to-date Unicode implementation of java.lang.Character.getType() 3777 * except for the above mentioned code points that had their category 3778 * changed.<br> 3779 * Return results are constants from the interface 3780 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3781 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3782 * those returned by java.lang.Character.getType. UCharacterCategory values 3783 * match the ones used in ICU4C, while java.lang.Character type 3784 * values, though similar, skip the value 17. 3785 * @param ch code point whose type is to be determined 3786 * @return category which is a value of UCharacterCategory 3787 * @stable ICU 2.1 3788 */ 3789 public static int getType(int ch) 3790 { 3791 return UCharacterProperty.INSTANCE.getType(ch); 3792 } 3793 3794 /** 3795 * Determines if a code point has a defined meaning in the up-to-date 3796 * Unicode standard. 3797 * E.g. supplementary code points though allocated space are not defined in 3798 * Unicode yet.<br> 3799 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3800 * @param ch code point to be determined if it is defined in the most 3801 * current version of Unicode 3802 * @return true if this code point is defined in unicode 3803 * @stable ICU 2.1 3804 */ 3805 public static boolean isDefined(int ch) 3806 { 3807 return getType(ch) != 0; 3808 } 3809 3810 /** 3811 * Determines if a code point is a Java digit. 
3812 * <br>This method observes the semantics of 3813 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3814 * digits only. 3815 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3816 * numeric letters and other numbers as digits. 3817 * This has been changed to conform to the java semantics. 3818 * @param ch code point to query 3819 * @return true if this code point is a digit 3820 * @stable ICU 2.1 3821 */ 3822 public static boolean isDigit(int ch) 3823 { 3824 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3825 } 3826 3827 /** 3828 * Determines if the specified code point is an ISO control character. 3829 * A code point is considered to be an ISO control character if it is in 3830 * the range \u0000 through \u001F or in the range \u007F through 3831 * \u009F.<br> 3832 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3833 * @param ch code point to determine if it is an ISO control character 3834 * @return true if code point is a ISO control character 3835 * @stable ICU 2.1 3836 */ 3837 public static boolean isISOControl(int ch) 3838 { 3839 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3840 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3841 } 3842 3843 /** 3844 * Determines if the specified code point is a letter. 
3845 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3846 * @param ch code point to determine if it is a letter 3847 * @return true if code point is a letter 3848 * @stable ICU 2.1 3849 */ 3850 public static boolean isLetter(int ch) 3851 { 3852 // if props == 0, it will just fall through and return false 3853 return ((1 << getType(ch)) 3854 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3855 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3856 | (1 << UCharacterCategory.TITLECASE_LETTER) 3857 | (1 << UCharacterCategory.MODIFIER_LETTER) 3858 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3859 } 3860 3861 /** 3862 * Determines if the specified code point is a letter or digit. 3863 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 3864 * characters 'A' - 'Z' and 'a' - 'z' as digits. 3865 * @param ch code point to determine if it is a letter or a digit 3866 * @return true if code point is a letter or a digit 3867 * @stable ICU 2.1 3868 */ 3869 public static boolean isLetterOrDigit(int ch) 3870 { 3871 return ((1 << getType(ch)) 3872 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3873 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3874 | (1 << UCharacterCategory.TITLECASE_LETTER) 3875 | (1 << UCharacterCategory.MODIFIER_LETTER) 3876 | (1 << UCharacterCategory.OTHER_LETTER) 3877 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3878 } 3879 3880 /** 3881 * Compatibility override of Java deprecated method. This 3882 * method will always remain deprecated. Delegates to 3883 * java.lang.Character.isJavaIdentifierStart. 3884 * @param cp the code point 3885 * @return true if the code point can start a java identifier. 3886 * @deprecated ICU 3.4 (Java) 3887 */ 3888 @Deprecated 3889 public static boolean isJavaLetter(int cp) { 3890 return isJavaIdentifierStart(cp); 3891 } 3892 3893 /** 3894 * Compatibility override of Java deprecated method. This 3895 * method will always remain deprecated. 
Delegates to 3896 * java.lang.Character.isJavaIdentifierPart. 3897 * @param cp the code point 3898 * @return true if the code point can continue a java identifier. 3899 * @deprecated ICU 3.4 (Java) 3900 */ 3901 @Deprecated 3902 public static boolean isJavaLetterOrDigit(int cp) { 3903 return isJavaIdentifierPart(cp); 3904 } 3905 3906 /** 3907 * Compatibility override of Java method, delegates to 3908 * java.lang.Character.isJavaIdentifierStart. 3909 * @param cp the code point 3910 * @return true if the code point can start a java identifier. 3911 * @stable ICU 3.4 3912 */ 3913 public static boolean isJavaIdentifierStart(int cp) { 3914 // note, downcast to char for jdk 1.4 compatibility 3915 return java.lang.Character.isJavaIdentifierStart((char)cp); 3916 } 3917 3918 /** 3919 * Compatibility override of Java method, delegates to 3920 * java.lang.Character.isJavaIdentifierPart. 3921 * @param cp the code point 3922 * @return true if the code point can continue a java identifier. 3923 * @stable ICU 3.4 3924 */ 3925 public static boolean isJavaIdentifierPart(int cp) { 3926 // note, downcast to char for jdk 1.4 compatibility 3927 return java.lang.Character.isJavaIdentifierPart((char)cp); 3928 } 3929 3930 /** 3931 * Determines if the specified code point is a lowercase character. 
3932 * UnicodeData only contains case mappings for code points where they are 3933 * one-to-one mappings; it also omits information about context-sensitive 3934 * case mappings.<br> For more information about Unicode case mapping 3935 * please refer to the 3936 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3937 * #21</a>.<br> 3938 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3939 * @param ch code point to determine if it is in lowercase 3940 * @return true if code point is a lowercase character 3941 * @stable ICU 2.1 3942 */ 3943 public static boolean isLowerCase(int ch) 3944 { 3945 // if props == 0, it will just fall through and return false 3946 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3947 } 3948 3949 /** 3950 * Determines if the specified code point is a white space character. 3951 * A code point is considered to be an whitespace character if and only 3952 * if it satisfies one of the following criteria: 3953 * <ul> 3954 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3955 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3956 * <li> It is \u0009, HORIZONTAL TABULATION. 3957 * <li> It is \u000A, LINE FEED. 3958 * <li> It is \u000B, VERTICAL TABULATION. 3959 * <li> It is \u000C, FORM FEED. 3960 * <li> It is \u000D, CARRIAGE RETURN. 3961 * <li> It is \u001C, FILE SEPARATOR. 3962 * <li> It is \u001D, GROUP SEPARATOR. 3963 * <li> It is \u001E, RECORD SEPARATOR. 3964 * <li> It is \u001F, UNIT SEPARATOR. 3965 * </ul> 3966 * 3967 * This API tries to sync with the semantics of Java's 3968 * java.lang.Character.isWhitespace(), but it may not return 3969 * the exact same results because of the Unicode version 3970 * difference. 3971 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3972 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 
3973 * See http://www.unicode.org/versions/Unicode4.0.1/ 3974 * @param ch code point to determine if it is a white space 3975 * @return true if the specified code point is a white space character 3976 * @stable ICU 2.1 3977 */ 3978 public static boolean isWhitespace(int ch) 3979 { 3980 // exclude no-break spaces 3981 // if props == 0, it will just fall through and return false 3982 return ((1 << getType(ch)) & 3983 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3984 | (1 << UCharacterCategory.LINE_SEPARATOR) 3985 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3986 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3987 // TAB VT LF FF CR FS GS RS US NL are all control characters 3988 // that are white spaces. 3989 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3990 } 3991 3992 /** 3993 * Determines if the specified code point is a Unicode specified space 3994 * character, i.e. if code point is in the category Zs, Zl and Zp. 3995 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 3996 * @param ch code point to determine if it is a space 3997 * @return true if the specified code point is a space character 3998 * @stable ICU 2.1 3999 */ 4000 public static boolean isSpaceChar(int ch) 4001 { 4002 // if props == 0, it will just fall through and return false 4003 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4004 | (1 << UCharacterCategory.LINE_SEPARATOR) 4005 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4006 != 0; 4007 } 4008 4009 /** 4010 * Determines if the specified code point is a titlecase character. 
4011 * UnicodeData only contains case mappings for code points where they are 4012 * one-to-one mappings; it also omits information about context-sensitive 4013 * case mappings.<br> 4014 * For more information about Unicode case mapping please refer to the 4015 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4016 * Technical report #21</a>.<br> 4017 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4018 * @param ch code point to determine if it is in title case 4019 * @return true if the specified code point is a titlecase character 4020 * @stable ICU 2.1 4021 */ 4022 public static boolean isTitleCase(int ch) 4023 { 4024 // if props == 0, it will just fall through and return false 4025 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4026 } 4027 4028 /** 4029 * Determines if the specified code point may be any part of a Unicode 4030 * identifier other than the starting character. 4031 * A code point may be part of a Unicode identifier if and only if it is 4032 * one of the following: 4033 * <ul> 4034 * <li> Lu Uppercase letter 4035 * <li> Ll Lowercase letter 4036 * <li> Lt Titlecase letter 4037 * <li> Lm Modifier letter 4038 * <li> Lo Other letter 4039 * <li> Nl Letter number 4040 * <li> Pc Connecting punctuation character 4041 * <li> Nd decimal number 4042 * <li> Mc Spacing combining mark 4043 * <li> Mn Non-spacing mark 4044 * <li> Cf formatting code 4045 * </ul> 4046 * Up-to-date Unicode implementation of 4047 * java.lang.Character.isUnicodeIdentifierPart().<br> 4048 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
4049 * @param ch code point to determine if is can be part of a Unicode 4050 * identifier 4051 * @return true if code point is any character belonging a unicode 4052 * identifier suffix after the first character 4053 * @stable ICU 2.1 4054 */ 4055 public static boolean isUnicodeIdentifierPart(int ch) 4056 { 4057 // if props == 0, it will just fall through and return false 4058 // cat == format 4059 return ((1 << getType(ch)) 4060 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4061 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4062 | (1 << UCharacterCategory.TITLECASE_LETTER) 4063 | (1 << UCharacterCategory.MODIFIER_LETTER) 4064 | (1 << UCharacterCategory.OTHER_LETTER) 4065 | (1 << UCharacterCategory.LETTER_NUMBER) 4066 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 4067 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4068 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 4069 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 4070 || isIdentifierIgnorable(ch); 4071 } 4072 4073 /** 4074 * Determines if the specified code point is permissible as the first 4075 * character in a Unicode identifier. 4076 * A code point may start a Unicode identifier if it is of type either 4077 * <ul> 4078 * <li> Lu Uppercase letter 4079 * <li> Ll Lowercase letter 4080 * <li> Lt Titlecase letter 4081 * <li> Lm Modifier letter 4082 * <li> Lo Other letter 4083 * <li> Nl Letter number 4084 * </ul> 4085 * Up-to-date Unicode implementation of 4086 * java.lang.Character.isUnicodeIdentifierStart().<br> 4087 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
4088 * @param ch code point to determine if it can start a Unicode identifier 4089 * @return true if code point is the first character belonging a unicode 4090 * identifier 4091 * @stable ICU 2.1 4092 */ 4093 public static boolean isUnicodeIdentifierStart(int ch) 4094 { 4095 /*int cat = getType(ch);*/ 4096 // if props == 0, it will just fall through and return false 4097 return ((1 << getType(ch)) 4098 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4099 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4100 | (1 << UCharacterCategory.TITLECASE_LETTER) 4101 | (1 << UCharacterCategory.MODIFIER_LETTER) 4102 | (1 << UCharacterCategory.OTHER_LETTER) 4103 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 4104 } 4105 4106 /** 4107 * Determines if the specified code point should be regarded as an 4108 * ignorable character in a Java identifier. 4109 * A character is Java-identifier-ignorable if it has the general category 4110 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4111 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4112 * Up-to-date Unicode implementation of 4113 * java.lang.Character.isIdentifierIgnorable().<br> 4114 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4115 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4116 * @param ch code point to be determined if it can be ignored in a Unicode 4117 * identifier. 4118 * @return true if the code point is ignorable 4119 * @stable ICU 2.1 4120 */ 4121 public static boolean isIdentifierIgnorable(int ch) 4122 { 4123 // see java.lang.Character.isIdentifierIgnorable() on range of 4124 // ignorable characters. 4125 if (ch <= 0x9f) { 4126 return isISOControl(ch) 4127 && !((ch >= 0x9 && ch <= 0xd) 4128 || (ch >= 0x1c && ch <= 0x1f)); 4129 } 4130 return getType(ch) == UCharacterCategory.FORMAT; 4131 } 4132 4133 /** 4134 * Determines if the specified code point is an uppercase character. 
4135 * UnicodeData only contains case mappings for code point where they are 4136 * one-to-one mappings; it also omits information about context-sensitive 4137 * case mappings.<br> 4138 * For language specific case conversion behavior, use 4139 * toUpperCase(locale, str). <br> 4140 * For example, the case conversion for dot-less i and dotted I in Turkish, 4141 * or for final sigma in Greek. 4142 * For more information about Unicode case mapping please refer to the 4143 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4144 * Technical report #21</a>.<br> 4145 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4146 * @param ch code point to determine if it is in uppercase 4147 * @return true if the code point is an uppercase character 4148 * @stable ICU 2.1 4149 */ 4150 public static boolean isUpperCase(int ch) 4151 { 4152 // if props == 0, it will just fall through and return false 4153 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4154 } 4155 4156 /** 4157 * The given code point is mapped to its lowercase equivalent; if the code 4158 * point has no lowercase equivalent, the code point itself is returned. 4159 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4160 * 4161 * <p>This function only returns the simple, single-code point case mapping. 4162 * Full case mappings should be used whenever possible because they produce 4163 * better results by working on whole strings. 4164 * They take into account the string context and the language and can map 4165 * to a result string with a different length as appropriate. 4166 * Full case mappings are applied by the case mapping functions 4167 * that take String parameters rather than code points (int). 
4168 * See also the User Guide chapter on C/POSIX migration: 4169 * http://www.icu-project.org/userguide/posix.html#case_mappings 4170 * 4171 * @param ch code point whose lowercase equivalent is to be retrieved 4172 * @return the lowercase equivalent code point 4173 * @stable ICU 2.1 4174 */ 4175 public static int toLowerCase(int ch) { 4176 return UCaseProps.INSTANCE.tolower(ch); 4177 } 4178 4179 /** 4180 * Converts argument code point and returns a String object representing 4181 * the code point's value in UTF-16 format. 4182 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4183 * 4184 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4185 * 4186 * @param ch code point 4187 * @return string representation of the code point, null if code point is not 4188 * defined in unicode 4189 * @stable ICU 2.1 4190 */ 4191 public static String toString(int ch) 4192 { 4193 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4194 return null; 4195 } 4196 4197 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4198 return String.valueOf((char)ch); 4199 } 4200 4201 return new String(Character.toChars(ch)); 4202 } 4203 4204 /** 4205 * Converts the code point argument to titlecase. 4206 * If no titlecase is available, the uppercase is returned. If no uppercase 4207 * is available, the code point itself is returned. 4208 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4209 * 4210 * <p>This function only returns the simple, single-code point case mapping. 4211 * Full case mappings should be used whenever possible because they produce 4212 * better results by working on whole strings. 4213 * They take into account the string context and the language and can map 4214 * to a result string with a different length as appropriate. 4215 * Full case mappings are applied by the case mapping functions 4216 * that take String parameters rather than code points (int). 
4217 * See also the User Guide chapter on C/POSIX migration: 4218 * http://www.icu-project.org/userguide/posix.html#case_mappings 4219 * 4220 * @param ch code point whose title case is to be retrieved 4221 * @return titlecase code point 4222 * @stable ICU 2.1 4223 */ 4224 public static int toTitleCase(int ch) { 4225 return UCaseProps.INSTANCE.totitle(ch); 4226 } 4227 4228 /** 4229 * Converts the character argument to uppercase. 4230 * If no uppercase is available, the character itself is returned. 4231 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4232 * 4233 * <p>This function only returns the simple, single-code point case mapping. 4234 * Full case mappings should be used whenever possible because they produce 4235 * better results by working on whole strings. 4236 * They take into account the string context and the language and can map 4237 * to a result string with a different length as appropriate. 4238 * Full case mappings are applied by the case mapping functions 4239 * that take String parameters rather than code points (int). 4240 * See also the User Guide chapter on C/POSIX migration: 4241 * http://www.icu-project.org/userguide/posix.html#case_mappings 4242 * 4243 * @param ch code point whose uppercase is to be retrieved 4244 * @return uppercase code point 4245 * @stable ICU 2.1 4246 */ 4247 public static int toUpperCase(int ch) { 4248 return UCaseProps.INSTANCE.toupper(ch); 4249 } 4250 4251 // extra methods not in java.lang.Character -------------------------- 4252 4253 /** 4254 * {@icu} Determines if the code point is a supplementary character. 
4255 * A code point is a supplementary character if and only if it is greater 4256 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4257 * @param ch code point to be determined if it is in the supplementary 4258 * plane 4259 * @return true if code point is a supplementary character 4260 * @stable ICU 2.1 4261 */ 4262 public static boolean isSupplementary(int ch) 4263 { 4264 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4265 ch <= UCharacter.MAX_VALUE; 4266 } 4267 4268 /** 4269 * {@icu} Determines if the code point is in the BMP plane. 4270 * @param ch code point to be determined if it is not a supplementary 4271 * character 4272 * @return true if code point is not a supplementary character 4273 * @stable ICU 2.1 4274 */ 4275 public static boolean isBMP(int ch) 4276 { 4277 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4278 } 4279 4280 /** 4281 * {@icu} Determines whether the specified code point is a printable character 4282 * according to the Unicode standard. 4283 * @param ch code point to be determined if it is printable 4284 * @return true if the code point is a printable character 4285 * @stable ICU 2.1 4286 */ 4287 public static boolean isPrintable(int ch) 4288 { 4289 int cat = getType(ch); 4290 // if props == 0, it will just fall through and return false 4291 return (cat != UCharacterCategory.UNASSIGNED && 4292 cat != UCharacterCategory.CONTROL && 4293 cat != UCharacterCategory.FORMAT && 4294 cat != UCharacterCategory.PRIVATE_USE && 4295 cat != UCharacterCategory.SURROGATE && 4296 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4297 } 4298 4299 /** 4300 * {@icu} Determines whether the specified code point is of base form. 4301 * A code point of base form does not graphically combine with preceding 4302 * characters, and is neither a control nor a format character. 
4303 * @param ch code point to be determined if it is of base form 4304 * @return true if the code point is of base form 4305 * @stable ICU 2.1 4306 */ 4307 public static boolean isBaseForm(int ch) 4308 { 4309 int cat = getType(ch); 4310 // if props == 0, it will just fall through and return false 4311 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4312 cat == UCharacterCategory.OTHER_NUMBER || 4313 cat == UCharacterCategory.LETTER_NUMBER || 4314 cat == UCharacterCategory.UPPERCASE_LETTER || 4315 cat == UCharacterCategory.LOWERCASE_LETTER || 4316 cat == UCharacterCategory.TITLECASE_LETTER || 4317 cat == UCharacterCategory.MODIFIER_LETTER || 4318 cat == UCharacterCategory.OTHER_LETTER || 4319 cat == UCharacterCategory.NON_SPACING_MARK || 4320 cat == UCharacterCategory.ENCLOSING_MARK || 4321 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4322 } 4323 4324 /** 4325 * {@icu} Returns the Bidirection property of a code point. 4326 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4327 * property.<br> 4328 * Result returned belongs to the interface 4329 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4330 * @param ch the code point to be determined its direction 4331 * @return direction constant from UCharacterDirection. 4332 * @stable ICU 2.1 4333 */ 4334 public static int getDirection(int ch) 4335 { 4336 return UBiDiProps.INSTANCE.getClass(ch); 4337 } 4338 4339 /** 4340 * Determines whether the code point has the "mirrored" property. 4341 * This property is set for characters that are commonly used in 4342 * Right-To-Left contexts and need to be displayed with a "mirrored" 4343 * glyph. 
4344 * @param ch code point whose mirror is to be determined 4345 * @return true if the code point has the "mirrored" property 4346 * @stable ICU 2.1 4347 */ 4348 public static boolean isMirrored(int ch) 4349 { 4350 return UBiDiProps.INSTANCE.isMirrored(ch); 4351 } 4352 4353 /** 4354 * {@icu} Maps the specified code point to a "mirror-image" code point. 4355 * For code points with the "mirrored" property, implementations sometimes 4356 * need a "poor man's" mapping to another code point such that the default 4357 * glyph may serve as the mirror-image of the default glyph of the 4358 * specified code point.<br> 4359 * This is useful for text conversion to and from codepages with visual 4360 * order, and for displays without glyph selection capabilities. 4361 * @param ch code point whose mirror is to be retrieved 4362 * @return another code point that may serve as a mirror-image substitute, 4363 * or ch itself if there is no such mapping or ch does not have the 4364 * "mirrored" property 4365 * @stable ICU 2.1 4366 */ 4367 public static int getMirror(int ch) 4368 { 4369 return UBiDiProps.INSTANCE.getMirror(ch); 4370 } 4371 4372 /** 4373 * {@icu} Maps the specified character to its paired bracket character. 4374 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4375 * Otherwise c itself is returned. 
4376 * See http://www.unicode.org/reports/tr9/ 4377 * 4378 * @param c the code point to be mapped 4379 * @return the paired bracket code point, 4380 * or c itself if there is no such mapping 4381 * (Bidi_Paired_Bracket_Type=None) 4382 * 4383 * @see UProperty#BIDI_PAIRED_BRACKET 4384 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4385 * @see #getMirror(int) 4386 * @stable ICU 52 4387 */ 4388 public static int getBidiPairedBracket(int c) { 4389 return UBiDiProps.INSTANCE.getPairedBracket(c); 4390 } 4391 4392 /** 4393 * {@icu} Returns the combining class of the argument codepoint 4394 * @param ch code point whose combining is to be retrieved 4395 * @return the combining class of the codepoint 4396 * @stable ICU 2.1 4397 */ 4398 public static int getCombiningClass(int ch) 4399 { 4400 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4401 } 4402 4403 /** 4404 * {@icu} A code point is illegal if and only if 4405 * <ul> 4406 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4407 * <li> A surrogate value, 0xD800 to 0xDFFF 4408 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4409 * </ul> 4410 * Note: legal does not mean that it is assigned in this version of Unicode. 4411 * @param ch code point to determine if it is a legal code point by itself 4412 * @return true if and only if legal. 4413 * @stable ICU 2.1 4414 */ 4415 public static boolean isLegal(int ch) 4416 { 4417 if (ch < MIN_VALUE) { 4418 return false; 4419 } 4420 if (ch < Character.MIN_SURROGATE) { 4421 return true; 4422 } 4423 if (ch <= Character.MAX_SURROGATE) { 4424 return false; 4425 } 4426 if (UCharacterUtility.isNonCharacter(ch)) { 4427 return false; 4428 } 4429 return (ch <= MAX_VALUE); 4430 } 4431 4432 /** 4433 * {@icu} A string is legal iff all its code points are legal. 
4434 * A code point is illegal if and only if 4435 * <ul> 4436 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4437 * <li> A surrogate value, 0xD800 to 0xDFFF 4438 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4439 * </ul> 4440 * Note: legal does not mean that it is assigned in this version of Unicode. 4441 * @param str containing code points to examin 4442 * @return true if and only if legal. 4443 * @stable ICU 2.1 4444 */ 4445 public static boolean isLegal(String str) 4446 { 4447 int size = str.length(); 4448 int codepoint; 4449 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4450 { 4451 codepoint = str.codePointAt(i); 4452 if (!isLegal(codepoint)) { 4453 return false; 4454 } 4455 } 4456 return true; 4457 } 4458 4459 /** 4460 * {@icu} Returns the version of Unicode data used. 4461 * @return the unicode version number used 4462 * @stable ICU 2.1 4463 */ 4464 public static VersionInfo getUnicodeVersion() 4465 { 4466 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4467 } 4468 4469 /** 4470 * {@icu} Returns the most current Unicode name of the argument code point, or 4471 * null if the character is unassigned or outside the range 4472 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4473 * <br> 4474 * Note calling any methods related to code point names, e.g. get*Name*() 4475 * incurs a one-time initialisation cost to construct the name tables. 
4476 * @param ch the code point for which to get the name 4477 * @return most current Unicode name 4478 * @stable ICU 2.1 4479 */ 4480 public static String getName(int ch) 4481 { 4482 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4483 } 4484 4485 /** 4486 * {@icu} Returns the names for each of the characters in a string 4487 * @param s string to format 4488 * @param separator string to go between names 4489 * @return string of names 4490 * @stable ICU 3.8 4491 */ 4492 public static String getName(String s, String separator) { 4493 if (s.length() == 1) { // handle common case 4494 return getName(s.charAt(0)); 4495 } 4496 int cp; 4497 StringBuilder sb = new StringBuilder(); 4498 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4499 cp = s.codePointAt(i); 4500 if (i != 0) sb.append(separator); 4501 sb.append(UCharacter.getName(cp)); 4502 } 4503 return sb.toString(); 4504 } 4505 4506 /** 4507 * {@icu} Returns null. 4508 * Used to return the Unicode_1_Name property value which was of little practical value. 4509 * @param ch the code point for which to get the name 4510 * @return null 4511 * @deprecated ICU 49 4512 */ 4513 @Deprecated 4514 public static String getName1_0(int ch) 4515 { 4516 return null; 4517 } 4518 4519 /** 4520 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4521 * getName1_0(int), this method will return a name even for codepoints that 4522 * are not assigned a name in UnicodeData.txt. 4523 * 4524 * <p>The names are returned in the following order. 4525 * <ul> 4526 * <li> Most current Unicode name if there is any 4527 * <li> Unicode 1.0 name if there is any 4528 * <li> Extended name in the form of 4529 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4530 * </ul> 4531 * Note calling any methods related to code point names, e.g. get*Name*() 4532 * incurs a one-time initialisation cost to construct the name tables. 
4533 * @param ch the code point for which to get the name 4534 * @return a name for the argument codepoint 4535 * @stable ICU 2.6 4536 */ 4537 public static String getExtendedName(int ch) { 4538 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4539 } 4540 4541 /** 4542 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4543 * Returns null if the character is unassigned or outside the range 4544 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4545 * <br> 4546 * Note calling any methods related to code point names, e.g. get*Name*() 4547 * incurs a one-time initialisation cost to construct the name tables. 4548 * @param ch the code point for which to get the name alias 4549 * @return Unicode name alias, or null 4550 * @stable ICU 4.4 4551 */ 4552 public static String getNameAlias(int ch) 4553 { 4554 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4555 } 4556 4557 /** 4558 * {@icu} Returns null. 4559 * Used to return the ISO 10646 comment for a character. 4560 * The Unicode ISO_Comment property is deprecated and has no values. 4561 * 4562 * @param ch The code point for which to get the ISO comment. 4563 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4564 * @return null 4565 * @deprecated ICU 49 4566 */ 4567 @Deprecated 4568 public static String getISOComment(int ch) 4569 { 4570 return null; 4571 } 4572 4573 /** 4574 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 4575 * return its code point value. All Unicode names are in uppercase. 4576 * Note calling any methods related to code point names, e.g. get*Name*() 4577 * incurs a one-time initialisation cost to construct the name tables. 
4578 * @param name most current Unicode character name whose code point is to 4579 * be returned 4580 * @return code point or -1 if name is not found 4581 * @stable ICU 2.1 4582 */ 4583 public static int getCharFromName(String name){ 4584 return UCharacterName.INSTANCE.getCharFromName( 4585 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4586 } 4587 4588 /** 4589 * {@icu} Returns -1. 4590 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4591 * its code point value. 4592 * @param name Unicode 1.0 code point name whose code point is to be 4593 * returned 4594 * @return -1 4595 * @deprecated ICU 49 4596 * @see #getName1_0(int) 4597 */ 4598 @Deprecated 4599 public static int getCharFromName1_0(String name){ 4600 return -1; 4601 } 4602 4603 /** 4604 * {@icu} <p>Find a Unicode character by either its name and return its code 4605 * point value. All Unicode names are in uppercase. 4606 * Extended names are all lowercase except for numbers and are contained 4607 * within angle brackets. 4608 * The names are searched in the following order 4609 * <ul> 4610 * <li> Most current Unicode name if there is any 4611 * <li> Unicode 1.0 name if there is any 4612 * <li> Extended name in the form of 4613 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4614 * </ul> 4615 * Note calling any methods related to code point names, e.g. get*Name*() 4616 * incurs a one-time initialisation cost to construct the name tables. 4617 * @param name codepoint name 4618 * @return code point associated with the name or -1 if the name is not 4619 * found. 4620 * @stable ICU 2.6 4621 */ 4622 public static int getCharFromExtendedName(String name){ 4623 return UCharacterName.INSTANCE.getCharFromName( 4624 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4625 } 4626 4627 /** 4628 * {@icu} <p>Find a Unicode character by its corrected name alias and return 4629 * its code point value. All Unicode names are in uppercase. 
4630 * Note calling any methods related to code point names, e.g. get*Name*() 4631 * incurs a one-time initialisation cost to construct the name tables. 4632 * @param name Unicode name alias whose code point is to be returned 4633 * @return code point or -1 if name is not found 4634 * @stable ICU 4.4 4635 */ 4636 public static int getCharFromNameAlias(String name){ 4637 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4638 } 4639 4640 /** 4641 * {@icu} Return the Unicode name for a given property, as given in the 4642 * Unicode database file PropertyAliases.txt. Most properties 4643 * have more than one name. The nameChoice determines which one 4644 * is returned. 4645 * 4646 * In addition, this function maps the property 4647 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4648 * "General_Category_Mask". These names are not in 4649 * PropertyAliases.txt. 4650 * 4651 * @param property UProperty selector. 4652 * 4653 * @param nameChoice UProperty.NameChoice selector for which name 4654 * to get. All properties have a long name. Most have a short 4655 * name, but some do not. Unicode allows for additional names; if 4656 * present these will be returned by UProperty.NameChoice.LONG + i, 4657 * where i=1, 2,... 4658 * 4659 * @return a name, or null if Unicode explicitly defines no name 4660 * ("n/a") for a given property/nameChoice. If a given nameChoice 4661 * throws an exception, then all larger values of nameChoice will 4662 * throw an exception. If null is returned for a given 4663 * nameChoice, then other nameChoice values may return non-null 4664 * results. 4665 * 4666 * @exception IllegalArgumentException thrown if property or 4667 * nameChoice are invalid. 
4668 * 4669 * @see UProperty 4670 * @see UProperty.NameChoice 4671 * @stable ICU 2.4 4672 */ 4673 public static String getPropertyName(int property, 4674 int nameChoice) { 4675 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4676 } 4677 4678 /** 4679 * {@icu} Return the UProperty selector for a given property name, as 4680 * specified in the Unicode database file PropertyAliases.txt. 4681 * Short, long, and any other variants are recognized. 4682 * 4683 * In addition, this function maps the synthetic names "gcm" / 4684 * "General_Category_Mask" to the property 4685 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4686 * PropertyAliases.txt. 4687 * 4688 * @param propertyAlias the property name to be matched. The name 4689 * is compared using "loose matching" as described in 4690 * PropertyAliases.txt. 4691 * 4692 * @return a UProperty enum. 4693 * 4694 * @exception IllegalArgumentException thrown if propertyAlias 4695 * is not recognized. 4696 * 4697 * @see UProperty 4698 * @stable ICU 2.4 4699 */ 4700 public static int getPropertyEnum(CharSequence propertyAlias) { 4701 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4702 if (propEnum == UProperty.UNDEFINED) { 4703 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4704 } 4705 return propEnum; 4706 } 4707 4708 /** 4709 * {@icu} Return the Unicode name for a given property value, as given in 4710 * the Unicode database file PropertyValueAliases.txt. Most 4711 * values have more than one name. The nameChoice determines 4712 * which one is returned. 4713 * 4714 * Note: Some of the names in PropertyValueAliases.txt can only be 4715 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4716 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4717 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4718 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 
4719 * 4720 * @param property UProperty selector constant. 4721 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4722 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4723 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4724 * If out of range, null is returned. 4725 * 4726 * @param value selector for a value for the given property. In 4727 * general, valid values range from 0 up to some maximum. There 4728 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 4729 * non-zero value BASIC_LATIN.getID(). (2.) 4730 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 4731 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 4732 * are mask values produced by left-shifting 1 by 4733 * UCharacter.getType(). This allows grouped categories such as 4734 * [:L:] to be represented. Mask values are non-contiguous. 4735 * 4736 * @param nameChoice UProperty.NameChoice selector for which name 4737 * to get. All values have a long name. Most have a short name, 4738 * but some do not. Unicode allows for additional names; if 4739 * present these will be returned by UProperty.NameChoice.LONG + i, 4740 * where i=1, 2,... 4741 * 4742 * @return a name, or null if Unicode explicitly defines no name 4743 * ("n/a") for a given property/value/nameChoice. If a given 4744 * nameChoice throws an exception, then all larger values of 4745 * nameChoice will throw an exception. If null is returned for a 4746 * given nameChoice, then other nameChoice values may return 4747 * non-null results. 4748 * 4749 * @exception IllegalArgumentException thrown if property, value, 4750 * or nameChoice are invalid. 
4751 * 4752 * @see UProperty 4753 * @see UProperty.NameChoice 4754 * @stable ICU 2.4 4755 */ 4756 public static String getPropertyValueName(int property, 4757 int value, 4758 int nameChoice) 4759 { 4760 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4761 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4762 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4763 && value >= UCharacter.getIntPropertyMinValue( 4764 UProperty.CANONICAL_COMBINING_CLASS) 4765 && value <= UCharacter.getIntPropertyMaxValue( 4766 UProperty.CANONICAL_COMBINING_CLASS) 4767 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4768 // this is hard coded for the valid cc 4769 // because PropertyValueAliases.txt does not contain all of them 4770 try { 4771 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4772 nameChoice); 4773 } 4774 catch (IllegalArgumentException e) { 4775 return null; 4776 } 4777 } 4778 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4779 } 4780 4781 /** 4782 * {@icu} Return the property value integer for a given value name, as 4783 * specified in the Unicode database file PropertyValueAliases.txt. 4784 * Short, long, and any other variants are recognized. 4785 * 4786 * Note: Some of the names in PropertyValueAliases.txt will only be 4787 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4788 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4789 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4790 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4791 * 4792 * @param property UProperty selector constant. 4793 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4794 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4795 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4796 * Only these properties can be enumerated. 4797 * 4798 * @param valueAlias the value name to be matched. 
The name is 4799 * compared using "loose matching" as described in 4800 * PropertyValueAliases.txt. 4801 * 4802 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4803 * values are mask values produced by left-shifting 1 by 4804 * UCharacter.getType(). This allows grouped categories such as 4805 * [:L:] to be represented. 4806 * 4807 * @see UProperty 4808 * @throws IllegalArgumentException if property is not a valid UProperty 4809 * selector or valueAlias is not a value of this property 4810 * @stable ICU 2.4 4811 */ 4812 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4813 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4814 if (propEnum == UProperty.UNDEFINED) { 4815 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4816 } 4817 return propEnum; 4818 } 4819 4820 /** 4821 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4822 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4823 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4824 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4825 * @internal 4826 * @deprecated This API is ICU internal only. 4827 */ 4828 @Deprecated 4829 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4830 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4831 } 4832 4833 4834 /** 4835 * {@icu} Returns a code point corresponding to the two surrogate code units. 4836 * 4837 * @param lead the lead char 4838 * @param trail the trail char 4839 * @return code point if surrogate characters are valid. 
4840 * @exception IllegalArgumentException thrown when the code units do 4841 * not form a valid code point 4842 * @stable ICU 2.1 4843 */ 4844 public static int getCodePoint(char lead, char trail) 4845 { 4846 if (Character.isSurrogatePair(lead, trail)) { 4847 return Character.toCodePoint(lead, trail); 4848 } 4849 throw new IllegalArgumentException("Illegal surrogate characters"); 4850 } 4851 4852 /** 4853 * {@icu} Returns the code point corresponding to the BMP code point. 4854 * 4855 * @param char16 the BMP code point 4856 * @return code point if argument is a valid character. 4857 * @exception IllegalArgumentException thrown when char16 is not a valid 4858 * code point 4859 * @stable ICU 2.1 4860 */ 4861 public static int getCodePoint(char char16) 4862 { 4863 if (UCharacter.isLegal(char16)) { 4864 return char16; 4865 } 4866 throw new IllegalArgumentException("Illegal codepoint"); 4867 } 4868 4869 /** 4870 * Returns the uppercase version of the argument string. 4871 * Casing is dependent on the default locale and context-sensitive. 4872 * @param str source string to be performed on 4873 * @return uppercase version of the argument string 4874 * @stable ICU 2.1 4875 */ 4876 public static String toUpperCase(String str) 4877 { 4878 return toUpperCase(getDefaultCaseLocale(), str); 4879 } 4880 4881 /** 4882 * Returns the lowercase version of the argument string. 4883 * Casing is dependent on the default locale and context-sensitive 4884 * @param str source string to be performed on 4885 * @return lowercase version of the argument string 4886 * @stable ICU 2.1 4887 */ 4888 public static String toLowerCase(String str) 4889 { 4890 return toLowerCase(getDefaultCaseLocale(), str); 4891 } 4892 4893 /** 4894 * <p>Returns the titlecase version of the argument string. 4895 * <p>Position for titlecasing is determined by the argument break 4896 * iterator, hence the user can customize his break iterator for 4897 * a specialized titlecasing. 
In this case only the forward iteration 4898 * needs to be implemented. 4899 * If the break iterator passed in is null, the default Unicode algorithm 4900 * will be used to determine the titlecase positions. 4901 * 4902 * <p>Only positions returned by the break iterator will be title cased, 4903 * character in between the positions will all be in lower case. 4904 * <p>Casing is dependent on the default locale and context-sensitive 4905 * @param str source string to be performed on 4906 * @param breakiter break iterator to determine the positions in which 4907 * the character should be title cased. 4908 * @return lowercase version of the argument string 4909 * @stable ICU 2.6 4910 */ 4911 public static String toTitleCase(String str, BreakIterator breakiter) 4912 { 4913 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 4914 } 4915 4916 private static int getDefaultCaseLocale() { 4917 return UCaseProps.getCaseLocale(Locale.getDefault()); 4918 } 4919 4920 private static int getCaseLocale(Locale locale) { 4921 if (locale == null) { 4922 locale = Locale.getDefault(); 4923 } 4924 return UCaseProps.getCaseLocale(locale); 4925 } 4926 4927 private static int getCaseLocale(ULocale locale) { 4928 if (locale == null) { 4929 locale = ULocale.getDefault(); 4930 } 4931 return UCaseProps.getCaseLocale(locale); 4932 } 4933 4934 private static String toLowerCase(int caseLocale, String str) { 4935 if (str.length() <= 100) { 4936 if (str.isEmpty()) { 4937 return str; 4938 } 4939 // Collect and apply only changes. 4940 // Good if no or few changes. Bad (slow) if many changes. 
4941 Edits edits = new Edits(); 4942 StringBuilder replacementChars = CaseMapImpl.toLower( 4943 caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); 4944 return applyEdits(str, replacementChars, edits); 4945 } else { 4946 return CaseMapImpl.toLower(caseLocale, 0, str, 4947 new StringBuilder(str.length()), null).toString(); 4948 } 4949 } 4950 4951 private static String toUpperCase(int caseLocale, String str) { 4952 if (str.length() <= 100) { 4953 if (str.isEmpty()) { 4954 return str; 4955 } 4956 // Collect and apply only changes. 4957 // Good if no or few changes. Bad (slow) if many changes. 4958 Edits edits = new Edits(); 4959 StringBuilder replacementChars = CaseMapImpl.toUpper( 4960 caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); 4961 return applyEdits(str, replacementChars, edits); 4962 } else { 4963 return CaseMapImpl.toUpper(caseLocale, 0, str, 4964 new StringBuilder(str.length()), null).toString(); 4965 } 4966 } 4967 4968 private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) { 4969 if (str.length() <= 100) { 4970 if (str.isEmpty()) { 4971 return str; 4972 } 4973 // Collect and apply only changes. 4974 // Good if no or few changes. Bad (slow) if many changes. 
4975 Edits edits = new Edits(); 4976 StringBuilder replacementChars = CaseMapImpl.toTitle( 4977 caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str, 4978 new StringBuilder(), edits); 4979 return applyEdits(str, replacementChars, edits); 4980 } else { 4981 return CaseMapImpl.toTitle(caseLocale, options, titleIter, str, 4982 new StringBuilder(str.length()), null).toString(); 4983 } 4984 } 4985 4986 private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) { 4987 if (!edits.hasChanges()) { 4988 return str; 4989 } 4990 StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta()); 4991 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { 4992 if (ei.hasChange()) { 4993 int i = ei.replacementIndex(); 4994 result.append(replacementChars, i, i + ei.newLength()); 4995 } else { 4996 int i = ei.sourceIndex(); 4997 result.append(str, i, i + ei.oldLength()); 4998 } 4999 } 5000 return result.toString(); 5001 } 5002 5003 /** 5004 * Returns the uppercase version of the argument string. 5005 * Casing is dependent on the argument locale and context-sensitive. 5006 * @param locale which string is to be converted in 5007 * @param str source string to be performed on 5008 * @return uppercase version of the argument string 5009 * @stable ICU 2.1 5010 */ 5011 public static String toUpperCase(Locale locale, String str) 5012 { 5013 return toUpperCase(getCaseLocale(locale), str); 5014 } 5015 5016 /** 5017 * Returns the uppercase version of the argument string. 5018 * Casing is dependent on the argument locale and context-sensitive. 
5019 * @param locale which string is to be converted in 5020 * @param str source string to be performed on 5021 * @return uppercase version of the argument string 5022 * @stable ICU 3.2 5023 */ 5024 public static String toUpperCase(ULocale locale, String str) { 5025 return toUpperCase(getCaseLocale(locale), str); 5026 } 5027 5028 /** 5029 * Returns the lowercase version of the argument string. 5030 * Casing is dependent on the argument locale and context-sensitive 5031 * @param locale which string is to be converted in 5032 * @param str source string to be performed on 5033 * @return lowercase version of the argument string 5034 * @stable ICU 2.1 5035 */ 5036 public static String toLowerCase(Locale locale, String str) 5037 { 5038 return toLowerCase(getCaseLocale(locale), str); 5039 } 5040 5041 /** 5042 * Returns the lowercase version of the argument string. 5043 * Casing is dependent on the argument locale and context-sensitive 5044 * @param locale which string is to be converted in 5045 * @param str source string to be performed on 5046 * @return lowercase version of the argument string 5047 * @stable ICU 3.2 5048 */ 5049 public static String toLowerCase(ULocale locale, String str) { 5050 return toLowerCase(getCaseLocale(locale), str); 5051 } 5052 5053 /** 5054 * <p>Returns the titlecase version of the argument string. 5055 * <p>Position for titlecasing is determined by the argument break 5056 * iterator, hence the user can customize his break iterator for 5057 * a specialized titlecasing. In this case only the forward iteration 5058 * needs to be implemented. 5059 * If the break iterator passed in is null, the default Unicode algorithm 5060 * will be used to determine the titlecase positions. 5061 * 5062 * <p>Only positions returned by the break iterator will be title cased, 5063 * character in between the positions will all be in lower case. 
5064 * <p>Casing is dependent on the argument locale and context-sensitive 5065 * @param locale which string is to be converted in 5066 * @param str source string to be performed on 5067 * @param breakiter break iterator to determine the positions in which 5068 * the character should be title cased. 5069 * @return lowercase version of the argument string 5070 * @stable ICU 2.6 5071 */ 5072 public static String toTitleCase(Locale locale, String str, 5073 BreakIterator breakiter) 5074 { 5075 return toTitleCase(locale, str, breakiter, 0); 5076 } 5077 5078 /** 5079 * <p>Returns the titlecase version of the argument string. 5080 * <p>Position for titlecasing is determined by the argument break 5081 * iterator, hence the user can customize his break iterator for 5082 * a specialized titlecasing. In this case only the forward iteration 5083 * needs to be implemented. 5084 * If the break iterator passed in is null, the default Unicode algorithm 5085 * will be used to determine the titlecase positions. 5086 * 5087 * <p>Only positions returned by the break iterator will be title cased, 5088 * character in between the positions will all be in lower case. 5089 * <p>Casing is dependent on the argument locale and context-sensitive 5090 * @param locale which string is to be converted in 5091 * @param str source string to be performed on 5092 * @param titleIter break iterator to determine the positions in which 5093 * the character should be title cased. 5094 * @return lowercase version of the argument string 5095 * @stable ICU 3.2 5096 */ 5097 public static String toTitleCase(ULocale locale, String str, 5098 BreakIterator titleIter) { 5099 return toTitleCase(locale, str, titleIter, 0); 5100 } 5101 5102 /** 5103 * <p>Returns the titlecase version of the argument string. 5104 * <p>Position for titlecasing is determined by the argument break 5105 * iterator, hence the user can customize his break iterator for 5106 * a specialized titlecasing. 
In this case only the forward iteration 5107 * needs to be implemented. 5108 * If the break iterator passed in is null, the default Unicode algorithm 5109 * will be used to determine the titlecase positions. 5110 * 5111 * <p>Only positions returned by the break iterator will be title cased, 5112 * character in between the positions will all be in lower case. 5113 * <p>Casing is dependent on the argument locale and context-sensitive 5114 * @param locale which string is to be converted in 5115 * @param str source string to be performed on 5116 * @param titleIter break iterator to determine the positions in which 5117 * the character should be title cased. 5118 * @param options bit set to modify the titlecasing operation 5119 * @return lowercase version of the argument string 5120 * @stable ICU 3.8 5121 * @see #TITLECASE_NO_LOWERCASE 5122 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5123 */ 5124 public static String toTitleCase(ULocale locale, String str, 5125 BreakIterator titleIter, int options) { 5126 if(titleIter == null) { 5127 if (locale == null) { 5128 locale = ULocale.getDefault(); 5129 } 5130 titleIter = BreakIterator.getWordInstance(locale); 5131 } 5132 titleIter.setText(str); 5133 return toTitleCase(getCaseLocale(locale), options, titleIter, str); 5134 } 5135 5136 5137 private static final int BREAK_MASK = 5138 (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER) 5139 | (1<<UCharacterCategory.OTHER_LETTER) 5140 | (1<<UCharacterCategory.MODIFIER_LETTER); 5141 5142 /** 5143 * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string, 5144 * and sometimes has no effect at all; the original string is returned whenever casing 5145 * would not be appropriate for the first word (such as for CJK characters or initial numbers). 5146 * Initial non-letters are skipped in order to find the character to change. 5147 * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE. 
5148 * <p>Examples: 5149 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 5150 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 5151 * <tr><td>contact us</td><td>Contact us</td></tr> 5152 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 5153 * <tr><td>(abc)</td><td>(abc)</td></tr> 5154 * <tr><td>ijs</td><td>Ijs</td></tr> 5155 * <tr><td>ijs</td><td>IJs</td><td>nl-BE</td></tr> 5156 * <tr><td>ijs</td><td>js</td><td>tr-DE</td></tr> 5157 * </table> 5158 * @param locale the locale for accessing exceptional behavior (eg for tr). 5159 * @param str the source string to change 5160 * @return the modified string, or the original if no modifications were necessary. 5161 * @internal 5162 * @deprecated ICU internal only 5163 */ 5164 @Deprecated 5165 public static String toTitleFirst(ULocale locale, String str) { 5166 int c = 0; 5167 for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) { 5168 c = UCharacter.codePointAt(str, i); 5169 int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK); 5170 if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK 5171 break; 5172 } 5173 if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) { 5174 continue; 5175 } 5176 5177 // we now have the first cased character 5178 // What we really want is something like: 5179 // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken); 5180 // That is, just give us the titlecased string, for the locale, at i and following, 5181 // and tell us how many characters are replaced. 
5182 // The following won't work completely: it needs some more substantial changes to UCaseProps 5183 5184 String substring = str.substring(i, i+UCharacter.charCount(c)); 5185 String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0); 5186 5187 // skip if no change 5188 if (titled.codePointAt(0) == c) { 5189 // Using 0 is safe, since any change in titling will not have first initial character 5190 break; 5191 } 5192 StringBuilder result = new StringBuilder(str.length()).append(str, 0, i); 5193 int startOfSuffix; 5194 5195 // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps. 5196 5197 if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') { 5198 result.append("IJ"); 5199 startOfSuffix = 2; 5200 } else { 5201 result.append(titled); 5202 startOfSuffix = i + UCharacter.charCount(c); 5203 } 5204 5205 // add the remainder, and return 5206 return result.append(str, startOfSuffix, str.length()).toString(); 5207 } 5208 return str; // no change 5209 } 5210 5211 /** 5212 * {@icu} <p>Returns the titlecase version of the argument string. 5213 * <p>Position for titlecasing is determined by the argument break 5214 * iterator, hence the user can customize his break iterator for 5215 * a specialized titlecasing. In this case only the forward iteration 5216 * needs to be implemented. 5217 * If the break iterator passed in is null, the default Unicode algorithm 5218 * will be used to determine the titlecase positions. 5219 * 5220 * <p>Only positions returned by the break iterator will be title cased, 5221 * character in between the positions will all be in lower case. 
5222 * <p>Casing is dependent on the argument locale and context-sensitive 5223 * @param locale which string is to be converted in 5224 * @param str source string to be performed on 5225 * @param titleIter break iterator to determine the positions in which 5226 * the character should be title cased. 5227 * @param options bit set to modify the titlecasing operation 5228 * @return lowercase version of the argument string 5229 * @see #TITLECASE_NO_LOWERCASE 5230 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5231 * @stable ICU 54 5232 */ 5233 public static String toTitleCase(Locale locale, String str, 5234 BreakIterator titleIter, 5235 int options) { 5236 if(titleIter == null) { 5237 titleIter = BreakIterator.getWordInstance(locale); 5238 } 5239 titleIter.setText(str); 5240 return toTitleCase(getCaseLocale(locale), options, titleIter, str); 5241 } 5242 5243 /** 5244 * {@icu} The given character is mapped to its case folding equivalent according 5245 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5246 * folding equivalent, the character itself is returned. 5247 * 5248 * <p>This function only returns the simple, single-code point case mapping. 5249 * Full case mappings should be used whenever possible because they produce 5250 * better results by working on whole strings. 5251 * They can map to a result string with a different length as appropriate. 5252 * Full case mappings are applied by the case mapping functions 5253 * that take String parameters rather than code points (int). 5254 * See also the User Guide chapter on C/POSIX migration: 5255 * http://www.icu-project.org/userguide/posix.html#case_mappings 5256 * 5257 * @param ch the character to be converted 5258 * @param defaultmapping Indicates whether the default mappings defined in 5259 * CaseFolding.txt are to be used, otherwise the 5260 * mappings for dotted I and dotless i marked with 5261 * 'T' in CaseFolding.txt are included. 
5262 * @return the case folding equivalent of the character, if 5263 * any; otherwise the character itself. 5264 * @see #foldCase(String, boolean) 5265 * @stable ICU 2.1 5266 */ 5267 public static int foldCase(int ch, boolean defaultmapping) { 5268 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5269 } 5270 5271 /** 5272 * {@icu} The given string is mapped to its case folding equivalent according to 5273 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5274 * folding equivalent, the character itself is returned. 5275 * "Full", multiple-code point case folding mappings are returned here. 5276 * For "simple" single-code point mappings use the API 5277 * foldCase(int ch, boolean defaultmapping). 5278 * @param str the String to be converted 5279 * @param defaultmapping Indicates whether the default mappings defined in 5280 * CaseFolding.txt are to be used, otherwise the 5281 * mappings for dotted I and dotless i marked with 5282 * 'T' in CaseFolding.txt are included. 5283 * @return the case folding equivalent of the character, if 5284 * any; otherwise the character itself. 5285 * @see #foldCase(int, boolean) 5286 * @stable ICU 2.1 5287 */ 5288 public static String foldCase(String str, boolean defaultmapping) { 5289 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5290 } 5291 5292 /** 5293 * {@icu} Option value for case folding: use default mappings defined in 5294 * CaseFolding.txt. 5295 * @stable ICU 2.6 5296 */ 5297 public static final int FOLD_CASE_DEFAULT = 0x0000; 5298 /** 5299 * {@icu} Option value for case folding: 5300 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5301 * and dotless i appropriately for Turkic languages (tr, az). 5302 * 5303 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5304 * are to be included for default mappings and 5305 * excluded for the Turkic-specific mappings. 
5306 * 5307 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5308 * are to be excluded for default mappings and 5309 * included for the Turkic-specific mappings. 5310 * 5311 * @stable ICU 2.6 5312 */ 5313 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5314 5315 /** 5316 * {@icu} The given character is mapped to its case folding equivalent according 5317 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5318 * folding equivalent, the character itself is returned. 5319 * 5320 * <p>This function only returns the simple, single-code point case mapping. 5321 * Full case mappings should be used whenever possible because they produce 5322 * better results by working on whole strings. 5323 * They can map to a result string with a different length as appropriate. 5324 * Full case mappings are applied by the case mapping functions 5325 * that take String parameters rather than code points (int). 5326 * See also the User Guide chapter on C/POSIX migration: 5327 * http://www.icu-project.org/userguide/posix.html#case_mappings 5328 * 5329 * @param ch the character to be converted 5330 * @param options A bit set for special processing. Currently the recognised options 5331 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5332 * @return the case folding equivalent of the character, if any; otherwise the 5333 * character itself. 5334 * @see #foldCase(String, boolean) 5335 * @stable ICU 2.6 5336 */ 5337 public static int foldCase(int ch, int options) { 5338 return UCaseProps.INSTANCE.fold(ch, options); 5339 } 5340 5341 /** 5342 * {@icu} The given string is mapped to its case folding equivalent according to 5343 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5344 * folding equivalent, the character itself is returned. 5345 * "Full", multiple-code point case folding mappings are returned here. 
5346 * For "simple" single-code point mappings use the API 5347 * foldCase(int ch, boolean defaultmapping). 5348 * @param str the String to be converted 5349 * @param options A bit set for special processing. Currently the recognised options 5350 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5351 * @return the case folding equivalent of the character, if any; otherwise the 5352 * character itself. 5353 * @see #foldCase(int, boolean) 5354 * @stable ICU 2.6 5355 */ 5356 public static final String foldCase(String str, int options) { 5357 if (str.length() <= 100) { 5358 if (str.isEmpty()) { 5359 return str; 5360 } 5361 // Collect and apply only changes. 5362 // Good if no or few changes. Bad (slow) if many changes. 5363 Edits edits = new Edits(); 5364 StringBuilder replacementChars = CaseMapImpl.fold( 5365 options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); 5366 return applyEdits(str, replacementChars, edits); 5367 } else { 5368 return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString(); 5369 } 5370 } 5371 5372 /** 5373 * {@icu} Returns the numeric value of a Han character. 5374 * 5375 * <p>This returns the value of Han 'numeric' code points, 5376 * including those for zero, ten, hundred, thousand, ten thousand, 5377 * and hundred million. 5378 * This includes both the standard and 'checkwriting' 5379 * characters, the 'big circle' zero character, and the standard 5380 * zero character. 5381 * 5382 * <p>Note: The Unicode Standard has numeric values for more 5383 * Han characters recognized by this method 5384 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5385 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5386 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5387 * 5388 * @param ch code point to query 5389 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5390 * @stable ICU 2.4 5391 */ 5392 public static int getHanNumericValue(int ch) 5393 { 5394 switch(ch) 5395 { 5396 case IDEOGRAPHIC_NUMBER_ZERO_ : 5397 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5398 return 0; // Han Zero 5399 case CJK_IDEOGRAPH_FIRST_ : 5400 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5401 return 1; // Han One 5402 case CJK_IDEOGRAPH_SECOND_ : 5403 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5404 return 2; // Han Two 5405 case CJK_IDEOGRAPH_THIRD_ : 5406 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5407 return 3; // Han Three 5408 case CJK_IDEOGRAPH_FOURTH_ : 5409 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5410 return 4; // Han Four 5411 case CJK_IDEOGRAPH_FIFTH_ : 5412 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5413 return 5; // Han Five 5414 case CJK_IDEOGRAPH_SIXTH_ : 5415 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5416 return 6; // Han Six 5417 case CJK_IDEOGRAPH_SEVENTH_ : 5418 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5419 return 7; // Han Seven 5420 case CJK_IDEOGRAPH_EIGHTH_ : 5421 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5422 return 8; // Han Eight 5423 case CJK_IDEOGRAPH_NINETH_ : 5424 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5425 return 9; // Han Nine 5426 case CJK_IDEOGRAPH_TEN_ : 5427 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5428 return 10; 5429 case CJK_IDEOGRAPH_HUNDRED_ : 5430 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5431 return 100; 5432 case CJK_IDEOGRAPH_THOUSAND_ : 5433 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5434 return 1000; 5435 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5436 return 10000; 5437 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5438 return 100000000; 5439 } 5440 return -1; // no value 5441 } 5442 5443 /** 5444 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5445 * <p>Example of use:<br> 5446 * <pre> 5447 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5448 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5449 * while (iterator.next(element)) { 5450 * System.out.println("Codepoint \\u" + 5451 * Integer.toHexString(element.start) + 5452 * " to codepoint \\u" + 5453 * Integer.toHexString(element.limit - 1) + 5454 * " has the character type " + 5455 * element.value); 5456 * } 5457 * </pre> 5458 * @return an iterator 5459 * @stable ICU 2.6 5460 */ 5461 public static RangeValueIterator getTypeIterator() 5462 { 5463 return new UCharacterTypeIterator(); 5464 } 5465 5466 private static final class UCharacterTypeIterator implements RangeValueIterator { 5467 UCharacterTypeIterator() { 5468 reset(); 5469 } 5470 5471 // implements RangeValueIterator 5472 @Override 5473 public boolean next(Element element) { 5474 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5475 element.start=range.startCodePoint; 5476 element.limit=range.endCodePoint+1; 5477 element.value=range.value; 5478 return true; 5479 } else { 5480 return false; 5481 } 5482 } 5483 5484 // implements RangeValueIterator 5485 @Override 5486 public void reset() { 5487 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5488 } 5489 5490 private Iterator<Trie2.Range> trieIterator; 5491 private Trie2.Range range; 5492 5493 private static final class MaskType implements Trie2.ValueMapper { 5494 // Extracts the general category ("character type") from the trie value. 5495 @Override 5496 public int map(int value) { 5497 return value & UCharacterProperty.TYPE_MASK; 5498 } 5499 } 5500 private static final MaskType MASK_TYPE=new MaskType(); 5501 } 5502 5503 /** 5504 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5505 * <p>This API only gets the iterator for the modern, most up-to-date 5506 * Unicode names. 
For older 1.0 Unicode names use get1_0NameIterator() or 5507 * for extended names use getExtendedNameIterator(). 5508 * <p>Example of use:<br> 5509 * <pre> 5510 * ValueIterator iterator = UCharacter.getNameIterator(); 5511 * ValueIterator.Element element = new ValueIterator.Element(); 5512 * while (iterator.next(element)) { 5513 * System.out.println("Codepoint \\u" + 5514 * Integer.toHexString(element.codepoint) + 5515 * " has the name " + (String)element.value); 5516 * } 5517 * </pre> 5518 * <p>The maximal range which the name iterator iterates is from 5519 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5520 * @return an iterator 5521 * @stable ICU 2.6 5522 */ 5523 public static ValueIterator getNameIterator(){ 5524 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5525 UCharacterNameChoice.UNICODE_CHAR_NAME); 5526 } 5527 5528 /** 5529 * {@icu} Returns an empty iterator. 5530 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5531 * @return an empty iterator 5532 * @deprecated ICU 49 5533 * @see #getName1_0(int) 5534 */ 5535 @Deprecated 5536 public static ValueIterator getName1_0Iterator(){ 5537 return new DummyValueIterator(); 5538 } 5539 5540 private static final class DummyValueIterator implements ValueIterator { 5541 @Override 5542 public boolean next(Element element) { return false; } 5543 @Override 5544 public void reset() {} 5545 @Override 5546 public void setRange(int start, int limit) {} 5547 } 5548 5549 /** 5550 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5551 * <p>This API only gets the iterator for the extended names. 5552 * For modern, most up-to-date Unicode names use getNameIterator() or 5553 * for older 1.0 Unicode names use get1_0NameIterator(). 
5554 * <p>Example of use:<br> 5555 * <pre> 5556 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5557 * ValueIterator.Element element = new ValueIterator.Element(); 5558 * while (iterator.next(element)) { 5559 * System.out.println("Codepoint \\u" + 5560 * Integer.toHexString(element.codepoint) + 5561 * " has the name " + (String)element.value); 5562 * } 5563 * </pre> 5564 * <p>The maximal range which the name iterator iterates is from 5565 * @return an iterator 5566 * @stable ICU 2.6 5567 */ 5568 public static ValueIterator getExtendedNameIterator(){ 5569 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5570 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5571 } 5572 5573 /** 5574 * {@icu} Returns the "age" of the code point. 5575 * <p>The "age" is the Unicode version when the code point was first 5576 * designated (as a non-character or for Private Use) or assigned a 5577 * character. 5578 * <p>This can be useful to avoid emitting code points to receiving 5579 * processes that do not accept newer characters. 5580 * <p>The data is from the UCD file DerivedAge.txt. 5581 * @param ch The code point. 5582 * @return the Unicode version number 5583 * @stable ICU 2.6 5584 */ 5585 public static VersionInfo getAge(int ch) 5586 { 5587 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5588 throw new IllegalArgumentException("Codepoint out of bounds"); 5589 } 5590 return UCharacterProperty.INSTANCE.getAge(ch); 5591 } 5592 5593 /** 5594 * {@icu} <p>Check a binary Unicode property for a code point. 5595 * <p>Unicode, especially in version 3.2, defines many more properties 5596 * than the original set in UnicodeData.txt. 5597 * <p>This API is intended to reflect Unicode properties as defined in 5598 * the Unicode Character Database (UCD) and Unicode Technical Reports 5599 * (UTR). 5600 * <p>For details about the properties see 5601 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5602 * <p>For names of Unicode properties see the UCD file 5603 * PropertyAliases.txt. 5604 * <p>This API does not check the validity of the codepoint. 5605 * <p>Important: If ICU is built with UCD files from Unicode versions 5606 * below 3.2, then properties marked with "new" are not or 5607 * not fully available. 5608 * @param ch code point to test. 5609 * @param property selector constant from com.ibm.icu.lang.UProperty, 5610 * identifies which binary property to check. 5611 * @return true or false according to the binary Unicode property value 5612 * for ch. Also false if property is out of bounds or if the 5613 * Unicode version does not have data for the property at all, or 5614 * not for this code point. 5615 * @see com.ibm.icu.lang.UProperty 5616 * @stable ICU 2.6 5617 */ 5618 public static boolean hasBinaryProperty(int ch, int property) 5619 { 5620 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5621 } 5622 5623 /** 5624 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 5625 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5626 * <p>Different from UCharacter.isLetter(ch)! 5627 * @stable ICU 2.6 5628 * @param ch codepoint to be tested 5629 */ 5630 public static boolean isUAlphabetic(int ch) 5631 { 5632 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5633 } 5634 5635 /** 5636 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 5637 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5638 * <p>This is different from UCharacter.isLowerCase(ch)! 5639 * @param ch codepoint to be tested 5640 * @stable ICU 2.6 5641 */ 5642 public static boolean isULowercase(int ch) 5643 { 5644 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5645 } 5646 5647 /** 5648 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 5649 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5650 * <p>This is different from UCharacter.isUpperCase(ch)! 
5651 * @param ch codepoint to be tested 5652 * @stable ICU 2.6 5653 */ 5654 public static boolean isUUppercase(int ch) 5655 { 5656 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5657 } 5658 5659 /** 5660 * {@icu} <p>Check if a code point has the White_Space Unicode property. 5661 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5662 * <p>This is different from both UCharacter.isSpace(ch) and 5663 * UCharacter.isWhitespace(ch)! 5664 * @param ch codepoint to be tested 5665 * @stable ICU 2.6 5666 */ 5667 public static boolean isUWhiteSpace(int ch) 5668 { 5669 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5670 } 5671 5672 /** 5673 * {@icu} <p>Returns the property value for an Unicode property type of a code point. 5674 * Also returns binary and mask property values. 5675 * <p>Unicode, especially in version 3.2, defines many more properties than 5676 * the original set in UnicodeData.txt. 5677 * <p>The properties APIs are intended to reflect Unicode properties as 5678 * defined in the Unicode Character Database (UCD) and Unicode Technical 5679 * Reports (UTR). For details about the properties see 5680 * http://www.unicode.org/. 5681 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5682 * 5683 * <pre> 5684 * Sample usage: 5685 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5686 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5687 * boolean b = (ideo == 1) ? true : false; 5688 * </pre> 5689 * @param ch code point to test. 5690 * @param type UProperty selector constant, identifies which binary 5691 * property to check. Must be 5692 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5693 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5694 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
5695 * @return numeric value that is directly the property value or, 5696 * for enumerated properties, corresponds to the numeric value of 5697 * the enumerated constant of the respective property value 5698 * enumeration type (cast to enum type if necessary). 5699 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5700 * Returns a bit-mask for mask properties. 5701 * Returns 0 if 'type' is out of bounds or if the Unicode version 5702 * does not have data for the property at all, or not for this code 5703 * point. 5704 * @see UProperty 5705 * @see #hasBinaryProperty 5706 * @see #getIntPropertyMinValue 5707 * @see #getIntPropertyMaxValue 5708 * @see #getUnicodeVersion 5709 * @stable ICU 2.4 5710 */ 5711 public static int getIntPropertyValue(int ch, int type) 5712 { 5713 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5714 } 5715 /** 5716 * {@icu} Returns a string version of the property value. 5717 * @param propertyEnum The property enum value. 5718 * @param codepoint The codepoint value. 5719 * @param nameChoice The choice of the name. 5720 * @return value as string 5721 * @internal 5722 * @deprecated This API is ICU internal only. 
5723 */ 5724 @Deprecated 5725 ///CLOVER:OFF 5726 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5727 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5728 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5729 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5730 nameChoice); 5731 } 5732 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5733 return String.valueOf(getUnicodeNumericValue(codepoint)); 5734 } 5735 // otherwise must be string property 5736 switch (propertyEnum) { 5737 case UProperty.AGE: return getAge(codepoint).toString(); 5738 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5739 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5740 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5741 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5742 case UProperty.NAME: return getName(codepoint); 5743 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5744 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5745 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5746 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5747 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5748 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5749 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5750 } 5751 throw new IllegalArgumentException("Illegal Property Enum"); 5752 } 5753 ///CLOVER:ON 5754 5755 /** 5756 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 5757 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5758 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 
5759 * @param type UProperty selector constant, identifies which binary 5760 * property to check. Must be 5761 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5762 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5763 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5764 * for a Unicode property. 0 if the property 5765 * selector 'type' is out of range. 5766 * @see UProperty 5767 * @see #hasBinaryProperty 5768 * @see #getUnicodeVersion 5769 * @see #getIntPropertyMaxValue 5770 * @see #getIntPropertyValue 5771 * @stable ICU 2.4 5772 */ 5773 public static int getIntPropertyMinValue(int type){ 5774 5775 return 0; // undefined; and: all other properties have a minimum value of 0 5776 } 5777 5778 5779 /** 5780 * {@icu} Returns the maximum value for an integer/binary Unicode property. 5781 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5782 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5783 * Examples for min/max values (for Unicode 3.2): 5784 * <ul> 5785 * <li> UProperty.BIDI_CLASS: 0/18 5786 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5787 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5788 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5789 * </ul> 5790 * For undefined UProperty constant values, min/max values will be 0/-1. 5791 * @param type UProperty selector constant, identifies which binary 5792 * property to check. Must be 5793 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5794 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5795 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5796 * property. <= 0 if the property selector 'type' is out of range. 
5797 * @see UProperty 5798 * @see #hasBinaryProperty 5799 * @see #getUnicodeVersion 5800 * @see #getIntPropertyMaxValue 5801 * @see #getIntPropertyValue 5802 * @stable ICU 2.4 5803 */ 5804 public static int getIntPropertyMaxValue(int type) 5805 { 5806 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5807 } 5808 5809 /** 5810 * Provide the java.lang.Character forDigit API, for convenience. 5811 * @stable ICU 3.0 5812 */ 5813 public static char forDigit(int digit, int radix) { 5814 return java.lang.Character.forDigit(digit, radix); 5815 } 5816 5817 // JDK 1.5 API coverage 5818 5819 /** 5820 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5821 * 5822 * @stable ICU 3.0 5823 */ 5824 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5825 5826 /** 5827 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5828 * 5829 * @stable ICU 3.0 5830 */ 5831 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5832 5833 /** 5834 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5835 * 5836 * @stable ICU 3.0 5837 */ 5838 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5839 5840 /** 5841 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5842 * 5843 * @stable ICU 3.0 5844 */ 5845 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5846 5847 /** 5848 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5849 * 5850 * @stable ICU 3.0 5851 */ 5852 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5853 5854 /** 5855 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5856 * 5857 * @stable ICU 3.0 5858 */ 5859 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5860 5861 /** 5862 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
5863 * 5864 * @stable ICU 3.0 5865 */ 5866 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5867 5868 /** 5869 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 5870 * 5871 * @stable ICU 3.0 5872 */ 5873 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5874 5875 /** 5876 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5877 * 5878 * @stable ICU 3.0 5879 */ 5880 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5881 5882 /** 5883 * Equivalent to {@link Character#isValidCodePoint}. 5884 * 5885 * @param cp the code point to check 5886 * @return true if cp is a valid code point 5887 * @stable ICU 3.0 5888 */ 5889 public static final boolean isValidCodePoint(int cp) { 5890 return cp >= 0 && cp <= MAX_CODE_POINT; 5891 } 5892 5893 /** 5894 * Same as {@link Character#isSupplementaryCodePoint}. 5895 * 5896 * @param cp the code point to check 5897 * @return true if cp is a supplementary code point 5898 * @stable ICU 3.0 5899 */ 5900 public static final boolean isSupplementaryCodePoint(int cp) { 5901 return Character.isSupplementaryCodePoint(cp); 5902 } 5903 5904 /** 5905 * Same as {@link Character#isHighSurrogate}. 5906 * 5907 * @param ch the char to check 5908 * @return true if ch is a high (lead) surrogate 5909 * @stable ICU 3.0 5910 */ 5911 public static boolean isHighSurrogate(char ch) { 5912 return Character.isHighSurrogate(ch); 5913 } 5914 5915 /** 5916 * Same as {@link Character#isLowSurrogate}. 5917 * 5918 * @param ch the char to check 5919 * @return true if ch is a low (trail) surrogate 5920 * @stable ICU 3.0 5921 */ 5922 public static boolean isLowSurrogate(char ch) { 5923 return Character.isLowSurrogate(ch); 5924 } 5925 5926 /** 5927 * Same as {@link Character#isSurrogatePair}. 
5928 * 5929 * @param high the high (lead) char 5930 * @param low the low (trail) char 5931 * @return true if high, low form a surrogate pair 5932 * @stable ICU 3.0 5933 */ 5934 public static final boolean isSurrogatePair(char high, char low) { 5935 return Character.isSurrogatePair(high, low); 5936 } 5937 5938 /** 5939 * Same as {@link Character#charCount}. 5940 * Returns the number of chars needed to represent the code point (1 or 2). 5941 * This does not check the code point for validity. 5942 * 5943 * @param cp the code point to check 5944 * @return the number of chars needed to represent the code point 5945 * @stable ICU 3.0 5946 */ 5947 public static int charCount(int cp) { 5948 return Character.charCount(cp); 5949 } 5950 5951 /** 5952 * Same as {@link Character#toCodePoint}. 5953 * Returns the code point represented by the two surrogate code units. 5954 * This does not check the surrogate pair for validity. 5955 * 5956 * @param high the high (lead) surrogate 5957 * @param low the low (trail) surrogate 5958 * @return the code point formed by the surrogate pair 5959 * @stable ICU 3.0 5960 */ 5961 public static final int toCodePoint(char high, char low) { 5962 return Character.toCodePoint(high, low); 5963 } 5964 5965 /** 5966 * Same as {@link Character#codePointAt(CharSequence, int)}. 5967 * Returns the code point at index. 5968 * This examines only the characters at index and index+1. 
5969 * 5970 * @param seq the characters to check 5971 * @param index the index of the first or only char forming the code point 5972 * @return the code point at the index 5973 * @stable ICU 3.0 5974 */ 5975 public static final int codePointAt(CharSequence seq, int index) { 5976 char c1 = seq.charAt(index++); 5977 if (isHighSurrogate(c1)) { 5978 if (index < seq.length()) { 5979 char c2 = seq.charAt(index); 5980 if (isLowSurrogate(c2)) { 5981 return toCodePoint(c1, c2); 5982 } 5983 } 5984 } 5985 return c1; 5986 } 5987 5988 /** 5989 * Same as {@link Character#codePointAt(char[], int)}. 5990 * Returns the code point at index. 5991 * This examines only the characters at index and index+1. 5992 * 5993 * @param text the characters to check 5994 * @param index the index of the first or only char forming the code point 5995 * @return the code point at the index 5996 * @stable ICU 3.0 5997 */ 5998 public static final int codePointAt(char[] text, int index) { 5999 char c1 = text[index++]; 6000 if (isHighSurrogate(c1)) { 6001 if (index < text.length) { 6002 char c2 = text[index]; 6003 if (isLowSurrogate(c2)) { 6004 return toCodePoint(c1, c2); 6005 } 6006 } 6007 } 6008 return c1; 6009 } 6010 6011 /** 6012 * Same as {@link Character#codePointAt(char[], int, int)}. 6013 * Returns the code point at index. 6014 * This examines only the characters at index and index+1. 
6015 * 6016 * @param text the characters to check 6017 * @param index the index of the first or only char forming the code point 6018 * @param limit the limit of the valid text 6019 * @return the code point at the index 6020 * @stable ICU 3.0 6021 */ 6022 public static final int codePointAt(char[] text, int index, int limit) { 6023 if (index >= limit || limit > text.length) { 6024 throw new IndexOutOfBoundsException(); 6025 } 6026 char c1 = text[index++]; 6027 if (isHighSurrogate(c1)) { 6028 if (index < limit) { 6029 char c2 = text[index]; 6030 if (isLowSurrogate(c2)) { 6031 return toCodePoint(c1, c2); 6032 } 6033 } 6034 } 6035 return c1; 6036 } 6037 6038 /** 6039 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6040 * Return the code point before index. 6041 * This examines only the characters at index-1 and index-2. 6042 * 6043 * @param seq the characters to check 6044 * @param index the index after the last or only char forming the code point 6045 * @return the code point before the index 6046 * @stable ICU 3.0 6047 */ 6048 public static final int codePointBefore(CharSequence seq, int index) { 6049 char c2 = seq.charAt(--index); 6050 if (isLowSurrogate(c2)) { 6051 if (index > 0) { 6052 char c1 = seq.charAt(--index); 6053 if (isHighSurrogate(c1)) { 6054 return toCodePoint(c1, c2); 6055 } 6056 } 6057 } 6058 return c2; 6059 } 6060 6061 /** 6062 * Same as {@link Character#codePointBefore(char[], int)}. 6063 * Returns the code point before index. 6064 * This examines only the characters at index-1 and index-2. 
6065 * 6066 * @param text the characters to check 6067 * @param index the index after the last or only char forming the code point 6068 * @return the code point before the index 6069 * @stable ICU 3.0 6070 */ 6071 public static final int codePointBefore(char[] text, int index) { 6072 char c2 = text[--index]; 6073 if (isLowSurrogate(c2)) { 6074 if (index > 0) { 6075 char c1 = text[--index]; 6076 if (isHighSurrogate(c1)) { 6077 return toCodePoint(c1, c2); 6078 } 6079 } 6080 } 6081 return c2; 6082 } 6083 6084 /** 6085 * Same as {@link Character#codePointBefore(char[], int, int)}. 6086 * Return the code point before index. 6087 * This examines only the characters at index-1 and index-2. 6088 * 6089 * @param text the characters to check 6090 * @param index the index after the last or only char forming the code point 6091 * @param limit the start of the valid text 6092 * @return the code point before the index 6093 * @stable ICU 3.0 6094 */ 6095 public static final int codePointBefore(char[] text, int index, int limit) { 6096 if (index <= limit || limit < 0) { 6097 throw new IndexOutOfBoundsException(); 6098 } 6099 char c2 = text[--index]; 6100 if (isLowSurrogate(c2)) { 6101 if (index > limit) { 6102 char c1 = text[--index]; 6103 if (isHighSurrogate(c1)) { 6104 return toCodePoint(c1, c2); 6105 } 6106 } 6107 } 6108 return c2; 6109 } 6110 6111 /** 6112 * Same as {@link Character#toChars(int, char[], int)}. 6113 * Writes the chars representing the 6114 * code point into the destination at the given index. 
6115 * 6116 * @param cp the code point to convert 6117 * @param dst the destination array into which to put the char(s) representing the code point 6118 * @param dstIndex the index at which to put the first (or only) char 6119 * @return the count of the number of chars written (1 or 2) 6120 * @throws IllegalArgumentException if cp is not a valid code point 6121 * @stable ICU 3.0 6122 */ 6123 public static final int toChars(int cp, char[] dst, int dstIndex) { 6124 return Character.toChars(cp, dst, dstIndex); 6125 } 6126 6127 /** 6128 * Same as {@link Character#toChars(int)}. 6129 * Returns a char array representing the code point. 6130 * 6131 * @param cp the code point to convert 6132 * @return an array containing the char(s) representing the code point 6133 * @throws IllegalArgumentException if cp is not a valid code point 6134 * @stable ICU 3.0 6135 */ 6136 public static final char[] toChars(int cp) { 6137 return Character.toChars(cp); 6138 } 6139 6140 /** 6141 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6142 * convenience. Returns a byte representing the directionality of the 6143 * character. 6144 * 6145 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6146 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6147 * 6148 * {@icunote} The return value must be tested using the constants defined in {@link 6149 * UCharacterDirection} and its interface {@link 6150 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6151 * defined by <code>java.lang.Character</code>. 6152 * @param cp the code point to check 6153 * @return the directionality of the code point 6154 * @see #getDirection 6155 * @stable ICU 3.0 6156 */ 6157 public static byte getDirectionality(int cp) 6158 { 6159 return (byte)getDirection(cp); 6160 } 6161 6162 /** 6163 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6164 * method, for convenience. 
Counts the number of code points in the range 6165 * of text. 6166 * @param text the characters to check 6167 * @param start the start of the range 6168 * @param limit the limit of the range 6169 * @return the number of code points in the range 6170 * @stable ICU 3.0 6171 */ 6172 public static int codePointCount(CharSequence text, int start, int limit) { 6173 if (start < 0 || limit < start || limit > text.length()) { 6174 throw new IndexOutOfBoundsException("start (" + start + 6175 ") or limit (" + limit + 6176 ") invalid or out of range 0, " + text.length()); 6177 } 6178 6179 int len = limit - start; 6180 while (limit > start) { 6181 char ch = text.charAt(--limit); 6182 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6183 ch = text.charAt(--limit); 6184 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6185 --len; 6186 break; 6187 } 6188 } 6189 } 6190 return len; 6191 } 6192 6193 /** 6194 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6195 * convenience. Counts the number of code points in the range of text. 
6196 * @param text the characters to check 6197 * @param start the start of the range 6198 * @param limit the limit of the range 6199 * @return the number of code points in the range 6200 * @stable ICU 3.0 6201 */ 6202 public static int codePointCount(char[] text, int start, int limit) { 6203 if (start < 0 || limit < start || limit > text.length) { 6204 throw new IndexOutOfBoundsException("start (" + start + 6205 ") or limit (" + limit + 6206 ") invalid or out of range 0, " + text.length); 6207 } 6208 6209 int len = limit - start; 6210 while (limit > start) { 6211 char ch = text[--limit]; 6212 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6213 ch = text[--limit]; 6214 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6215 --len; 6216 break; 6217 } 6218 } 6219 } 6220 return len; 6221 } 6222 6223 /** 6224 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6225 * method, for convenience. Adjusts the char index by a code point offset. 
6226 * @param text the characters to check 6227 * @param index the index to adjust 6228 * @param codePointOffset the number of code points by which to offset the index 6229 * @return the adjusted index 6230 * @stable ICU 3.0 6231 */ 6232 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6233 if (index < 0 || index > text.length()) { 6234 throw new IndexOutOfBoundsException("index ( " + index + 6235 ") out of range 0, " + text.length()); 6236 } 6237 6238 if (codePointOffset < 0) { 6239 while (++codePointOffset <= 0) { 6240 char ch = text.charAt(--index); 6241 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6242 ch = text.charAt(--index); 6243 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6244 if (++codePointOffset > 0) { 6245 return index+1; 6246 } 6247 } 6248 } 6249 } 6250 } else { 6251 int limit = text.length(); 6252 while (--codePointOffset >= 0) { 6253 char ch = text.charAt(index++); 6254 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6255 ch = text.charAt(index++); 6256 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6257 if (--codePointOffset < 0) { 6258 return index-1; 6259 } 6260 } 6261 } 6262 } 6263 } 6264 6265 return index; 6266 } 6267 6268 /** 6269 * Equivalent to the 6270 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6271 * method, for convenience. Adjusts the char index by a code point offset. 
6272 * @param text the characters to check 6273 * @param start the start of the range to check 6274 * @param count the length of the range to check 6275 * @param index the index to adjust 6276 * @param codePointOffset the number of code points by which to offset the index 6277 * @return the adjusted index 6278 * @stable ICU 3.0 6279 */ 6280 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6281 int codePointOffset) { 6282 int limit = start + count; 6283 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6284 throw new IndexOutOfBoundsException("index ( " + index + 6285 ") out of range " + start + 6286 ", " + limit + 6287 " in array 0, " + text.length); 6288 } 6289 6290 if (codePointOffset < 0) { 6291 while (++codePointOffset <= 0) { 6292 char ch = text[--index]; 6293 if (index < start) { 6294 throw new IndexOutOfBoundsException("index ( " + index + 6295 ") < start (" + start + 6296 ")"); 6297 } 6298 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6299 ch = text[--index]; 6300 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6301 if (++codePointOffset > 0) { 6302 return index+1; 6303 } 6304 } 6305 } 6306 } 6307 } else { 6308 while (--codePointOffset >= 0) { 6309 char ch = text[index++]; 6310 if (index > limit) { 6311 throw new IndexOutOfBoundsException("index ( " + index + 6312 ") > limit (" + limit + 6313 ")"); 6314 } 6315 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6316 ch = text[index++]; 6317 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6318 if (--codePointOffset < 0) { 6319 return index-1; 6320 } 6321 } 6322 } 6323 } 6324 } 6325 6326 return index; 6327 } 6328 6329 // private variables ------------------------------------------------- 6330 6331 /** 6332 * To get the last character out from a data type 6333 */ 6334 private static final int LAST_CHAR_MASK_ = 0xFFFF; 6335 6336 // /** 6337 // * To get the last 
byte out from a data type 6338 // */ 6339 // private static final int LAST_BYTE_MASK_ = 0xFF; 6340 // 6341 // /** 6342 // * Shift 16 bits 6343 // */ 6344 // private static final int SHIFT_16_ = 16; 6345 // 6346 // /** 6347 // * Shift 24 bits 6348 // */ 6349 // private static final int SHIFT_24_ = 24; 6350 // 6351 // /** 6352 // * Decimal radix 6353 // */ 6354 // private static final int DECIMAL_RADIX_ = 10; 6355 6356 /** 6357 * No break space code point 6358 */ 6359 private static final int NO_BREAK_SPACE_ = 0xA0; 6360 6361 /** 6362 * Figure space code point 6363 */ 6364 private static final int FIGURE_SPACE_ = 0x2007; 6365 6366 /** 6367 * Narrow no break space code point 6368 */ 6369 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6370 6371 /** 6372 * Ideographic number zero code point 6373 */ 6374 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6375 6376 /** 6377 * CJK Ideograph, First code point 6378 */ 6379 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6380 6381 /** 6382 * CJK Ideograph, Second code point 6383 */ 6384 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6385 6386 /** 6387 * CJK Ideograph, Third code point 6388 */ 6389 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6390 6391 /** 6392 * CJK Ideograph, Fourth code point 6393 */ 6394 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6395 6396 /** 6397 * CJK Ideograph, FIFTH code point 6398 */ 6399 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6400 6401 /** 6402 * CJK Ideograph, Sixth code point 6403 */ 6404 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6405 6406 /** 6407 * CJK Ideograph, Seventh code point 6408 */ 6409 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6410 6411 /** 6412 * CJK Ideograph, Eighth code point 6413 */ 6414 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6415 6416 /** 6417 * CJK Ideograph, Nineth code point 6418 */ 6419 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 6420 
/**
 * Application Program Command control code point, U+009F.
 */
private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

/**
 * Unit separator control code point, U+001F.
 */
private static final int UNIT_SEPARATOR_ = 0x001F;

/**
 * Delete control code point, U+007F.
 */
private static final int DELETE_ = 0x007F;

/**
 * Han digit characters: the "complex" forms of zero through ten, hundred and
 * thousand, together with the ordinary forms of ten, hundred, thousand,
 * ten thousand and hundred million.
 */
private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6;
private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9;
private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3;
private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3;
private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086;
private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d;
private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678;
private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2;
private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c;
private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396;
private static final int CJK_IDEOGRAPH_TEN_ = 0x5341;
private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe;
private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e;
private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70;
private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343;
private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
// NOTE(review): 0x824c looks like a digit transposition of U+842C, the ideograph
// for ten thousand. The value is left untouched here because the code (and any
// tests) that depend on it are outside this part of the file -- confirm against
// the Unihan numeric data before changing it.
private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c;
private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104;

// private constructor -----------------------------------------------
///CLOVER:OFF
/**
 * Private constructor to prevent instantiation
 */
private UCharacter()
{
}
///CLOVER:ON
}