Home | History | Annotate | Download | only in lang
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /**
      4  *******************************************************************************
      5  * Copyright (C) 1996-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.lang;
     11 
     12 import java.lang.ref.SoftReference;
     13 import java.util.HashMap;
     14 import java.util.Iterator;
     15 import java.util.Locale;
     16 import java.util.Map;
     17 
     18 import com.ibm.icu.impl.CaseMapImpl;
     19 import com.ibm.icu.impl.IllegalIcuArgumentException;
     20 import com.ibm.icu.impl.Trie2;
     21 import com.ibm.icu.impl.UBiDiProps;
     22 import com.ibm.icu.impl.UCaseProps;
     23 import com.ibm.icu.impl.UCharacterName;
     24 import com.ibm.icu.impl.UCharacterNameChoice;
     25 import com.ibm.icu.impl.UCharacterProperty;
     26 import com.ibm.icu.impl.UCharacterUtility;
     27 import com.ibm.icu.impl.UPropertyAliases;
     28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
     29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
     30 import com.ibm.icu.text.BreakIterator;
     31 import com.ibm.icu.text.Edits;
     32 import com.ibm.icu.text.Normalizer2;
     33 import com.ibm.icu.util.RangeValueIterator;
     34 import com.ibm.icu.util.ULocale;
     35 import com.ibm.icu.util.ValueIterator;
     36 import com.ibm.icu.util.VersionInfo;
     37 
     38 /**
     39  * {@icuenhanced java.lang.Character}.{@icu _usage_}
     40  *
     41  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
     42  * These extensions provide support for more Unicode properties.
     43  * Each ICU release supports the latest version of Unicode available at that time.
     44  *
     45  * <p>For some time before Java 5 added support for supplementary Unicode code points,
     46  * the ICU UCharacter class and many other ICU classes already supported them.
     47  * Some UCharacter methods and constants were widened slightly differently than
     48  * how the Character class methods and constants were widened later.
     49  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
     50  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
     51  *
     52  * <p>Code points are represented in these APIs using ints. While it would be
     53  * more convenient in Java to have a separate primitive datatype for them,
     54  * ints suffice in the meantime.
     55  *
     56  * <p>To use this class, please add the jar file icu4j.jar to the
     57  * class path, since it contains data files which supply the information used
     58  * by this class.<br>
     59  * E.g. on Windows:<br>
     60  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
     61  * Otherwise, another method would be to copy the files uprops.dat and
     62  * unames.icu from the icu4j source subdirectory
     63  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
     64  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
     65  *
     66  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
     67  * properties, the main differences between UCharacter and Character are:
     68  * <ul>
     69  * <li> UCharacter is not designed to be a char wrapper and does not have
     70  *      APIs that involve management of that single char.<br>
     71  *      These include:
     72  *      <ul>
     73  *        <li> char charValue(),
     74  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
     75  *      </ul>
     76  * <li> UCharacter does not include Character APIs that are deprecated, nor
     77  *      does it include the Java-specific character information, such as
     78  *      boolean isJavaIdentifierPart(char ch).
     79  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
     80  *      values '10' - '35'. UCharacter also does this in digit and
     81  *      getNumericValue, to adhere to the java semantics of these
     82  *      methods.  New methods unicodeDigit, and
     83  *      getUnicodeNumericValue do not treat the above code points
     84  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
     85  * </ul>
     86  * <p>
     87  * Further detail on differences can be determined using the program
     88  *        <a href=
     89  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
     90  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
     91  * <p>
     92  * In addition to Java compatibility functions, which calculate derived properties,
     93  * this API provides low-level access to the Unicode Character Database.
     94  * <p>
     95  * Unicode assigns each code point (not just each assigned character) values for
     96  * many properties.
     97  * Most of them are simple boolean flags, or constants from a small enumerated list.
     98  * For some properties, values are strings or other relatively more complex types.
     99  * <p>
    100  * For more information see
    101  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
    102  * (http://www.unicode.org/ucd/)
    103  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
    104  * User Guide chapter on Properties</a>
    105  * (http://www.icu-project.org/userguide/properties.html).
    106  * <p>
    107  * There are also functions that provide easy migration from C/POSIX functions
    108  * like isblank(). Their use is generally discouraged because the C/POSIX
    109  * standards do not define their semantics beyond the ASCII range, which means
    110  * that different implementations exhibit very different behavior.
    111  * Instead, Unicode properties should be used directly.
    112  * <p>
    113  * There are also only a few, broad C/POSIX character classes, and they tend
    114  * to be used for conflicting purposes. For example, the "isalpha()" class
    115  * is sometimes used to determine word boundaries, while a more sophisticated
    116  * approach would at least distinguish initial letters from continuation
    117  * characters (the latter including combining marks).
    118  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
    119  * Another example: There is no "istitle()" class for titlecase characters.
    120  * <p>
    121  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
    122  * ICU implements them according to the Standard Recommendations in
    123  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
    124  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
    125  * <p>
    126  * API access for C/POSIX character classes is as follows:
    127  * <pre>{@code
    128  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
    129  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
    130  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
    131  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
    132  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
    133  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
    134  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
    135  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
    136  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
    137  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
    138  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
    139  * - cntrl:     getType(c)==CONTROL
    140  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
    141  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
    142  * <p>
    143  * The C/POSIX character classes are also available in UnicodeSet patterns,
    144  * using patterns like [:graph:] or \p{graph}.
    145  *
    146  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
    147  * Comparison:<ul>
    148  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
    149  *       most of general categories "Z" (separators) + most whitespace ISO controls
    150  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
    151  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
    152  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
    153  *
    154  * <p>
    155  * This class is not subclassable.
    156  *
    157  * @author Syn Wee Quek
    158  * @stable ICU 2.1
    159  * @see com.ibm.icu.lang.UCharacterEnums
    160  */
    161 
    162 public final class UCharacter implements ECharacterCategory, ECharacterDirection
    163 {
    164     // public inner classes ----------------------------------------------
    165 
    166     /**
    167      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
    168      *
    169      * A family of character subsets representing the character blocks in the
    170      * Unicode specification, generated from Unicode Data file Blocks.txt.
    171      * Character blocks generally define characters used for a specific script
    172      * or purpose. A character is contained by at most one Unicode block.
    173      *
    174      * {@icunote} All fields named XXX_ID are specific to ICU.
    175      *
    176      * @stable ICU 2.4
    177      */
    178     public static final class UnicodeBlock extends Character.Subset
    179     {
    180         // block id corresponding to icu4c -----------------------------------
    181 
    182         /**
    183          * @stable ICU 2.4
    184          */
    185         public static final int INVALID_CODE_ID = -1;
    186         /**
    187          * @stable ICU 2.4
    188          */
    189         public static final int BASIC_LATIN_ID = 1;
    190         /**
    191          * @stable ICU 2.4
    192          */
    193         public static final int LATIN_1_SUPPLEMENT_ID = 2;
    194         /**
    195          * @stable ICU 2.4
    196          */
    197         public static final int LATIN_EXTENDED_A_ID = 3;
    198         /**
    199          * @stable ICU 2.4
    200          */
    201         public static final int LATIN_EXTENDED_B_ID = 4;
    202         /**
    203          * @stable ICU 2.4
    204          */
    205         public static final int IPA_EXTENSIONS_ID = 5;
    206         /**
    207          * @stable ICU 2.4
    208          */
    209         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
    210         /**
    211          * @stable ICU 2.4
    212          */
    213         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
    214         /**
    215          * Unicode 3.2 renames this block to "Greek and Coptic".
    216          * @stable ICU 2.4
    217          */
    218         public static final int GREEK_ID = 8;
    219         /**
    220          * @stable ICU 2.4
    221          */
    222         public static final int CYRILLIC_ID = 9;
    223         /**
    224          * @stable ICU 2.4
    225          */
    226         public static final int ARMENIAN_ID = 10;
    227         /**
    228          * @stable ICU 2.4
    229          */
    230         public static final int HEBREW_ID = 11;
    231         /**
    232          * @stable ICU 2.4
    233          */
    234         public static final int ARABIC_ID = 12;
    235         /**
    236          * @stable ICU 2.4
    237          */
    238         public static final int SYRIAC_ID = 13;
    239         /**
    240          * @stable ICU 2.4
    241          */
    242         public static final int THAANA_ID = 14;
    243         /**
    244          * @stable ICU 2.4
    245          */
    246         public static final int DEVANAGARI_ID = 15;
    247         /**
    248          * @stable ICU 2.4
    249          */
    250         public static final int BENGALI_ID = 16;
    251         /**
    252          * @stable ICU 2.4
    253          */
    254         public static final int GURMUKHI_ID = 17;
    255         /**
    256          * @stable ICU 2.4
    257          */
    258         public static final int GUJARATI_ID = 18;
    259         /**
    260          * @stable ICU 2.4
    261          */
    262         public static final int ORIYA_ID = 19;
    263         /**
    264          * @stable ICU 2.4
    265          */
    266         public static final int TAMIL_ID = 20;
    267         /**
    268          * @stable ICU 2.4
    269          */
    270         public static final int TELUGU_ID = 21;
    271         /**
    272          * @stable ICU 2.4
    273          */
    274         public static final int KANNADA_ID = 22;
    275         /**
    276          * @stable ICU 2.4
    277          */
    278         public static final int MALAYALAM_ID = 23;
    279         /**
    280          * @stable ICU 2.4
    281          */
    282         public static final int SINHALA_ID = 24;
    283         /**
    284          * @stable ICU 2.4
    285          */
    286         public static final int THAI_ID = 25;
    287         /**
    288          * @stable ICU 2.4
    289          */
    290         public static final int LAO_ID = 26;
    291         /**
    292          * @stable ICU 2.4
    293          */
    294         public static final int TIBETAN_ID = 27;
    295         /**
    296          * @stable ICU 2.4
    297          */
    298         public static final int MYANMAR_ID = 28;
    299         /**
    300          * @stable ICU 2.4
    301          */
    302         public static final int GEORGIAN_ID = 29;
    303         /**
    304          * @stable ICU 2.4
    305          */
    306         public static final int HANGUL_JAMO_ID = 30;
    307         /**
    308          * @stable ICU 2.4
    309          */
    310         public static final int ETHIOPIC_ID = 31;
    311         /**
    312          * @stable ICU 2.4
    313          */
    314         public static final int CHEROKEE_ID = 32;
    315         /**
    316          * @stable ICU 2.4
    317          */
    318         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
    319         /**
    320          * @stable ICU 2.4
    321          */
    322         public static final int OGHAM_ID = 34;
    323         /**
    324          * @stable ICU 2.4
    325          */
    326         public static final int RUNIC_ID = 35;
    327         /**
    328          * @stable ICU 2.4
    329          */
    330         public static final int KHMER_ID = 36;
    331         /**
    332          * @stable ICU 2.4
    333          */
    334         public static final int MONGOLIAN_ID = 37;
    335         /**
    336          * @stable ICU 2.4
    337          */
    338         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
    339         /**
    340          * @stable ICU 2.4
    341          */
    342         public static final int GREEK_EXTENDED_ID = 39;
    343         /**
    344          * @stable ICU 2.4
    345          */
    346         public static final int GENERAL_PUNCTUATION_ID = 40;
    347         /**
    348          * @stable ICU 2.4
    349          */
    350         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
    351         /**
    352          * @stable ICU 2.4
    353          */
    354         public static final int CURRENCY_SYMBOLS_ID = 42;
    355         /**
    356          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
    357          * Symbols".
    358          * @stable ICU 2.4
    359          */
    360         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
    361         /**
    362          * @stable ICU 2.4
    363          */
    364         public static final int LETTERLIKE_SYMBOLS_ID = 44;
    365         /**
    366          * @stable ICU 2.4
    367          */
    368         public static final int NUMBER_FORMS_ID = 45;
    369         /**
    370          * @stable ICU 2.4
    371          */
    372         public static final int ARROWS_ID = 46;
    373         /**
    374          * @stable ICU 2.4
    375          */
    376         public static final int MATHEMATICAL_OPERATORS_ID = 47;
    377         /**
    378          * @stable ICU 2.4
    379          */
    380         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
    381         /**
    382          * @stable ICU 2.4
    383          */
    384         public static final int CONTROL_PICTURES_ID = 49;
    385         /**
    386          * @stable ICU 2.4
    387          */
    388         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
    389         /**
    390          * @stable ICU 2.4
    391          */
    392         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
    393         /**
    394          * @stable ICU 2.4
    395          */
    396         public static final int BOX_DRAWING_ID = 52;
    397         /**
    398          * @stable ICU 2.4
    399          */
    400         public static final int BLOCK_ELEMENTS_ID = 53;
    401         /**
    402          * @stable ICU 2.4
    403          */
    404         public static final int GEOMETRIC_SHAPES_ID = 54;
    405         /**
    406          * @stable ICU 2.4
    407          */
    408         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
    409         /**
    410          * @stable ICU 2.4
    411          */
    412         public static final int DINGBATS_ID = 56;
    413         /**
    414          * @stable ICU 2.4
    415          */
    416         public static final int BRAILLE_PATTERNS_ID = 57;
    417         /**
    418          * @stable ICU 2.4
    419          */
    420         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
    421         /**
    422          * @stable ICU 2.4
    423          */
    424         public static final int KANGXI_RADICALS_ID = 59;
    425         /**
    426          * @stable ICU 2.4
    427          */
    428         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
    429         /**
    430          * @stable ICU 2.4
    431          */
    432         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
    433         /**
    434          * @stable ICU 2.4
    435          */
    436         public static final int HIRAGANA_ID = 62;
    437         /**
    438          * @stable ICU 2.4
    439          */
    440         public static final int KATAKANA_ID = 63;
    441         /**
    442          * @stable ICU 2.4
    443          */
    444         public static final int BOPOMOFO_ID = 64;
    445         /**
    446          * @stable ICU 2.4
    447          */
    448         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
    449         /**
    450          * @stable ICU 2.4
    451          */
    452         public static final int KANBUN_ID = 66;
    453         /**
    454          * @stable ICU 2.4
    455          */
    456         public static final int BOPOMOFO_EXTENDED_ID = 67;
    457         /**
    458          * @stable ICU 2.4
    459          */
    460         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
    461         /**
    462          * @stable ICU 2.4
    463          */
    464         public static final int CJK_COMPATIBILITY_ID = 69;
    465         /**
    466          * @stable ICU 2.4
    467          */
    468         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
    469         /**
    470          * @stable ICU 2.4
    471          */
    472         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
    473         /**
    474          * @stable ICU 2.4
    475          */
    476         public static final int YI_SYLLABLES_ID = 72;
    477         /**
    478          * @stable ICU 2.4
    479          */
    480         public static final int YI_RADICALS_ID = 73;
    481         /**
    482          * @stable ICU 2.4
    483          */
    484         public static final int HANGUL_SYLLABLES_ID = 74;
    485         /**
    486          * @stable ICU 2.4
    487          */
    488         public static final int HIGH_SURROGATES_ID = 75;
    489         /**
    490          * @stable ICU 2.4
    491          */
    492         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
    493         /**
    494          * @stable ICU 2.4
    495          */
    496         public static final int LOW_SURROGATES_ID = 77;
    497         /**
    498          * Same as public static final int PRIVATE_USE_ID.
    499          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
    500          * and multiple code point ranges had this block.
    501          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
    502          * and adds separate blocks for the supplementary PUAs.
    503          * @stable ICU 2.4
    504          */
    505         public static final int PRIVATE_USE_AREA_ID = 78;
    506         /**
    507          * Same as public static final int PRIVATE_USE_AREA_ID.
    508          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
    509          * and multiple code point ranges had this block.
    510          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
    511          * and adds separate blocks for the supplementary PUAs.
    512          * @stable ICU 2.4
    513          */
    514         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
    515         /**
    516          * @stable ICU 2.4
    517          */
    518         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
    519         /**
    520          * @stable ICU 2.4
    521          */
    522         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
    523         /**
    524          * @stable ICU 2.4
    525          */
    526         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
    527         /**
    528          * @stable ICU 2.4
    529          */
    530         public static final int COMBINING_HALF_MARKS_ID = 82;
    531         /**
    532          * @stable ICU 2.4
    533          */
    534         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
    535         /**
    536          * @stable ICU 2.4
    537          */
    538         public static final int SMALL_FORM_VARIANTS_ID = 84;
    539         /**
    540          * @stable ICU 2.4
    541          */
    542         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
    543         /**
    544          * @stable ICU 2.4
    545          */
    546         public static final int SPECIALS_ID = 86;
    547         /**
    548          * @stable ICU 2.4
    549          */
    550         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
    551         /**
    552          * @stable ICU 2.4
    553          */
    554         public static final int OLD_ITALIC_ID = 88;
    555         /**
    556          * @stable ICU 2.4
    557          */
    558         public static final int GOTHIC_ID = 89;
    559         /**
    560          * @stable ICU 2.4
    561          */
    562         public static final int DESERET_ID = 90;
    563         /**
    564          * @stable ICU 2.4
    565          */
    566         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
    567         /**
    568          * @stable ICU 2.4
    569          */
    570         public static final int MUSICAL_SYMBOLS_ID = 92;
    571         /**
    572          * @stable ICU 2.4
    573          */
    574         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
    575         /**
    576          * @stable ICU 2.4
    577          */
    578         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
    579         /**
    580          * @stable ICU 2.4
    581          */
    582         public static final int
    583         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
    584         /**
    585          * @stable ICU 2.4
    586          */
    587         public static final int TAGS_ID = 96;
    588 
    589         // New blocks in Unicode 3.2
    590 
    591         /**
    592          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
    593          * @stable ICU 2.4
    594          */
    595         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
    596         /**
    597          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
    598          * @stable ICU 3.0
    599          */
    600 
    601         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
    602         /**
    603          * @stable ICU 2.4
    604          */
    605         public static final int TAGALOG_ID = 98;
    606         /**
    607          * @stable ICU 2.4
    608          */
    609         public static final int HANUNOO_ID = 99;
    610         /**
    611          * @stable ICU 2.4
    612          */
    613         public static final int BUHID_ID = 100;
    614         /**
    615          * @stable ICU 2.4
    616          */
    617         public static final int TAGBANWA_ID = 101;
    618         /**
    619          * @stable ICU 2.4
    620          */
    621         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
    622         /**
    623          * @stable ICU 2.4
    624          */
    625         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
    626         /**
    627          * @stable ICU 2.4
    628          */
    629         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
    630         /**
    631          * @stable ICU 2.4
    632          */
    633         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
    634         /**
    635          * @stable ICU 2.4
    636          */
    637         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
    638         /**
    639          * @stable ICU 2.4
    640          */
    641         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
    642         /**
    643          * @stable ICU 2.4
    644          */
    645         public static final int VARIATION_SELECTORS_ID = 108;
    646         /**
    647          * @stable ICU 2.4
    648          */
    649         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
    650         /**
    651          * @stable ICU 2.4
    652          */
    653         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
    654 
    655         /**
    656          * @stable ICU 2.6
    657          */
    658         public static final int LIMBU_ID = 111; /*[1900]*/
    659         /**
    660          * @stable ICU 2.6
    661          */
    662         public static final int TAI_LE_ID = 112; /*[1950]*/
    663         /**
    664          * @stable ICU 2.6
    665          */
    666         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
    667         /**
    668          * @stable ICU 2.6
    669          */
    670         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
    671         /**
    672          * @stable ICU 2.6
    673          */
    674         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
    675         /**
    676          * @stable ICU 2.6
    677          */
    678         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
    679         /**
    680          * @stable ICU 2.6
    681          */
    682         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
    683         /**
    684          * @stable ICU 2.6
    685          */
    686         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
    687         /**
    688          * @stable ICU 2.6
    689          */
    690         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
    691         /**
    692          * @stable ICU 2.6
    693          */
    694         public static final int UGARITIC_ID = 120; /*[10380]*/
    695         /**
    696          * @stable ICU 2.6
    697          */
    698         public static final int SHAVIAN_ID = 121; /*[10450]*/
    699         /**
    700          * @stable ICU 2.6
    701          */
    702         public static final int OSMANYA_ID = 122; /*[10480]*/
    703         /**
    704          * @stable ICU 2.6
    705          */
    706         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
    707         /**
    708          * @stable ICU 2.6
    709          */
    710         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
    711         /**
    712          * @stable ICU 2.6
    713          */
    714         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
    715 
    716         /* New blocks in Unicode 4.1 */
    717 
    718         /**
    719          * @stable ICU 3.4
    720          */
    721         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
    722 
    723         /**
    724          * @stable ICU 3.4
    725          */
    726         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
    727 
                /*
                 * Block IDs 128-145. The bracketed hex number in each trailing comment appears
                 * to be the first code point of the block (e.g. [0750] for U+0750); it is
                 * informational only and not used by the code.
                 */
    728         /**
    729          * @stable ICU 3.4
    730          */
    731         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
    732 
    733         /**
    734          * @stable ICU 3.4
    735          */
    736         public static final int BUGINESE_ID = 129; /*[1A00]*/
    737 
    738         /**
    739          * @stable ICU 3.4
    740          */
    741         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
    742 
    743         /**
    744          * @stable ICU 3.4
    745          */
    746         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
    747 
    748         /**
    749          * @stable ICU 3.4
    750          */
    751         public static final int COPTIC_ID = 132; /*[2C80]*/
    752 
    753         /**
    754          * @stable ICU 3.4
    755          */
    756         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
    757 
    758         /**
    759          * @stable ICU 3.4
    760          */
    761         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
    762 
    763         /**
    764          * @stable ICU 3.4
    765          */
    766         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
    767 
    768         /**
    769          * @stable ICU 3.4
    770          */
    771         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
    772 
    773         /**
    774          * @stable ICU 3.4
    775          */
    776         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
    777 
    778         /**
    779          * @stable ICU 3.4
    780          */
    781         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
    782 
    783         /**
    784          * @stable ICU 3.4
    785          */
    786         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
    787 
    788         /**
    789          * @stable ICU 3.4
    790          */
    791         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
    792 
    793         /**
    794          * @stable ICU 3.4
    795          */
    796         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
    797 
    798         /**
    799          * @stable ICU 3.4
    800          */
    801         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
    802 
    803         /**
    804          * @stable ICU 3.4
    805          */
    806         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
    807 
    808         /**
    809          * @stable ICU 3.4
    810          */
    811         public static final int TIFINAGH_ID = 144; /*[2D30]*/
    812 
    813         /**
    814          * @stable ICU 3.4
    815          */
    816         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
    817 
    818         /* New blocks in Unicode 5.0 */
                /*
                 * Block IDs 146-154. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
    819 
    820         /**
    821          * @stable ICU 3.6
    822          */
    823         public static final int NKO_ID = 146; /*[07C0]*/
    824         /**
    825          * @stable ICU 3.6
    826          */
    827         public static final int BALINESE_ID = 147; /*[1B00]*/
    828         /**
    829          * @stable ICU 3.6
    830          */
    831         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
    832         /**
    833          * @stable ICU 3.6
    834          */
    835         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
    836         /**
    837          * @stable ICU 3.6
    838          */
    839         public static final int PHAGS_PA_ID = 150; /*[A840]*/
    840         /**
    841          * @stable ICU 3.6
    842          */
    843         public static final int PHOENICIAN_ID = 151; /*[10900]*/
    844         /**
    845          * @stable ICU 3.6
    846          */
    847         public static final int CUNEIFORM_ID = 152; /*[12000]*/
    848         /**
    849          * @stable ICU 3.6
    850          */
    851         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
    852         /**
    853          * @stable ICU 3.6
    854          */
    855         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
    856 
                /*
                 * New blocks in Unicode 5.1 (IDs 155-171).
                 * NOTE(review): this group had no section header; the version is inferred from its
                 * position between the Unicode 5.0 and 5.2 groups -- confirm.
                 */
    857         /**
    858          * @stable ICU 4.0
    859          */
    860         public static final int SUNDANESE_ID = 155; /* [1B80] */
    861 
    862         /**
    863          * @stable ICU 4.0
    864          */
    865         public static final int LEPCHA_ID = 156; /* [1C00] */
    866 
    867         /**
    868          * @stable ICU 4.0
    869          */
    870         public static final int OL_CHIKI_ID = 157; /* [1C50] */
    871 
    872         /**
    873          * @stable ICU 4.0
    874          */
    875         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
    876 
    877         /**
    878          * @stable ICU 4.0
    879          */
    880         public static final int VAI_ID = 159; /* [A500] */
    881 
    882         /**
    883          * @stable ICU 4.0
    884          */
    885         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
    886 
    887         /**
    888          * @stable ICU 4.0
    889          */
    890         public static final int SAURASHTRA_ID = 161; /* [A880] */
    891 
    892         /**
    893          * @stable ICU 4.0
    894          */
    895         public static final int KAYAH_LI_ID = 162; /* [A900] */
    896 
    897         /**
    898          * @stable ICU 4.0
    899          */
    900         public static final int REJANG_ID = 163; /* [A930] */
    901 
    902         /**
    903          * @stable ICU 4.0
    904          */
    905         public static final int CHAM_ID = 164; /* [AA00] */
    906 
    907         /**
    908          * @stable ICU 4.0
    909          */
    910         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
    911 
    912         /**
    913          * @stable ICU 4.0
    914          */
    915         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
    916 
    917         /**
    918          * @stable ICU 4.0
    919          */
    920         public static final int LYCIAN_ID = 167; /* [10280] */
    921 
    922         /**
    923          * @stable ICU 4.0
    924          */
    925         public static final int CARIAN_ID = 168; /* [102A0] */
    926 
    927         /**
    928          * @stable ICU 4.0
    929          */
    930         public static final int LYDIAN_ID = 169; /* [10920] */
    931 
    932         /**
    933          * @stable ICU 4.0
    934          */
    935         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
    936 
    937         /**
    938          * @stable ICU 4.0
    939          */
    940         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
    941 
    942         /* New blocks in Unicode 5.2 */
                /*
                 * Block IDs 172-197. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
    943 
    944         /** @stable ICU 4.4 */
    945         public static final int SAMARITAN_ID = 172; /*[0800]*/
    946         /** @stable ICU 4.4 */
    947         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
    948         /** @stable ICU 4.4 */
    949         public static final int TAI_THAM_ID = 174; /*[1A20]*/
    950         /** @stable ICU 4.4 */
    951         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
    952         /** @stable ICU 4.4 */
    953         public static final int LISU_ID = 176; /*[A4D0]*/
    954         /** @stable ICU 4.4 */
    955         public static final int BAMUM_ID = 177; /*[A6A0]*/
    956         /** @stable ICU 4.4 */
    957         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
    958         /** @stable ICU 4.4 */
    959         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
    960         /** @stable ICU 4.4 */
    961         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
    962         /** @stable ICU 4.4 */
    963         public static final int JAVANESE_ID = 181; /*[A980]*/
    964         /** @stable ICU 4.4 */
    965         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
    966         /** @stable ICU 4.4 */
    967         public static final int TAI_VIET_ID = 183; /*[AA80]*/
    968         /** @stable ICU 4.4 */
    969         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
    970         /** @stable ICU 4.4 */
    971         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
    972         /** @stable ICU 4.4 */
    973         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
    974         /** @stable ICU 4.4 */
    975         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
    976         /** @stable ICU 4.4 */
    977         public static final int AVESTAN_ID = 188; /*[10B00]*/
    978         /** @stable ICU 4.4 */
    979         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
    980         /** @stable ICU 4.4 */
    981         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
    982         /** @stable ICU 4.4 */
    983         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
    984         /** @stable ICU 4.4 */
    985         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
    986         /** @stable ICU 4.4 */
    987         public static final int KAITHI_ID = 193; /*[11080]*/
    988         /** @stable ICU 4.4 */
    989         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
    990         /** @stable ICU 4.4 */
    991         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
    992         /** @stable ICU 4.4 */
    993         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
    994         /** @stable ICU 4.4 */
    995         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
    996 
    997         /* New blocks in Unicode 6.0 */
                /*
                 * Block IDs 198-209. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
    998 
    999         /** @stable ICU 4.6 */
   1000         public static final int MANDAIC_ID = 198; /*[0840]*/
   1001         /** @stable ICU 4.6 */
   1002         public static final int BATAK_ID = 199; /*[1BC0]*/
   1003         /** @stable ICU 4.6 */
   1004         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
   1005         /** @stable ICU 4.6 */
   1006         public static final int BRAHMI_ID = 201; /*[11000]*/
   1007         /** @stable ICU 4.6 */
   1008         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
   1009         /** @stable ICU 4.6 */
   1010         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
   1011         /** @stable ICU 4.6 */
   1012         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
   1013         /** @stable ICU 4.6 */
   1014         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
   1015         /** @stable ICU 4.6 */
   1016         public static final int EMOTICONS_ID = 206; /*[1F600]*/
   1017         /** @stable ICU 4.6 */
   1018         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
   1019         /** @stable ICU 4.6 */
   1020         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
   1021         /** @stable ICU 4.6 */
   1022         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
   1023 
   1024         /* New blocks in Unicode 6.1 */
                /*
                 * Block IDs 210-220. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
   1025 
   1026         /** @stable ICU 49 */
   1027         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
   1028         /** @stable ICU 49 */
   1029         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
   1030         /** @stable ICU 49 */
   1031         public static final int CHAKMA_ID = 212; /*[11100]*/
   1032         /** @stable ICU 49 */
   1033         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
   1034         /** @stable ICU 49 */
   1035         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
   1036         /** @stable ICU 49 */
   1037         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
   1038         /** @stable ICU 49 */
   1039         public static final int MIAO_ID = 216; /*[16F00]*/
   1040         /** @stable ICU 49 */
   1041         public static final int SHARADA_ID = 217; /*[11180]*/
   1042         /** @stable ICU 49 */
   1043         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
   1044         /** @stable ICU 49 */
   1045         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
   1046         /** @stable ICU 49 */
   1047         public static final int TAKRI_ID = 220; /*[11680]*/
   1048 
   1049         /* New blocks in Unicode 7.0 */
                /*
                 * Block IDs 221-252. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
   1050 
   1051         /** @stable ICU 54 */
   1052         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
   1053         /** @stable ICU 54 */
   1054         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
   1055         /** @stable ICU 54 */
   1056         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
   1057         /** @stable ICU 54 */
   1058         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
   1059         /** @stable ICU 54 */
   1060         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
   1061         /** @stable ICU 54 */
   1062         public static final int ELBASAN_ID = 226; /*[10500]*/
   1063         /** @stable ICU 54 */
   1064         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
   1065         /** @stable ICU 54 */
   1066         public static final int GRANTHA_ID = 228; /*[11300]*/
   1067         /** @stable ICU 54 */
   1068         public static final int KHOJKI_ID = 229; /*[11200]*/
   1069         /** @stable ICU 54 */
   1070         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
   1071         /** @stable ICU 54 */
   1072         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
   1073         /** @stable ICU 54 */
   1074         public static final int LINEAR_A_ID = 232; /*[10600]*/
   1075         /** @stable ICU 54 */
   1076         public static final int MAHAJANI_ID = 233; /*[11150]*/
   1077         /** @stable ICU 54 */
   1078         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
   1079         /** @stable ICU 54 */
   1080         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
   1081         /** @stable ICU 54 */
   1082         public static final int MODI_ID = 236; /*[11600]*/
   1083         /** @stable ICU 54 */
   1084         public static final int MRO_ID = 237; /*[16A40]*/
   1085         /** @stable ICU 54 */
   1086         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
   1087         /** @stable ICU 54 */
   1088         public static final int NABATAEAN_ID = 239; /*[10880]*/
   1089         /** @stable ICU 54 */
   1090         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
   1091         /** @stable ICU 54 */
   1092         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
   1093         /** @stable ICU 54 */
   1094         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
   1095         /** @stable ICU 54 */
   1096         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
   1097         /** @stable ICU 54 */
   1098         public static final int PALMYRENE_ID = 244; /*[10860]*/
   1099         /** @stable ICU 54 */
   1100         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
   1101         /** @stable ICU 54 */
   1102         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
   1103         /** @stable ICU 54 */
   1104         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
   1105         /** @stable ICU 54 */
   1106         public static final int SIDDHAM_ID = 248; /*[11580]*/
   1107         /** @stable ICU 54 */
   1108         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
   1109         /** @stable ICU 54 */
   1110         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
   1111         /** @stable ICU 54 */
   1112         public static final int TIRHUTA_ID = 251; /*[11480]*/
   1113         /** @stable ICU 54 */
   1114         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
   1115 
   1116         /* New blocks in Unicode 8.0 */
                /*
                 * Block IDs 253-262. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
   1117 
   1118         /** @stable ICU 56 */
   1119         public static final int AHOM_ID = 253; /*[11700]*/
   1120         /** @stable ICU 56 */
   1121         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
   1122         /** @stable ICU 56 */
   1123         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
   1124         /** @stable ICU 56 */
   1125         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
   1126         /** @stable ICU 56 */
   1127         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
   1128         /** @stable ICU 56 */
   1129         public static final int HATRAN_ID = 258; /*[108E0]*/
   1130         /** @stable ICU 56 */
   1131         public static final int MULTANI_ID = 259; /*[11280]*/
   1132         /** @stable ICU 56 */
   1133         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
   1134         /** @stable ICU 56 */
   1135         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
   1136         /** @stable ICU 56 */
   1137         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
   1138 
   1139         /* New blocks in Unicode 9.0 */
                /*
                 * Block IDs 263-273. The bracketed hex number in each trailing comment appears
                 * to be the block's first code point.
                 */
   1140 
   1141         /** @stable ICU 58 */
   1142         public static final int ADLAM_ID = 263; /*[1E900]*/
   1143         /** @stable ICU 58 */
   1144         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
   1145         /** @stable ICU 58 */
   1146         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
   1147         /** @stable ICU 58 */
   1148         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
   1149         /** @stable ICU 58 */
   1150         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
   1151         /** @stable ICU 58 */
   1152         public static final int MARCHEN_ID = 268; /*[11C70]*/
   1153         /** @stable ICU 58 */
   1154         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
   1155         /** @stable ICU 58 */
   1156         public static final int NEWA_ID = 270; /*[11400]*/
   1157         /** @stable ICU 58 */
   1158         public static final int OSAGE_ID = 271; /*[104B0]*/
   1159         /** @stable ICU 58 */
   1160         public static final int TANGUT_ID = 272; /*[17000]*/
   1161         /** @stable ICU 58 */
   1162         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
   1163 
   1164         /**
   1165          * One more than the highest normal UnicodeBlock value.
                 * In this version that is TANGUT_COMPONENTS_ID (273) + 1; the same value
                 * is used as the length of the BLOCKS_ array.
   1166          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
   1167          *
   1168          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   1169          */
   1170         @Deprecated
   1171         public static final int COUNT = 274;
   1172 
   1173         // blocks objects ---------------------------------------------------
   1174 
   1175         /**
   1176          * Array of UnicodeBlocks, for easy access in getInstance(int)
   1177          */
   1178         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
   1179 
   1180         /**
                 * Block constant created with the name "NO_BLOCK" and the literal ID 0;
                 * presumably the value used for code points outside every block (TODO confirm).
   1181          * @stable ICU 2.6
   1182          */
   1183         public static final UnicodeBlock NO_BLOCK
   1184         = new UnicodeBlock("NO_BLOCK", 0);
   1185 
   1186         /**
                 * Unicode block constant paired with {@link #BASIC_LATIN_ID}.
   1187          * @stable ICU 2.4
   1188          */
   1189         public static final UnicodeBlock BASIC_LATIN
   1190         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
   1191         /**
                 * Unicode block constant paired with {@link #LATIN_1_SUPPLEMENT_ID}.
   1192          * @stable ICU 2.4
   1193          */
   1194         public static final UnicodeBlock LATIN_1_SUPPLEMENT
   1195         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
   1196         /**
                 * Unicode block constant paired with {@link #LATIN_EXTENDED_A_ID}.
   1197          * @stable ICU 2.4
   1198          */
   1199         public static final UnicodeBlock LATIN_EXTENDED_A
   1200         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
   1201         /**
                 * Unicode block constant paired with {@link #LATIN_EXTENDED_B_ID}.
   1202          * @stable ICU 2.4
   1203          */
   1204         public static final UnicodeBlock LATIN_EXTENDED_B
   1205         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
   1206         /**
                 * Unicode block constant paired with {@link #IPA_EXTENSIONS_ID}.
   1207          * @stable ICU 2.4
   1208          */
   1209         public static final UnicodeBlock IPA_EXTENSIONS
   1210         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
   1211         /**
                 * Unicode block constant paired with {@link #SPACING_MODIFIER_LETTERS_ID}.
   1212          * @stable ICU 2.4
   1213          */
   1214         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
   1215         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
   1216         /**
                 * Unicode block constant paired with {@link #COMBINING_DIACRITICAL_MARKS_ID}.
   1217          * @stable ICU 2.4
   1218          */
   1219         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
   1220         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
   1221         /**
                 * Unicode block constant paired with {@link #GREEK_ID}.
   1222          * Unicode 3.2 renames this block to "Greek and Coptic".
   1223          * @stable ICU 2.4
   1224          */
   1225         public static final UnicodeBlock GREEK
   1226         = new UnicodeBlock("GREEK", GREEK_ID);
   1227         /**
                 * Unicode block constant paired with {@link #CYRILLIC_ID}.
   1228          * @stable ICU 2.4
   1229          */
   1230         public static final UnicodeBlock CYRILLIC
   1231         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
   1232         /**
                 * Unicode block constant paired with {@link #ARMENIAN_ID}.
   1233          * @stable ICU 2.4
   1234          */
   1235         public static final UnicodeBlock ARMENIAN
   1236         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
   1237         /**
                 * Unicode block constant paired with {@link #HEBREW_ID}.
   1238          * @stable ICU 2.4
   1239          */
   1240         public static final UnicodeBlock HEBREW
   1241         = new UnicodeBlock("HEBREW", HEBREW_ID);
   1242         /**
                 * Unicode block constant paired with {@link #ARABIC_ID}.
   1243          * @stable ICU 2.4
   1244          */
   1245         public static final UnicodeBlock ARABIC
   1246         = new UnicodeBlock("ARABIC", ARABIC_ID);
   1247         /**
                 * Unicode block constant paired with {@link #SYRIAC_ID}.
   1248          * @stable ICU 2.4
   1249          */
   1250         public static final UnicodeBlock SYRIAC
   1251         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
   1252         /**
                 * Unicode block constant paired with {@link #THAANA_ID}.
   1253          * @stable ICU 2.4
   1254          */
   1255         public static final UnicodeBlock THAANA
   1256         = new UnicodeBlock("THAANA", THAANA_ID);
   1257         /**
                 * Unicode block constant paired with {@link #DEVANAGARI_ID}.
   1258          * @stable ICU 2.4
   1259          */
   1260         public static final UnicodeBlock DEVANAGARI
   1261         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
   1262         /**
                 * Unicode block constant paired with {@link #BENGALI_ID}.
   1263          * @stable ICU 2.4
   1264          */
   1265         public static final UnicodeBlock BENGALI
   1266         = new UnicodeBlock("BENGALI", BENGALI_ID);
   1267         /**
                 * Unicode block constant paired with {@link #GURMUKHI_ID}.
   1268          * @stable ICU 2.4
   1269          */
   1270         public static final UnicodeBlock GURMUKHI
   1271         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
   1272         /**
                 * Unicode block constant paired with {@link #GUJARATI_ID}.
   1273          * @stable ICU 2.4
   1274          */
   1275         public static final UnicodeBlock GUJARATI
   1276         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
   1277         /**
                 * Unicode block constant paired with {@link #ORIYA_ID}.
   1278          * @stable ICU 2.4
   1279          */
   1280         public static final UnicodeBlock ORIYA
   1281         = new UnicodeBlock("ORIYA", ORIYA_ID);
   1282         /**
                 * Unicode block constant paired with {@link #TAMIL_ID}.
   1283          * @stable ICU 2.4
   1284          */
   1285         public static final UnicodeBlock TAMIL
   1286         = new UnicodeBlock("TAMIL", TAMIL_ID);
   1287         /**
                 * Unicode block constant paired with {@link #TELUGU_ID}.
   1288          * @stable ICU 2.4
   1289          */
   1290         public static final UnicodeBlock TELUGU
   1291         = new UnicodeBlock("TELUGU", TELUGU_ID);
   1292         /**
                 * Unicode block constant paired with {@link #KANNADA_ID}.
   1293          * @stable ICU 2.4
   1294          */
   1295         public static final UnicodeBlock KANNADA
   1296         = new UnicodeBlock("KANNADA", KANNADA_ID);
   1297         /**
                 * Unicode block constant paired with {@link #MALAYALAM_ID}.
   1298          * @stable ICU 2.4
   1299          */
   1300         public static final UnicodeBlock MALAYALAM
   1301         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
   1302         /**
                 * Unicode block constant paired with {@link #SINHALA_ID}.
   1303          * @stable ICU 2.4
   1304          */
   1305         public static final UnicodeBlock SINHALA
   1306         = new UnicodeBlock("SINHALA", SINHALA_ID);
   1307         /**
                 * Unicode block constant paired with {@link #THAI_ID}.
   1308          * @stable ICU 2.4
   1309          */
   1310         public static final UnicodeBlock THAI
   1311         = new UnicodeBlock("THAI", THAI_ID);
   1312         /**
                 * Unicode block constant paired with {@link #LAO_ID}.
   1313          * @stable ICU 2.4
   1314          */
   1315         public static final UnicodeBlock LAO
   1316         = new UnicodeBlock("LAO", LAO_ID);
   1317         /**
                 * Unicode block constant paired with {@link #TIBETAN_ID}.
   1318          * @stable ICU 2.4
   1319          */
   1320         public static final UnicodeBlock TIBETAN
   1321         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
   1322         /**
                 * Unicode block constant paired with {@link #MYANMAR_ID}.
   1323          * @stable ICU 2.4
   1324          */
   1325         public static final UnicodeBlock MYANMAR
   1326         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
   1327         /**
                 * Unicode block constant paired with {@link #GEORGIAN_ID}.
   1328          * @stable ICU 2.4
   1329          */
   1330         public static final UnicodeBlock GEORGIAN
   1331         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
   1332         /**
                 * Unicode block constant paired with {@link #HANGUL_JAMO_ID}.
   1333          * @stable ICU 2.4
   1334          */
   1335         public static final UnicodeBlock HANGUL_JAMO
   1336         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
   1337         /**
                 * Unicode block constant paired with {@link #ETHIOPIC_ID}.
   1338          * @stable ICU 2.4
   1339          */
   1340         public static final UnicodeBlock ETHIOPIC
   1341         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
   1342         /**
                 * Unicode block constant paired with {@link #CHEROKEE_ID}.
   1343          * @stable ICU 2.4
   1344          */
   1345         public static final UnicodeBlock CHEROKEE
   1346         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
   1347         /**
                 * Unicode block constant paired with {@link #UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID}.
   1348          * @stable ICU 2.4
   1349          */
   1350         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
   1351         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
   1352                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
   1353         /**
                 * Unicode block constant paired with {@link #OGHAM_ID}.
   1354          * @stable ICU 2.4
   1355          */
   1356         public static final UnicodeBlock OGHAM
   1357         = new UnicodeBlock("OGHAM", OGHAM_ID);
   1358         /**
                 * Unicode block constant paired with {@link #RUNIC_ID}.
   1359          * @stable ICU 2.4
   1360          */
   1361         public static final UnicodeBlock RUNIC
   1362         = new UnicodeBlock("RUNIC", RUNIC_ID);
   1363         /**
                 * Unicode block constant paired with {@link #KHMER_ID}.
   1364          * @stable ICU 2.4
   1365          */
   1366         public static final UnicodeBlock KHMER
   1367         = new UnicodeBlock("KHMER", KHMER_ID);
   1368         /**
                 * Unicode block constant paired with {@link #MONGOLIAN_ID}.
   1369          * @stable ICU 2.4
   1370          */
   1371         public static final UnicodeBlock MONGOLIAN
   1372         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
   1373         /**
                 * Unicode block constant paired with {@link #LATIN_EXTENDED_ADDITIONAL_ID}.
   1374          * @stable ICU 2.4
   1375          */
   1376         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
   1377         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
   1378         /**
                 * Unicode block constant paired with {@link #GREEK_EXTENDED_ID}.
   1379          * @stable ICU 2.4
   1380          */
   1381         public static final UnicodeBlock GREEK_EXTENDED
   1382         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
   1383         /**
                 * Unicode block constant paired with {@link #GENERAL_PUNCTUATION_ID}.
   1384          * @stable ICU 2.4
   1385          */
   1386         public static final UnicodeBlock GENERAL_PUNCTUATION
   1387         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
   1388         /**
                 * Unicode block constant paired with {@link #SUPERSCRIPTS_AND_SUBSCRIPTS_ID}.
   1389          * @stable ICU 2.4
   1390          */
   1391         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
   1392         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
   1393         /**
                 * Unicode block constant paired with {@link #CURRENCY_SYMBOLS_ID}.
   1394          * @stable ICU 2.4
   1395          */
   1396         public static final UnicodeBlock CURRENCY_SYMBOLS
   1397         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
   1398         /**
                 * Unicode block constant paired with {@link #COMBINING_MARKS_FOR_SYMBOLS_ID}.
   1399          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
   1400          * Symbols".
   1401          * @stable ICU 2.4
   1402          */
   1403         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
   1404         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
   1405         /**
                 * Unicode block constant paired with {@link #LETTERLIKE_SYMBOLS_ID}.
   1406          * @stable ICU 2.4
   1407          */
   1408         public static final UnicodeBlock LETTERLIKE_SYMBOLS
   1409         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
   1410         /**
                 * Unicode block constant paired with {@link #NUMBER_FORMS_ID}.
   1411          * @stable ICU 2.4
   1412          */
   1413         public static final UnicodeBlock NUMBER_FORMS
   1414         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
   1415         /**
                 * Unicode block constant paired with {@link #ARROWS_ID}.
   1416          * @stable ICU 2.4
   1417          */
   1418         public static final UnicodeBlock ARROWS
   1419         = new UnicodeBlock("ARROWS", ARROWS_ID);
   1420         /**
                 * Unicode block constant paired with {@link #MATHEMATICAL_OPERATORS_ID}.
   1421          * @stable ICU 2.4
   1422          */
   1423         public static final UnicodeBlock MATHEMATICAL_OPERATORS
   1424         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
   1425         /**
                 * Unicode block constant paired with {@link #MISCELLANEOUS_TECHNICAL_ID}.
   1426          * @stable ICU 2.4
   1427          */
   1428         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
   1429         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
   1430         /**
                 * Unicode block constant paired with {@link #CONTROL_PICTURES_ID}.
   1431          * @stable ICU 2.4
   1432          */
   1433         public static final UnicodeBlock CONTROL_PICTURES
   1434         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
   1435         /**
                 * Unicode block constant paired with {@link #OPTICAL_CHARACTER_RECOGNITION_ID}.
   1436          * @stable ICU 2.4
   1437          */
   1438         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
   1439         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
   1440         /**
                 * Unicode block constant paired with {@link #ENCLOSED_ALPHANUMERICS_ID}.
   1441          * @stable ICU 2.4
   1442          */
   1443         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
   1444         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
   1445         /**
                 * Unicode block constant paired with {@link #BOX_DRAWING_ID}.
   1446          * @stable ICU 2.4
   1447          */
   1448         public static final UnicodeBlock BOX_DRAWING
   1449         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
   1450         /**
                 * Unicode block constant paired with {@link #BLOCK_ELEMENTS_ID}.
   1451          * @stable ICU 2.4
   1452          */
   1453         public static final UnicodeBlock BLOCK_ELEMENTS
   1454         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
   1455         /**
                 * Unicode block constant paired with {@link #GEOMETRIC_SHAPES_ID}.
   1456          * @stable ICU 2.4
   1457          */
   1458         public static final UnicodeBlock GEOMETRIC_SHAPES
   1459         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
   1460         /**
                 * Unicode block constant paired with {@link #MISCELLANEOUS_SYMBOLS_ID}.
   1461          * @stable ICU 2.4
   1462          */
   1463         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
   1464         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
   1465         /**
                 * Unicode block constant paired with {@link #DINGBATS_ID}.
   1466          * @stable ICU 2.4
   1467          */
   1468         public static final UnicodeBlock DINGBATS
   1469         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
   1470         /**
                 * Unicode block constant paired with {@link #BRAILLE_PATTERNS_ID}.
   1471          * @stable ICU 2.4
   1472          */
   1473         public static final UnicodeBlock BRAILLE_PATTERNS
   1474         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
   1475         /**
                 * Unicode block constant paired with {@link #CJK_RADICALS_SUPPLEMENT_ID}.
   1476          * @stable ICU 2.4
   1477          */
   1478         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
   1479         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
   1480         /**
                 * Unicode block constant paired with {@link #KANGXI_RADICALS_ID}.
   1481          * @stable ICU 2.4
   1482          */
   1483         public static final UnicodeBlock KANGXI_RADICALS
   1484         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
   1485         /**
                 * Unicode block constant paired with {@link #IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID}.
   1486          * @stable ICU 2.4
   1487          */
   1488         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
   1489         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
   1490                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
   1491         /**
                 * Unicode block constant paired with {@link #CJK_SYMBOLS_AND_PUNCTUATION_ID}.
   1492          * @stable ICU 2.4
   1493          */
   1494         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
   1495         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
   1496         /**
                 * Unicode block constant paired with {@link #HIRAGANA_ID}.
   1497          * @stable ICU 2.4
   1498          */
   1499         public static final UnicodeBlock HIRAGANA
   1500         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
   1501         /**
                 * Unicode block constant paired with {@link #KATAKANA_ID}.
   1502          * @stable ICU 2.4
   1503          */
   1504         public static final UnicodeBlock KATAKANA
   1505         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
   1506         /**
                 * Unicode block constant paired with {@link #BOPOMOFO_ID}.
   1507          * @stable ICU 2.4
   1508          */
   1509         public static final UnicodeBlock BOPOMOFO
   1510         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
   1511         /**
                 * Unicode block constant paired with {@link #HANGUL_COMPATIBILITY_JAMO_ID}.
   1512          * @stable ICU 2.4
   1513          */
   1514         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
   1515         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
   1516         /**
                 * Unicode block constant paired with {@link #KANBUN_ID}.
   1517          * @stable ICU 2.4
   1518          */
   1519         public static final UnicodeBlock KANBUN
   1520         = new UnicodeBlock("KANBUN", KANBUN_ID);
   1521         /**
                 * Unicode block constant paired with {@link #BOPOMOFO_EXTENDED_ID}.
   1522          * @stable ICU 2.4
   1523          */
   1524         public static final UnicodeBlock BOPOMOFO_EXTENDED
   1525         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
   1526         /**
                 * Unicode block constant paired with {@link #ENCLOSED_CJK_LETTERS_AND_MONTHS_ID}.
   1527          * @stable ICU 2.4
   1528          */
   1529         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
   1530         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
   1531                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
   1532         /**
                 * Unicode block constant paired with {@link #CJK_COMPATIBILITY_ID}.
   1533          * @stable ICU 2.4
   1534          */
   1535         public static final UnicodeBlock CJK_COMPATIBILITY
   1536         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
   1537         /**
                 * Unicode block constant paired with {@link #CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID}.
   1538          * @stable ICU 2.4
   1539          */
   1540         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
   1541         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
   1542                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
   1543         /**
                 * Unicode block constant paired with {@link #CJK_UNIFIED_IDEOGRAPHS_ID}.
   1544          * @stable ICU 2.4
   1545          */
   1546         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
   1547         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
   1548         /**
                 * Unicode block constant paired with {@link #YI_SYLLABLES_ID}.
   1549          * @stable ICU 2.4
   1550          */
   1551         public static final UnicodeBlock YI_SYLLABLES
   1552         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
   1553         /**
   1554          * @stable ICU 2.4
   1555          */
   1556         public static final UnicodeBlock YI_RADICALS
   1557         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
   1558         /**
   1559          * @stable ICU 2.4
   1560          */
   1561         public static final UnicodeBlock HANGUL_SYLLABLES
   1562         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
   1563         /**
   1564          * @stable ICU 2.4
   1565          */
   1566         public static final UnicodeBlock HIGH_SURROGATES
   1567         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
   1568         /**
   1569          * @stable ICU 2.4
   1570          */
   1571         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
   1572         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
   1573         /**
   1574          * @stable ICU 2.4
   1575          */
   1576         public static final UnicodeBlock LOW_SURROGATES
   1577         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
   1578         /**
   1579          * Same as public static final int PRIVATE_USE.
   1580          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
   1581          * and multiple code point ranges had this block.
   1582          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
   1583          * and adds separate blocks for the supplementary PUAs.
   1584          * @stable ICU 2.4
   1585          */
   1586         public static final UnicodeBlock PRIVATE_USE_AREA
   1587         = new UnicodeBlock("PRIVATE_USE_AREA",  78);
   1588         /**
   1589          * Same as public static final int PRIVATE_USE_AREA.
   1590          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
   1591          * and multiple code point ranges had this block.
   1592          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
   1593          * and adds separate blocks for the supplementary PUAs.
   1594          * @stable ICU 2.4
   1595          */
   1596         public static final UnicodeBlock PRIVATE_USE
   1597         = PRIVATE_USE_AREA;
   1598         /**
   1599          * @stable ICU 2.4
   1600          */
   1601         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
   1602         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
   1603         /**
   1604          * @stable ICU 2.4
   1605          */
   1606         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
   1607         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
   1608         /**
   1609          * @stable ICU 2.4
   1610          */
   1611         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
   1612         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
   1613         /**
   1614          * @stable ICU 2.4
   1615          */
   1616         public static final UnicodeBlock COMBINING_HALF_MARKS
   1617         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
   1618         /**
   1619          * @stable ICU 2.4
   1620          */
   1621         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
   1622         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
   1623         /**
   1624          * @stable ICU 2.4
   1625          */
   1626         public static final UnicodeBlock SMALL_FORM_VARIANTS
   1627         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
   1628         /**
   1629          * @stable ICU 2.4
   1630          */
   1631         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
   1632         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
   1633         /**
   1634          * @stable ICU 2.4
   1635          */
   1636         public static final UnicodeBlock SPECIALS
   1637         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
   1638         /**
   1639          * @stable ICU 2.4
   1640          */
   1641         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
   1642         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
   1643         /**
   1644          * @stable ICU 2.4
   1645          */
   1646         public static final UnicodeBlock OLD_ITALIC
   1647         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
   1648         /**
   1649          * @stable ICU 2.4
   1650          */
   1651         public static final UnicodeBlock GOTHIC
   1652         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
   1653         /**
   1654          * @stable ICU 2.4
   1655          */
   1656         public static final UnicodeBlock DESERET
   1657         = new UnicodeBlock("DESERET", DESERET_ID);
   1658         /**
   1659          * @stable ICU 2.4
   1660          */
   1661         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
   1662         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
   1663         /**
   1664          * @stable ICU 2.4
   1665          */
   1666         public static final UnicodeBlock MUSICAL_SYMBOLS
   1667         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
   1668         /**
   1669          * @stable ICU 2.4
   1670          */
   1671         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
   1672         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
   1673                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
   1674         /**
   1675          * @stable ICU 2.4
   1676          */
   1677         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
   1678         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
   1679                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
   1680         /**
   1681          * @stable ICU 2.4
   1682          */
   1683         public static final UnicodeBlock
   1684         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
   1685         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
   1686                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
   1687         /**
   1688          * @stable ICU 2.4
   1689          */
   1690         public static final UnicodeBlock TAGS
   1691         = new UnicodeBlock("TAGS", TAGS_ID);
   1692 
   1693         // New blocks in Unicode 3.2
   1694 
   1695         /**
   1696          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
   1697          * @stable ICU 2.4
   1698          */
   1699         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
   1700         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
   1701         /**
   1702          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
   1703          * @stable ICU 3.0
   1704          */
   1705         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
   1706         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
   1707         /**
   1708          * @stable ICU 2.4
   1709          */
   1710         public static final UnicodeBlock TAGALOG
   1711         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
   1712         /**
   1713          * @stable ICU 2.4
   1714          */
   1715         public static final UnicodeBlock HANUNOO
   1716         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
   1717         /**
   1718          * @stable ICU 2.4
   1719          */
   1720         public static final UnicodeBlock BUHID
   1721         = new UnicodeBlock("BUHID", BUHID_ID);
   1722         /**
   1723          * @stable ICU 2.4
   1724          */
   1725         public static final UnicodeBlock TAGBANWA
   1726         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
   1727         /**
   1728          * @stable ICU 2.4
   1729          */
   1730         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
   1731         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
   1732                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
   1733         /**
   1734          * @stable ICU 2.4
   1735          */
   1736         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
   1737         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
   1738         /**
   1739          * @stable ICU 2.4
   1740          */
   1741         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
   1742         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
   1743         /**
   1744          * @stable ICU 2.4
   1745          */
   1746         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
   1747         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
   1748                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
   1749         /**
   1750          * @stable ICU 2.4
   1751          */
   1752         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
   1753         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
   1754                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
   1755         /**
   1756          * @stable ICU 2.4
   1757          */
   1758         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
   1759         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
   1760         /**
   1761          * @stable ICU 2.4
   1762          */
   1763         public static final UnicodeBlock VARIATION_SELECTORS
   1764         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
   1765         /**
   1766          * @stable ICU 2.4
   1767          */
   1768         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
   1769         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
   1770                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
   1771         /**
   1772          * @stable ICU 2.4
   1773          */
   1774         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
   1775         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
   1776                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
   1777 
   1778         /**
   1779          * @stable ICU 2.6
   1780          */
   1781         public static final UnicodeBlock LIMBU
   1782         = new UnicodeBlock("LIMBU", LIMBU_ID);
   1783         /**
   1784          * @stable ICU 2.6
   1785          */
   1786         public static final UnicodeBlock TAI_LE
   1787         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
   1788         /**
   1789          * @stable ICU 2.6
   1790          */
   1791         public static final UnicodeBlock KHMER_SYMBOLS
   1792         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
   1793 
   1794         /**
   1795          * @stable ICU 2.6
   1796          */
   1797         public static final UnicodeBlock PHONETIC_EXTENSIONS
   1798         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
   1799 
   1800         /**
   1801          * @stable ICU 2.6
   1802          */
   1803         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
   1804         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
   1805                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
   1806         /**
   1807          * @stable ICU 2.6
   1808          */
   1809         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
   1810         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
   1811         /**
   1812          * @stable ICU 2.6
   1813          */
   1814         public static final UnicodeBlock LINEAR_B_SYLLABARY
   1815         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
   1816         /**
   1817          * @stable ICU 2.6
   1818          */
   1819         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
   1820         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
   1821         /**
   1822          * @stable ICU 2.6
   1823          */
   1824         public static final UnicodeBlock AEGEAN_NUMBERS
   1825         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
   1826         /**
   1827          * @stable ICU 2.6
   1828          */
   1829         public static final UnicodeBlock UGARITIC
   1830         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
   1831         /**
   1832          * @stable ICU 2.6
   1833          */
   1834         public static final UnicodeBlock SHAVIAN
   1835         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
   1836         /**
   1837          * @stable ICU 2.6
   1838          */
   1839         public static final UnicodeBlock OSMANYA
   1840         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
   1841         /**
   1842          * @stable ICU 2.6
   1843          */
   1844         public static final UnicodeBlock CYPRIOT_SYLLABARY
   1845         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
   1846         /**
   1847          * @stable ICU 2.6
   1848          */
   1849         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
   1850         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
   1851 
   1852         /**
   1853          * @stable ICU 2.6
   1854          */
   1855         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
   1856         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
   1857 
   1858         /* New blocks in Unicode 4.1 */
   1859 
   1860         /**
   1861          * @stable ICU 3.4
   1862          */
   1863         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
   1864                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
   1865                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
   1866 
   1867         /**
   1868          * @stable ICU 3.4
   1869          */
   1870         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
   1871                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
   1872 
   1873         /**
   1874          * @stable ICU 3.4
   1875          */
   1876         public static final UnicodeBlock ARABIC_SUPPLEMENT =
   1877                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
   1878 
   1879         /**
   1880          * @stable ICU 3.4
   1881          */
   1882         public static final UnicodeBlock BUGINESE =
   1883                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
   1884 
   1885         /**
   1886          * @stable ICU 3.4
   1887          */
   1888         public static final UnicodeBlock CJK_STROKES =
   1889                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
   1890 
   1891         /**
   1892          * @stable ICU 3.4
   1893          */
   1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
   1895                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
   1896                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
   1897 
   1898         /**
   1899          * @stable ICU 3.4
   1900          */
   1901         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
   1902 
   1903         /**
   1904          * @stable ICU 3.4
   1905          */
   1906         public static final UnicodeBlock ETHIOPIC_EXTENDED =
   1907                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
   1908 
   1909         /**
   1910          * @stable ICU 3.4
   1911          */
   1912         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
   1913                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
   1914 
   1915         /**
   1916          * @stable ICU 3.4
   1917          */
   1918         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
   1919                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
   1920 
   1921         /**
   1922          * @stable ICU 3.4
   1923          */
   1924         public static final UnicodeBlock GLAGOLITIC =
   1925                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
   1926 
   1927         /**
   1928          * @stable ICU 3.4
   1929          */
   1930         public static final UnicodeBlock KHAROSHTHI =
   1931                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
   1932 
   1933         /**
   1934          * @stable ICU 3.4
   1935          */
   1936         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
   1937                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
   1938 
   1939         /**
   1940          * @stable ICU 3.4
   1941          */
   1942         public static final UnicodeBlock NEW_TAI_LUE =
   1943                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
   1944 
   1945         /**
   1946          * @stable ICU 3.4
   1947          */
   1948         public static final UnicodeBlock OLD_PERSIAN =
   1949                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
   1950 
   1951         /**
   1952          * @stable ICU 3.4
   1953          */
   1954         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
   1955                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
   1956                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
   1957 
   1958         /**
   1959          * @stable ICU 3.4
   1960          */
   1961         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
   1962                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
   1963 
   1964         /**
   1965          * @stable ICU 3.4
   1966          */
   1967         public static final UnicodeBlock SYLOTI_NAGRI =
   1968                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
   1969 
   1970         /**
   1971          * @stable ICU 3.4
   1972          */
   1973         public static final UnicodeBlock TIFINAGH =
   1974                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
   1975 
   1976         /**
   1977          * @stable ICU 3.4
   1978          */
   1979         public static final UnicodeBlock VERTICAL_FORMS =
   1980                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
   1981 
   1982         /**
   1983          * @stable ICU 3.6
   1984          */
   1985         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
   1986         /**
   1987          * @stable ICU 3.6
   1988          */
   1989         public static final UnicodeBlock BALINESE =
   1990                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
   1991         /**
   1992          * @stable ICU 3.6
   1993          */
   1994         public static final UnicodeBlock LATIN_EXTENDED_C =
   1995                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
   1996         /**
   1997          * @stable ICU 3.6
   1998          */
   1999         public static final UnicodeBlock LATIN_EXTENDED_D =
   2000                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
   2001         /**
   2002          * @stable ICU 3.6
   2003          */
   2004         public static final UnicodeBlock PHAGS_PA =
   2005                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
   2006         /**
   2007          * @stable ICU 3.6
   2008          */
   2009         public static final UnicodeBlock PHOENICIAN =
   2010                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
   2011         /**
   2012          * @stable ICU 3.6
   2013          */
   2014         public static final UnicodeBlock CUNEIFORM =
   2015                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
   2016         /**
   2017          * @stable ICU 3.6
   2018          */
   2019         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
   2020                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
   2021                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
   2022         /**
   2023          * @stable ICU 3.6
   2024          */
   2025         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
   2026                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
   2027 
   2028         /**
   2029          * @stable ICU 4.0
   2030          */
   2031         public static final UnicodeBlock SUNDANESE =
   2032                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
   2033 
   2034         /**
   2035          * @stable ICU 4.0
   2036          */
   2037         public static final UnicodeBlock LEPCHA =
   2038                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
   2039 
   2040         /**
   2041          * @stable ICU 4.0
   2042          */
   2043         public static final UnicodeBlock OL_CHIKI =
   2044                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
   2045 
   2046         /**
   2047          * @stable ICU 4.0
   2048          */
   2049         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
   2050                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
   2051 
   2052         /**
   2053          * @stable ICU 4.0
   2054          */
   2055         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
   2056 
   2057         /**
   2058          * @stable ICU 4.0
   2059          */
   2060         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
   2061                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
   2062 
   2063         /**
   2064          * @stable ICU 4.0
   2065          */
   2066         public static final UnicodeBlock SAURASHTRA =
   2067                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
   2068 
   2069         /**
   2070          * @stable ICU 4.0
   2071          */
   2072         public static final UnicodeBlock KAYAH_LI =
   2073                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
   2074 
   2075         /**
   2076          * @stable ICU 4.0
   2077          */
   2078         public static final UnicodeBlock REJANG =
   2079                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
   2080 
   2081         /**
   2082          * @stable ICU 4.0
   2083          */
   2084         public static final UnicodeBlock CHAM =
   2085                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
   2086 
   2087         /**
   2088          * @stable ICU 4.0
   2089          */
   2090         public static final UnicodeBlock ANCIENT_SYMBOLS =
   2091                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
   2092 
   2093         /**
   2094          * @stable ICU 4.0
   2095          */
   2096         public static final UnicodeBlock PHAISTOS_DISC =
   2097                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
   2098 
   2099         /**
   2100          * @stable ICU 4.0
   2101          */
   2102         public static final UnicodeBlock LYCIAN =
   2103                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
   2104 
   2105         /**
   2106          * @stable ICU 4.0
   2107          */
   2108         public static final UnicodeBlock CARIAN =
   2109                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
   2110 
   2111         /**
   2112          * @stable ICU 4.0
   2113          */
   2114         public static final UnicodeBlock LYDIAN =
   2115                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
   2116 
   2117         /**
   2118          * @stable ICU 4.0
   2119          */
   2120         public static final UnicodeBlock MAHJONG_TILES =
   2121                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
   2122 
   2123         /**
   2124          * @stable ICU 4.0
   2125          */
   2126         public static final UnicodeBlock DOMINO_TILES =
   2127                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
   2128 
   2129         /* New blocks in Unicode 5.2 */
   2130 
   2131         /** @stable ICU 4.4 */
   2132         public static final UnicodeBlock SAMARITAN =
   2133                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
   2134         /** @stable ICU 4.4 */
   2135         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
   2136                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
   2137                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
   2138         /** @stable ICU 4.4 */
   2139         public static final UnicodeBlock TAI_THAM =
   2140                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
   2141         /** @stable ICU 4.4 */
   2142         public static final UnicodeBlock VEDIC_EXTENSIONS =
   2143                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
   2144         /** @stable ICU 4.4 */
   2145         public static final UnicodeBlock LISU =
   2146                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
   2147         /** @stable ICU 4.4 */
   2148         public static final UnicodeBlock BAMUM =
   2149                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
   2150         /** @stable ICU 4.4 */
   2151         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
   2152                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
   2153         /** @stable ICU 4.4 */
   2154         public static final UnicodeBlock DEVANAGARI_EXTENDED =
   2155                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
   2156         /** @stable ICU 4.4 */
   2157         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
   2158                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
   2159         /** @stable ICU 4.4 */
   2160         public static final UnicodeBlock JAVANESE =
   2161                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
   2162         /** @stable ICU 4.4 */
   2163         public static final UnicodeBlock MYANMAR_EXTENDED_A =
   2164                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
   2165         /** @stable ICU 4.4 */
   2166         public static final UnicodeBlock TAI_VIET =
   2167                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
   2168         /** @stable ICU 4.4 */
   2169         public static final UnicodeBlock MEETEI_MAYEK =
   2170                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
   2171         /** @stable ICU 4.4 */
   2172         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
   2173                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
   2174         /** @stable ICU 4.4 */
   2175         public static final UnicodeBlock IMPERIAL_ARAMAIC =
   2176                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
   2177         /** @stable ICU 4.4 */
   2178         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
   2179                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
   2180         /** @stable ICU 4.4 */
   2181         public static final UnicodeBlock AVESTAN =
   2182                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
   2183         /** @stable ICU 4.4 */
   2184         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
   2185                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
   2186         /** @stable ICU 4.4 */
   2187         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
   2188                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
   2189         /** @stable ICU 4.4 */
   2190         public static final UnicodeBlock OLD_TURKIC =
   2191                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
   2192         /** @stable ICU 4.4 */
   2193         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
   2194                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
   2195         /** @stable ICU 4.4 */
   2196         public static final UnicodeBlock KAITHI =
   2197                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
   2198         /** @stable ICU 4.4 */
   2199         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
   2200                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
   2201         /** @stable ICU 4.4 */
   2202         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
   2203                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
   2204                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
   2205         /** @stable ICU 4.4 */
   2206         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
   2207                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
   2208                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
   2209         /** @stable ICU 4.4 */
   2210         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
   2211                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
   2212                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
   2213 
   2214         /* New blocks in Unicode 6.0 */
   2215 
   2216         /** @stable ICU 4.6 */
   2217         public static final UnicodeBlock MANDAIC =
   2218                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
   2219         /** @stable ICU 4.6 */
   2220         public static final UnicodeBlock BATAK =
   2221                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
   2222         /** @stable ICU 4.6 */
   2223         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
   2224                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
   2225         /** @stable ICU 4.6 */
   2226         public static final UnicodeBlock BRAHMI =
   2227                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
   2228         /** @stable ICU 4.6 */
   2229         public static final UnicodeBlock BAMUM_SUPPLEMENT =
   2230                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
   2231         /** @stable ICU 4.6 */
   2232         public static final UnicodeBlock KANA_SUPPLEMENT =
   2233                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
   2234         /** @stable ICU 4.6 */
   2235         public static final UnicodeBlock PLAYING_CARDS =
   2236                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
   2237         /** @stable ICU 4.6 */
   2238         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
   2239                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
   2240                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
   2241         /** @stable ICU 4.6 */
   2242         public static final UnicodeBlock EMOTICONS =
   2243                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
   2244         /** @stable ICU 4.6 */
   2245         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
   2246                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
   2247         /** @stable ICU 4.6 */
   2248         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
   2249                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
   2250         /** @stable ICU 4.6 */
   2251         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
   2252                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
   2253                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
   2254 
   2255         /* New blocks in Unicode 6.1 */
   2256 
   2257         /** @stable ICU 49 */
   2258         public static final UnicodeBlock ARABIC_EXTENDED_A =
   2259                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
   2260         /** @stable ICU 49 */
   2261         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
   2262                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
   2263         /** @stable ICU 49 */
   2264         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
   2265         /** @stable ICU 49 */
   2266         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
   2267                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
   2268         /** @stable ICU 49 */
   2269         public static final UnicodeBlock MEROITIC_CURSIVE =
   2270                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
   2271         /** @stable ICU 49 */
   2272         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
   2273                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
   2274         /** @stable ICU 49 */
   2275         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
   2276         /** @stable ICU 49 */
   2277         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
   2278         /** @stable ICU 49 */
   2279         public static final UnicodeBlock SORA_SOMPENG =
   2280                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
   2281         /** @stable ICU 49 */
   2282         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
   2283                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
   2284         /** @stable ICU 49 */
   2285         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
   2286 
   2287         /* New blocks in Unicode 7.0 */
   2288 
   2289         /** @stable ICU 54 */
   2290         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
   2291         /** @stable ICU 54 */
   2292         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
   2293                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
   2294         /** @stable ICU 54 */
   2295         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
   2296                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
   2297         /** @stable ICU 54 */
   2298         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
   2299                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
   2300         /** @stable ICU 54 */
   2301         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
   2302         /** @stable ICU 54 */
   2303         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
   2304         /** @stable ICU 54 */
   2305         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
   2306                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
   2307         /** @stable ICU 54 */
   2308         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
   2309         /** @stable ICU 54 */
   2310         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
   2311         /** @stable ICU 54 */
   2312         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
   2313         /** @stable ICU 54 */
   2314         public static final UnicodeBlock LATIN_EXTENDED_E =
   2315                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
   2316         /** @stable ICU 54 */
   2317         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
   2318         /** @stable ICU 54 */
   2319         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
   2320         /** @stable ICU 54 */
   2321         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
   2322         /** @stable ICU 54 */
   2323         public static final UnicodeBlock MENDE_KIKAKUI =
   2324                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
   2325         /** @stable ICU 54 */
   2326         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
   2327         /** @stable ICU 54 */
   2328         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
   2329         /** @stable ICU 54 */
   2330         public static final UnicodeBlock MYANMAR_EXTENDED_B =
   2331                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
   2332         /** @stable ICU 54 */
   2333         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
   2334         /** @stable ICU 54 */
   2335         public static final UnicodeBlock OLD_NORTH_ARABIAN =
   2336                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
   2337         /** @stable ICU 54 */
   2338         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
   2339         /** @stable ICU 54 */
   2340         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
   2341                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
   2342         /** @stable ICU 54 */
   2343         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
   2344         /** @stable ICU 54 */
   2345         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
   2346         /** @stable ICU 54 */
   2347         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
   2348         /** @stable ICU 54 */
   2349         public static final UnicodeBlock PSALTER_PAHLAVI =
   2350                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
   2351         /** @stable ICU 54 */
   2352         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
   2353                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
   2354         /** @stable ICU 54 */
   2355         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
   2356         /** @stable ICU 54 */
   2357         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
   2358                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
   2359         /** @stable ICU 54 */
   2360         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
   2361                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
   2362         /** @stable ICU 54 */
   2363         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
   2364         /** @stable ICU 54 */
   2365         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
   2366 
   2367         /* New blocks in Unicode 8.0 */
   2368 
   2369         /** @stable ICU 56 */
   2370         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
   2371         /** @stable ICU 56 */
   2372         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
   2373                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
   2374         /** @stable ICU 56 */
   2375         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
   2376                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
   2377         /** @stable ICU 56 */
   2378         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
   2379                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
   2380                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
   2381         /** @stable ICU 56 */
   2382         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
   2383                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
   2384         /** @stable ICU 56 */
   2385         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
   2386         /** @stable ICU 56 */
   2387         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
   2388         /** @stable ICU 56 */
   2389         public static final UnicodeBlock OLD_HUNGARIAN =
   2390                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
   2391         /** @stable ICU 56 */
   2392         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
   2393                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
   2394                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
   2395         /** @stable ICU 56 */
   2396         public static final UnicodeBlock SUTTON_SIGNWRITING =
   2397                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
   2398 
   2399         /* New blocks in Unicode 9.0 */
   2400 
   2401         /** @stable ICU 58 */
   2402         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
   2403         /** @stable ICU 58 */
   2404         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
   2405         /** @stable ICU 58 */
   2406         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
   2407                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
   2408         /** @stable ICU 58 */
   2409         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
   2410                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
   2411         /** @stable ICU 58 */
   2412         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
   2413                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
   2414         /** @stable ICU 58 */
   2415         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
   2416         /** @stable ICU 58 */
   2417         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
   2418                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
   2419         /** @stable ICU 58 */
   2420         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
   2421         /** @stable ICU 58 */
   2422         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
   2423         /** @stable ICU 58 */
   2424         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
   2425         /** @stable ICU 58 */
   2426         public static final UnicodeBlock TANGUT_COMPONENTS =
   2427                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
   2428 
   2429         /**
   2430          * @stable ICU 2.4
   2431          */
   2432         public static final UnicodeBlock INVALID_CODE
   2433         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
   2434 
   2435         static {
   2436             for (int blockId = 0; blockId < COUNT; ++blockId) {
   2437                 if (BLOCKS_[blockId] == null) {
   2438                     throw new java.lang.IllegalStateException(
   2439                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
   2440                 }
   2441             }
   2442         }
   2443 
   2444         // public methods --------------------------------------------------
   2445 
   2446         /**
   2447          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
   2448          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
   2449          * @param id UnicodeBlock ID
   2450          * @return the only instance of the UnicodeBlock with the argument ID
   2451          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
   2452          *         returned.
   2453          * @stable ICU 2.4
   2454          */
   2455         public static UnicodeBlock getInstance(int id)
   2456         {
   2457             if (id >= 0 && id < BLOCKS_.length) {
   2458                 return BLOCKS_[id];
   2459             }
   2460             return INVALID_CODE;
   2461         }
   2462 
   2463         /**
   2464          * Returns the Unicode allocation block that contains the code point,
   2465          * or null if the code point is not a member of a defined block.
   2466          * @param ch code point to be tested
   2467          * @return the Unicode allocation block that contains the code point
   2468          * @stable ICU 2.4
   2469          */
   2470         public static UnicodeBlock of(int ch)
   2471         {
   2472             if (ch > MAX_VALUE) {
   2473                 return INVALID_CODE;
   2474             }
   2475 
   2476             return UnicodeBlock.getInstance(
   2477                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
   2478         }
   2479 
   2480         /**
   2481          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
   2482          * Returns the Unicode block with the given name. {@icunote} Unlike
   2483          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
   2484          * against the official UCD name and the Java block name
   2485          * (ignoring case).
   2486          * @param blockName the name of the block to match
   2487          * @return the UnicodeBlock with that name
   2488          * @throws IllegalArgumentException if the blockName could not be matched
   2489          * @stable ICU 3.0
   2490          */
   2491         public static final UnicodeBlock forName(String blockName) {
   2492             Map<String, UnicodeBlock> m = null;
   2493             if (mref != null) {
   2494                 m = mref.get();
   2495             }
   2496             if (m == null) {
   2497                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
   2498                 for (int i = 0; i < BLOCKS_.length; ++i) {
   2499                     UnicodeBlock b = BLOCKS_[i];
   2500                     String name = trimBlockName(
   2501                             getPropertyValueName(UProperty.BLOCK, b.getID(),
   2502                                     UProperty.NameChoice.LONG));
   2503                     m.put(name, b);
   2504                 }
   2505                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
   2506             }
   2507             UnicodeBlock b = m.get(trimBlockName(blockName));
   2508             if (b == null) {
   2509                 throw new IllegalArgumentException();
   2510             }
   2511             return b;
   2512         }
   2513         private static SoftReference<Map<String, UnicodeBlock>> mref;
   2514 
   2515         private static String trimBlockName(String name) {
   2516             String upper = name.toUpperCase(Locale.ENGLISH);
   2517             StringBuilder result = new StringBuilder(upper.length());
   2518             for (int i = 0; i < upper.length(); i++) {
   2519                 char c = upper.charAt(i);
   2520                 if (c != ' ' && c != '_' && c != '-') {
   2521                     result.append(c);
   2522                 }
   2523             }
   2524             return result.toString();
   2525         }
   2526 
   2527         /**
   2528          * {icu} Returns the type ID of this Unicode block
   2529          * @return integer type ID of this Unicode block
   2530          * @stable ICU 2.4
   2531          */
   2532         public int getID()
   2533         {
   2534             return m_id_;
   2535         }
   2536 
   2537         // private data members ---------------------------------------------
   2538 
   2539         /**
   2540          * Identification code for this UnicodeBlock
   2541          */
   2542         private int m_id_;
   2543 
   2544         // private constructor ----------------------------------------------
   2545 
   2546         /**
   2547          * UnicodeBlock constructor
   2548          * @param name name of this UnicodeBlock
   2549          * @param id unique id of this UnicodeBlock
   2550          * @exception NullPointerException if name is <code>null</code>
   2551          */
   2552         private UnicodeBlock(String name, int id)
   2553         {
   2554             super(name);
   2555             m_id_ = id;
   2556             if (id >= 0) {
   2557                 BLOCKS_[id] = this;
   2558             }
   2559         }
   2560     }
   2561 
   2562     /**
   2563      * East Asian Width constants.
   2564      * @see UProperty#EAST_ASIAN_WIDTH
   2565      * @see UCharacter#getIntPropertyValue
   2566      * @stable ICU 2.4
   2567      */
   2568     public static interface EastAsianWidth
   2569     {
   2570         /**
   2571          * @stable ICU 2.4
   2572          */
   2573         public static final int NEUTRAL = 0;
   2574         /**
   2575          * @stable ICU 2.4
   2576          */
   2577         public static final int AMBIGUOUS = 1;
   2578         /**
   2579          * @stable ICU 2.4
   2580          */
   2581         public static final int HALFWIDTH = 2;
   2582         /**
   2583          * @stable ICU 2.4
   2584          */
   2585         public static final int FULLWIDTH = 3;
   2586         /**
   2587          * @stable ICU 2.4
   2588          */
   2589         public static final int NARROW = 4;
   2590         /**
   2591          * @stable ICU 2.4
   2592          */
   2593         public static final int WIDE = 5;
   2594         /**
   2595          * One more than the highest normal EastAsianWidth value.
   2596          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
   2597          *
   2598          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2599          */
   2600         @Deprecated
   2601         public static final int COUNT = 6;
   2602     }
   2603 
   2604     /**
   2605      * Decomposition Type constants.
   2606      * @see UProperty#DECOMPOSITION_TYPE
   2607      * @stable ICU 2.4
   2608      */
   2609     public static interface DecompositionType
   2610     {
   2611         /**
   2612          * @stable ICU 2.4
   2613          */
   2614         public static final int NONE = 0;
   2615         /**
   2616          * @stable ICU 2.4
   2617          */
   2618         public static final int CANONICAL = 1;
   2619         /**
   2620          * @stable ICU 2.4
   2621          */
   2622         public static final int COMPAT = 2;
   2623         /**
   2624          * @stable ICU 2.4
   2625          */
   2626         public static final int CIRCLE = 3;
   2627         /**
   2628          * @stable ICU 2.4
   2629          */
   2630         public static final int FINAL = 4;
   2631         /**
   2632          * @stable ICU 2.4
   2633          */
   2634         public static final int FONT = 5;
   2635         /**
   2636          * @stable ICU 2.4
   2637          */
   2638         public static final int FRACTION = 6;
   2639         /**
   2640          * @stable ICU 2.4
   2641          */
   2642         public static final int INITIAL = 7;
   2643         /**
   2644          * @stable ICU 2.4
   2645          */
   2646         public static final int ISOLATED = 8;
   2647         /**
   2648          * @stable ICU 2.4
   2649          */
   2650         public static final int MEDIAL = 9;
   2651         /**
   2652          * @stable ICU 2.4
   2653          */
   2654         public static final int NARROW = 10;
   2655         /**
   2656          * @stable ICU 2.4
   2657          */
   2658         public static final int NOBREAK = 11;
   2659         /**
   2660          * @stable ICU 2.4
   2661          */
   2662         public static final int SMALL = 12;
   2663         /**
   2664          * @stable ICU 2.4
   2665          */
   2666         public static final int SQUARE = 13;
   2667         /**
   2668          * @stable ICU 2.4
   2669          */
   2670         public static final int SUB = 14;
   2671         /**
   2672          * @stable ICU 2.4
   2673          */
   2674         public static final int SUPER = 15;
   2675         /**
   2676          * @stable ICU 2.4
   2677          */
   2678         public static final int VERTICAL = 16;
   2679         /**
   2680          * @stable ICU 2.4
   2681          */
   2682         public static final int WIDE = 17;
   2683         /**
   2684          * One more than the highest normal DecompositionType value.
   2685          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
   2686          *
   2687          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2688          */
   2689         @Deprecated
   2690         public static final int COUNT = 18;
   2691     }
   2692 
   2693     /**
   2694      * Joining Type constants.
   2695      * @see UProperty#JOINING_TYPE
   2696      * @stable ICU 2.4
   2697      */
   2698     public static interface JoiningType
   2699     {
   2700         /**
   2701          * @stable ICU 2.4
   2702          */
   2703         public static final int NON_JOINING = 0;
   2704         /**
   2705          * @stable ICU 2.4
   2706          */
   2707         public static final int JOIN_CAUSING = 1;
   2708         /**
   2709          * @stable ICU 2.4
   2710          */
   2711         public static final int DUAL_JOINING = 2;
   2712         /**
   2713          * @stable ICU 2.4
   2714          */
   2715         public static final int LEFT_JOINING = 3;
   2716         /**
   2717          * @stable ICU 2.4
   2718          */
   2719         public static final int RIGHT_JOINING = 4;
   2720         /**
   2721          * @stable ICU 2.4
   2722          */
   2723         public static final int TRANSPARENT = 5;
   2724         /**
   2725          * One more than the highest normal JoiningType value.
   2726          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
   2727          *
   2728          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2729          */
   2730         @Deprecated
   2731         public static final int COUNT = 6;
   2732     }
   2733 
   2734     /**
   2735      * Joining Group constants.
   2736      * @see UProperty#JOINING_GROUP
   2737      * @stable ICU 2.4
   2738      */
   2739     public static interface JoiningGroup
   2740     {
   2741         /**
   2742          * @stable ICU 2.4
   2743          */
   2744         public static final int NO_JOINING_GROUP = 0;
   2745         /**
   2746          * @stable ICU 2.4
   2747          */
   2748         public static final int AIN = 1;
   2749         /**
   2750          * @stable ICU 2.4
   2751          */
   2752         public static final int ALAPH = 2;
   2753         /**
   2754          * @stable ICU 2.4
   2755          */
   2756         public static final int ALEF = 3;
   2757         /**
   2758          * @stable ICU 2.4
   2759          */
   2760         public static final int BEH = 4;
   2761         /**
   2762          * @stable ICU 2.4
   2763          */
   2764         public static final int BETH = 5;
   2765         /**
   2766          * @stable ICU 2.4
   2767          */
   2768         public static final int DAL = 6;
   2769         /**
   2770          * @stable ICU 2.4
   2771          */
   2772         public static final int DALATH_RISH = 7;
   2773         /**
   2774          * @stable ICU 2.4
   2775          */
   2776         public static final int E = 8;
   2777         /**
   2778          * @stable ICU 2.4
   2779          */
   2780         public static final int FEH = 9;
   2781         /**
   2782          * @stable ICU 2.4
   2783          */
   2784         public static final int FINAL_SEMKATH = 10;
   2785         /**
   2786          * @stable ICU 2.4
   2787          */
   2788         public static final int GAF = 11;
   2789         /**
   2790          * @stable ICU 2.4
   2791          */
   2792         public static final int GAMAL = 12;
   2793         /**
   2794          * @stable ICU 2.4
   2795          */
   2796         public static final int HAH = 13;
   2797         /** @stable ICU 4.6 */
   2798         public static final int TEH_MARBUTA_GOAL = 14;
   2799         /**
   2800          * @stable ICU 2.4
   2801          */
   2802         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
   2803         /**
   2804          * @stable ICU 2.4
   2805          */
   2806         public static final int HE = 15;
   2807         /**
   2808          * @stable ICU 2.4
   2809          */
   2810         public static final int HEH = 16;
   2811         /**
   2812          * @stable ICU 2.4
   2813          */
   2814         public static final int HEH_GOAL = 17;
   2815         /**
   2816          * @stable ICU 2.4
   2817          */
   2818         public static final int HETH = 18;
   2819         /**
   2820          * @stable ICU 2.4
   2821          */
   2822         public static final int KAF = 19;
   2823         /**
   2824          * @stable ICU 2.4
   2825          */
   2826         public static final int KAPH = 20;
   2827         /**
   2828          * @stable ICU 2.4
   2829          */
   2830         public static final int KNOTTED_HEH = 21;
   2831         /**
   2832          * @stable ICU 2.4
   2833          */
   2834         public static final int LAM = 22;
   2835         /**
   2836          * @stable ICU 2.4
   2837          */
   2838         public static final int LAMADH = 23;
   2839         /**
   2840          * @stable ICU 2.4
   2841          */
   2842         public static final int MEEM = 24;
   2843         /**
   2844          * @stable ICU 2.4
   2845          */
   2846         public static final int MIM = 25;
   2847         /**
   2848          * @stable ICU 2.4
   2849          */
   2850         public static final int NOON = 26;
   2851         /**
   2852          * @stable ICU 2.4
   2853          */
   2854         public static final int NUN = 27;
   2855         /**
   2856          * @stable ICU 2.4
   2857          */
   2858         public static final int PE = 28;
   2859         /**
   2860          * @stable ICU 2.4
   2861          */
   2862         public static final int QAF = 29;
   2863         /**
   2864          * @stable ICU 2.4
   2865          */
   2866         public static final int QAPH = 30;
   2867         /**
   2868          * @stable ICU 2.4
   2869          */
   2870         public static final int REH = 31;
   2871         /**
   2872          * @stable ICU 2.4
   2873          */
   2874         public static final int REVERSED_PE = 32;
   2875         /**
   2876          * @stable ICU 2.4
   2877          */
   2878         public static final int SAD = 33;
   2879         /**
   2880          * @stable ICU 2.4
   2881          */
   2882         public static final int SADHE = 34;
   2883         /**
   2884          * @stable ICU 2.4
   2885          */
   2886         public static final int SEEN = 35;
   2887         /**
   2888          * @stable ICU 2.4
   2889          */
   2890         public static final int SEMKATH = 36;
   2891         /**
   2892          * @stable ICU 2.4
   2893          */
   2894         public static final int SHIN = 37;
   2895         /**
   2896          * @stable ICU 2.4
   2897          */
   2898         public static final int SWASH_KAF = 38;
   2899         /**
   2900          * @stable ICU 2.4
   2901          */
   2902         public static final int SYRIAC_WAW = 39;
   2903         /**
   2904          * @stable ICU 2.4
   2905          */
   2906         public static final int TAH = 40;
   2907         /**
   2908          * @stable ICU 2.4
   2909          */
   2910         public static final int TAW = 41;
   2911         /**
   2912          * @stable ICU 2.4
   2913          */
   2914         public static final int TEH_MARBUTA = 42;
   2915         /**
   2916          * @stable ICU 2.4
   2917          */
   2918         public static final int TETH = 43;
   2919         /**
   2920          * @stable ICU 2.4
   2921          */
   2922         public static final int WAW = 44;
   2923         /**
   2924          * @stable ICU 2.4
   2925          */
   2926         public static final int YEH = 45;
   2927         /**
   2928          * @stable ICU 2.4
   2929          */
   2930         public static final int YEH_BARREE = 46;
   2931         /**
   2932          * @stable ICU 2.4
   2933          */
   2934         public static final int YEH_WITH_TAIL = 47;
   2935         /**
   2936          * @stable ICU 2.4
   2937          */
   2938         public static final int YUDH = 48;
   2939         /**
   2940          * @stable ICU 2.4
   2941          */
   2942         public static final int YUDH_HE = 49;
   2943         /**
   2944          * @stable ICU 2.4
   2945          */
   2946         public static final int ZAIN = 50;
   2947         /**
   2948          * @stable ICU 2.6
   2949          */
   2950         public static final int FE = 51;
   2951         /**
   2952          * @stable ICU 2.6
   2953          */
   2954         public static final int KHAPH = 52;
   2955         /**
   2956          * @stable ICU 2.6
   2957          */
   2958         public static final int ZHAIN = 53;
   2959         /**
   2960          * @stable ICU 4.0
   2961          */
   2962         public static final int BURUSHASKI_YEH_BARREE = 54;
   2963         /** @stable ICU 4.4 */
   2964         public static final int FARSI_YEH = 55;
   2965         /** @stable ICU 4.4 */
   2966         public static final int NYA = 56;
   2967         /** @stable ICU 49 */
   2968         public static final int ROHINGYA_YEH = 57;
   2969 
   2970         /** @stable ICU 54 */
   2971         public static final int MANICHAEAN_ALEPH = 58;
   2972         /** @stable ICU 54 */
   2973         public static final int MANICHAEAN_AYIN = 59;
   2974         /** @stable ICU 54 */
   2975         public static final int MANICHAEAN_BETH = 60;
   2976         /** @stable ICU 54 */
   2977         public static final int MANICHAEAN_DALETH = 61;
   2978         /** @stable ICU 54 */
   2979         public static final int MANICHAEAN_DHAMEDH = 62;
   2980         /** @stable ICU 54 */
   2981         public static final int MANICHAEAN_FIVE = 63;
   2982         /** @stable ICU 54 */
   2983         public static final int MANICHAEAN_GIMEL = 64;
   2984         /** @stable ICU 54 */
   2985         public static final int MANICHAEAN_HETH = 65;
   2986         /** @stable ICU 54 */
   2987         public static final int MANICHAEAN_HUNDRED = 66;
   2988         /** @stable ICU 54 */
   2989         public static final int MANICHAEAN_KAPH = 67;
   2990         /** @stable ICU 54 */
   2991         public static final int MANICHAEAN_LAMEDH = 68;
   2992         /** @stable ICU 54 */
   2993         public static final int MANICHAEAN_MEM = 69;
   2994         /** @stable ICU 54 */
   2995         public static final int MANICHAEAN_NUN = 70;
   2996         /** @stable ICU 54 */
   2997         public static final int MANICHAEAN_ONE = 71;
   2998         /** @stable ICU 54 */
   2999         public static final int MANICHAEAN_PE = 72;
   3000         /** @stable ICU 54 */
   3001         public static final int MANICHAEAN_QOPH = 73;
   3002         /** @stable ICU 54 */
   3003         public static final int MANICHAEAN_RESH = 74;
   3004         /** @stable ICU 54 */
   3005         public static final int MANICHAEAN_SADHE = 75;
   3006         /** @stable ICU 54 */
   3007         public static final int MANICHAEAN_SAMEKH = 76;
   3008         /** @stable ICU 54 */
   3009         public static final int MANICHAEAN_TAW = 77;
   3010         /** @stable ICU 54 */
   3011         public static final int MANICHAEAN_TEN = 78;
   3012         /** @stable ICU 54 */
   3013         public static final int MANICHAEAN_TETH = 79;
   3014         /** @stable ICU 54 */
   3015         public static final int MANICHAEAN_THAMEDH = 80;
   3016         /** @stable ICU 54 */
   3017         public static final int MANICHAEAN_TWENTY = 81;
   3018         /** @stable ICU 54 */
   3019         public static final int MANICHAEAN_WAW = 82;
   3020         /** @stable ICU 54 */
   3021         public static final int MANICHAEAN_YODH = 83;
   3022         /** @stable ICU 54 */
   3023         public static final int MANICHAEAN_ZAYIN = 84;
   3024         /** @stable ICU 54 */
   3025         public static final int STRAIGHT_WAW = 85;
   3026 
   3027         /** @stable ICU 58 */
   3028         public static final int AFRICAN_FEH = 86;
   3029         /** @stable ICU 58 */
   3030         public static final int AFRICAN_NOON = 87;
   3031         /** @stable ICU 58 */
   3032         public static final int AFRICAN_QAF = 88;
   3033 
   3034         /**
   3035          * One more than the highest normal JoiningGroup value.
   3036          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
   3037          *
   3038          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3039          */
   3040         @Deprecated
   3041         public static final int COUNT = 89;
   3042     }
   3043 
   3044     /**
   3045      * Integer constants (consecutive, starting at 0) for the values of the Grapheme Cluster Break property.
   3046      * @see UProperty#GRAPHEME_CLUSTER_BREAK
   3047      * @stable ICU 3.4
   3048      */
   3049     public static interface GraphemeClusterBreak {
   3050         /**
   3051          * @stable ICU 3.4
   3052          */
   3053         public static final int OTHER = 0;
   3054         /**
   3055          * @stable ICU 3.4
   3056          */
   3057         public static final int CONTROL = 1;
   3058         /**
   3059          * @stable ICU 3.4
   3060          */
   3061         public static final int CR = 2;
   3062         /**
   3063          * @stable ICU 3.4
   3064          */
   3065         public static final int EXTEND = 3;
   3066         /**
   3067          * @stable ICU 3.4
   3068          */
   3069         public static final int L = 4;
   3070         /**
   3071          * @stable ICU 3.4
   3072          */
   3073         public static final int LF = 5;
   3074         /**
   3075          * @stable ICU 3.4
   3076          */
   3077         public static final int LV = 6;
   3078         /**
   3079          * @stable ICU 3.4
   3080          */
   3081         public static final int LVT = 7;
   3082         /**
   3083          * @stable ICU 3.4
   3084          */
   3085         public static final int T = 8;
   3086         /**
   3087          * @stable ICU 3.4
   3088          */
   3089         public static final int V = 9;
   3090         /**
   3091          * @stable ICU 4.0
   3092          */
   3093         public static final int SPACING_MARK = 10;
   3094         /**
   3095          * @stable ICU 4.0
   3096          */
   3097         public static final int PREPEND = 11;
   3098         /** @stable ICU 50 */
   3099         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
   3100         /** @stable ICU 58 */
   3101         public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
   3102         /** @stable ICU 58 */
   3103         public static final int E_BASE_GAZ = 14;      /*[EBG]*/
   3104         /** @stable ICU 58 */
   3105         public static final int E_MODIFIER = 15;      /*[EM]*/
   3106         /** @stable ICU 58 */
   3107         public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
   3108         /** @stable ICU 58 */
   3109         public static final int ZWJ = 17;             /*[ZWJ]*/
   3110         /**
   3111          * One more than the highest normal GraphemeClusterBreak value.
   3112          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
   3113          *
   3114          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3115          */
   3116         @Deprecated
   3117         public static final int COUNT = 18;
   3118     }
   3119 
   3120     /**
   3121      * Integer constants (consecutive, starting at 0) for the values of the Word Break property.
   3122      * @see UProperty#WORD_BREAK
   3123      * @stable ICU 3.4
   3124      */
   3125     public static interface WordBreak {
   3126         /**
   3127          * @stable ICU 3.8
   3128          */
   3129         public static final int OTHER = 0;
   3130         /**
   3131          * @stable ICU 3.8
   3132          */
   3133         public static final int ALETTER = 1;
   3134         /**
   3135          * @stable ICU 3.8
   3136          */
   3137         public static final int FORMAT = 2;
   3138         /**
   3139          * @stable ICU 3.8
   3140          */
   3141         public static final int KATAKANA = 3;
   3142         /**
   3143          * @stable ICU 3.8
   3144          */
   3145         public static final int MIDLETTER = 4;
   3146         /**
   3147          * @stable ICU 3.8
   3148          */
   3149         public static final int MIDNUM = 5;
   3150         /**
   3151          * @stable ICU 3.8
   3152          */
   3153         public static final int NUMERIC = 6;
   3154         /**
   3155          * @stable ICU 3.8
   3156          */
   3157         public static final int EXTENDNUMLET = 7;
   3158         /**
   3159          * @stable ICU 4.0
   3160          */
   3161         public static final int CR = 8;
   3162         /**
   3163          * @stable ICU 4.0
   3164          */
   3165         public static final int EXTEND = 9;
   3166         /**
   3167          * @stable ICU 4.0
   3168          */
   3169         public static final int LF = 10;
   3170         /**
   3171          * @stable ICU 4.0
   3172          */
   3173         public static final int MIDNUMLET = 11;
   3174         /**
   3175          * @stable ICU 4.0
   3176          */
   3177         public static final int NEWLINE = 12;
   3178         /** @stable ICU 50 */
   3179         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
   3180         /** @stable ICU 52 */
   3181         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
   3182         /** @stable ICU 52 */
   3183         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
   3184         /** @stable ICU 52 */
   3185         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
   3186         /** @stable ICU 58 */
   3187         public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
   3188         /** @stable ICU 58 */
   3189         public static final int E_BASE_GAZ = 18;       /*[EBG]*/
   3190         /** @stable ICU 58 */
   3191         public static final int E_MODIFIER = 19;       /*[EM]*/
   3192         /** @stable ICU 58 */
   3193         public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
   3194         /** @stable ICU 58 */
   3195         public static final int ZWJ = 21;              /*[ZWJ]*/
   3196         /**
   3197          * One more than the highest normal WordBreak value.
   3198          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
   3199          *
   3200          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3201          */
   3202         @Deprecated
   3203         public static final int COUNT = 22;
   3204     }
   3205 
   3206     /**
   3207      * Integer constants (consecutive, starting at 0) for the values of the Sentence Break property.
   3208      * @see UProperty#SENTENCE_BREAK
   3209      * @stable ICU 3.4
   3210      */
   3211     public static interface SentenceBreak {
   3212         /**
   3213          * @stable ICU 3.8
   3214          */
   3215         public static final int OTHER = 0;
   3216         /**
   3217          * @stable ICU 3.8
   3218          */
   3219         public static final int ATERM = 1;
   3220         /**
   3221          * @stable ICU 3.8
   3222          */
   3223         public static final int CLOSE = 2;
   3224         /**
   3225          * @stable ICU 3.8
   3226          */
   3227         public static final int FORMAT = 3;
   3228         /**
   3229          * @stable ICU 3.8
   3230          */
   3231         public static final int LOWER = 4;
   3232         /**
   3233          * @stable ICU 3.8
   3234          */
   3235         public static final int NUMERIC = 5;
   3236         /**
   3237          * @stable ICU 3.8
   3238          */
   3239         public static final int OLETTER = 6;
   3240         /**
   3241          * @stable ICU 3.8
   3242          */
   3243         public static final int SEP = 7;
   3244         /**
   3245          * @stable ICU 3.8
   3246          */
   3247         public static final int SP = 8;
   3248         /**
   3249          * @stable ICU 3.8
   3250          */
   3251         public static final int STERM = 9;
   3252         /**
   3253          * @stable ICU 3.8
   3254          */
   3255         public static final int UPPER = 10;
   3256         /**
   3257          * @stable ICU 4.0
   3258          */
   3259         public static final int CR = 11;
   3260         /**
   3261          * @stable ICU 4.0
   3262          */
   3263         public static final int EXTEND = 12;
   3264         /**
   3265          * @stable ICU 4.0
   3266          */
   3267         public static final int LF = 13;
   3268         /**
   3269          * @stable ICU 4.0
   3270          */
   3271         public static final int SCONTINUE = 14;
   3272         /**
   3273          * One more than the highest normal SentenceBreak value.
   3274          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
   3275          *
   3276          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3277          */
   3278         @Deprecated
   3279         public static final int COUNT = 15;
   3280     }
   3281 
   3282     /**
   3283      * Integer constants for the values of the Line Break property.
   3284      * @see UProperty#LINE_BREAK
   3285      * @stable ICU 2.4
   3286      */
   3287     public static interface LineBreak
   3288     {
   3289         /**
   3290          * @stable ICU 2.4
   3291          */
   3292         public static final int UNKNOWN = 0;
   3293         /**
   3294          * @stable ICU 2.4
   3295          */
   3296         public static final int AMBIGUOUS = 1;
   3297         /**
   3298          * @stable ICU 2.4
   3299          */
   3300         public static final int ALPHABETIC = 2;
   3301         /**
   3302          * @stable ICU 2.4
   3303          */
   3304         public static final int BREAK_BOTH = 3;
   3305         /**
   3306          * @stable ICU 2.4
   3307          */
   3308         public static final int BREAK_AFTER = 4;
   3309         /**
   3310          * @stable ICU 2.4
   3311          */
   3312         public static final int BREAK_BEFORE = 5;
   3313         /**
   3314          * @stable ICU 2.4
   3315          */
   3316         public static final int MANDATORY_BREAK = 6;
   3317         /**
   3318          * @stable ICU 2.4
   3319          */
   3320         public static final int CONTINGENT_BREAK = 7;
   3321         /**
   3322          * @stable ICU 2.4
   3323          */
   3324         public static final int CLOSE_PUNCTUATION = 8;
   3325         /**
   3326          * @stable ICU 2.4
   3327          */
   3328         public static final int COMBINING_MARK = 9;
   3329         /**
   3330          * @stable ICU 2.4
   3331          */
   3332         public static final int CARRIAGE_RETURN = 10;
   3333         /**
   3334          * @stable ICU 2.4
   3335          */
   3336         public static final int EXCLAMATION = 11;
   3337         /**
   3338          * @stable ICU 2.4
   3339          */
   3340         public static final int GLUE = 12;
   3341         /**
   3342          * @stable ICU 2.4
   3343          */
   3344         public static final int HYPHEN = 13;
   3345         /**
   3346          * @stable ICU 2.4
   3347          */
   3348         public static final int IDEOGRAPHIC = 14;
   3349         /** Original, misspelled name; has the same value as {@link #INSEPARABLE}.
   3350          * @see #INSEPARABLE
   3351          * @stable ICU 2.4
   3352          */
   3353         public static final int INSEPERABLE = 15;
   3354         /**
   3355          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
   3356          * @stable ICU 3.0
   3357          */
   3358         public static final int INSEPARABLE = 15;
   3359         /**
   3360          * @stable ICU 2.4
   3361          */
   3362         public static final int INFIX_NUMERIC = 16;
   3363         /**
   3364          * @stable ICU 2.4
   3365          */
   3366         public static final int LINE_FEED = 17;
   3367         /**
   3368          * @stable ICU 2.4
   3369          */
   3370         public static final int NONSTARTER = 18;
   3371         /**
   3372          * @stable ICU 2.4
   3373          */
   3374         public static final int NUMERIC = 19;
   3375         /**
   3376          * @stable ICU 2.4
   3377          */
   3378         public static final int OPEN_PUNCTUATION = 20;
   3379         /**
   3380          * @stable ICU 2.4
   3381          */
   3382         public static final int POSTFIX_NUMERIC = 21;
   3383         /**
   3384          * @stable ICU 2.4
   3385          */
   3386         public static final int PREFIX_NUMERIC = 22;
   3387         /**
   3388          * @stable ICU 2.4
   3389          */
   3390         public static final int QUOTATION = 23;
   3391         /**
   3392          * @stable ICU 2.4
   3393          */
   3394         public static final int COMPLEX_CONTEXT = 24;
   3395         /**
   3396          * @stable ICU 2.4
   3397          */
   3398         public static final int SURROGATE = 25;
   3399         /**
   3400          * @stable ICU 2.4
   3401          */
   3402         public static final int SPACE = 26;
   3403         /**
   3404          * @stable ICU 2.4
   3405          */
   3406         public static final int BREAK_SYMBOLS = 27;
   3407         /**
   3408          * @stable ICU 2.4
   3409          */
   3410         public static final int ZWSPACE = 28;
   3411         /**
   3412          * @stable ICU 2.6
   3413          */
   3414         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
   3415         /**
   3416          * @stable ICU 2.6
   3417          */
   3418         public static final int WORD_JOINER = 30;      /*[WJ]*/
   3419         /**
   3420          * @stable ICU 3.4
   3421          */
   3422         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
   3423         /**
   3424          * @stable ICU 3.4
   3425          */
   3426         public static final int H3 = 32;
   3427         /**
   3428          * @stable ICU 3.4
   3429          */
   3430         public static final int JL = 33;
   3431         /**
   3432          * @stable ICU 3.4
   3433          */
   3434         public static final int JT = 34;
   3435         /**
   3436          * @stable ICU 3.4
   3437          */
   3438         public static final int JV = 35;
   3439         /** @stable ICU 4.4 */
   3440         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
   3441         /** @stable ICU 49 */
   3442         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
   3443         /** @stable ICU 49 */
   3444         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
   3445         /** @stable ICU 50 */
   3446         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
   3447         /** @stable ICU 58 */
   3448         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
   3449         /** @stable ICU 58 */
   3450         public static final int E_MODIFIER = 41;  /*[EM]*/
   3451         /** @stable ICU 58 */
   3452         public static final int ZWJ = 42;  /*[ZWJ]*/
   3453         /**
   3454          * One more than the highest normal LineBreak value.
   3455          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
   3456          *
   3457          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3458          */
   3459         @Deprecated
   3460         public static final int COUNT = 43;
   3461     }
   3462 
   3463     /**
   3464      * Integer constants (consecutive, starting at 0) for the values of the Numeric Type property.
   3465      * @see UProperty#NUMERIC_TYPE
   3466      * @stable ICU 2.4
   3467      */
   3468     public static interface NumericType
   3469     {
   3470         /**
   3471          * @stable ICU 2.4
   3472          */
   3473         public static final int NONE = 0;
   3474         /**
   3475          * @stable ICU 2.4
   3476          */
   3477         public static final int DECIMAL = 1;
   3478         /**
   3479          * @stable ICU 2.4
   3480          */
   3481         public static final int DIGIT = 2;
   3482         /**
   3483          * @stable ICU 2.4
   3484          */
   3485         public static final int NUMERIC = 3;
   3486         /**
   3487          * One more than the highest normal NumericType value.
   3488          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
   3489          *
   3490          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3491          */
   3492         @Deprecated
   3493         public static final int COUNT = 4;
   3494     }
   3495 
   3496     /**
   3497      * Integer constants (consecutive, starting at 0) for the values of the Hangul Syllable Type property.
   3498      *
   3499      * @see UProperty#HANGUL_SYLLABLE_TYPE
   3500      * @stable ICU 2.6
   3501      */
   3502     public static interface HangulSyllableType
   3503     {
   3504         /**
   3505          * @stable ICU 2.6
   3506          */
   3507         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
   3508         /**
   3509          * @stable ICU 2.6
   3510          */
   3511         public static final int LEADING_JAMO        = 1;   /*[L]*/
   3512         /**
   3513          * @stable ICU 2.6
   3514          */
   3515         public static final int VOWEL_JAMO          = 2;   /*[V]*/
   3516         /**
   3517          * @stable ICU 2.6
   3518          */
   3519         public static final int TRAILING_JAMO       = 3;   /*[T]*/
   3520         /**
   3521          * @stable ICU 2.6
   3522          */
   3523         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
   3524         /**
   3525          * @stable ICU 2.6
   3526          */
   3527         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
   3528         /**
   3529          * One more than the highest normal HangulSyllableType value.
   3530          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
   3531          *
   3532          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3533          */
   3534         @Deprecated
   3535         public static final int COUNT               = 6;
   3536     }
   3537 
   3538     /**
   3539      * Integer constants (consecutive, starting at 0) for the values of the Bidi Paired Bracket Type property.
   3540      *
   3541      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
   3542      * @stable ICU 52
   3543      */
   3544     public static interface BidiPairedBracketType {
   3545         /**
   3546          * Not a paired bracket.
   3547          * @stable ICU 52
   3548          */
   3549         public static final int NONE = 0;
   3550         /**
   3551          * Open paired bracket.
   3552          * @stable ICU 52
   3553          */
   3554         public static final int OPEN = 1;
   3555         /**
   3556          * Close paired bracket.
   3557          * @stable ICU 52
   3558          */
   3559         public static final int CLOSE = 2;
   3560         /**
   3561          * One more than the highest normal BidiPairedBracketType value.
   3562          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
   3563          *
   3564          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3565          */
   3566         @Deprecated
   3567         public static final int COUNT = 3;
   3568     }
   3569 
   3570     // public data members -----------------------------------------------
   3571 
   3572     /**
   3573      * The lowest Unicode code point value, constant 0.
   3574      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
   3575      *
   3576      * @stable ICU 2.1
   3577      */
   3578     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
   3579
   3580     /**
   3581      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
   3582      * Same as {@link Character#MAX_CODE_POINT}.
   3583      *
   3584      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
   3585      * which is still a char with the value U+FFFF.
   3586      *
   3587      * @stable ICU 2.1
   3588      */
   3589     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
   3590
   3591     /**
   3592      * The minimum value for Supplementary code points, constant U+10000.
   3593      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
   3594      *
   3595      * @stable ICU 2.1
   3596      */
   3597     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
   3598
   3599     /**
   3600      * Replacement character, constant U+FFFD: the Unicode value used when translating
   3601      * into a Unicode encoding form and there is no existing character.
   3602      * @stable ICU 2.1
   3603      */
   3604     public static final int REPLACEMENT_CHAR = '\uFFFD';
   3605
   3606     /**
   3607      * Special value (-123456789) that is returned by {@code getUnicodeNumericValue(int)}
   3608      * when no numeric value is defined for a code point.
   3609      * @stable ICU 2.4
   3610      * @see #getUnicodeNumericValue
   3611      */
   3612     public static final double NO_NUMERIC_VALUE = -123456789;
   3613
   3614     /**
   3615      * Compatibility constant for Java Character's MIN_RADIX.
   3616      * @stable ICU 3.4
   3617      */
   3618     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
   3619
   3620     /**
   3621      * Compatibility constant for Java Character's MAX_RADIX.
   3622      * @stable ICU 3.4
   3623      */
   3624     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
   3625 
   3626     /**
   3627      * Do not lowercase non-initial parts of words when titlecasing.
   3628      * Option bit for titlecasing APIs that take an options bit set.
   3629      *
   3630      * <p>By default, titlecasing will titlecase the first cased character
   3631      * of a word and lowercase all other characters.
   3632      * With this option, the other characters will not be modified.
   3633      *
   3634      * @see #toTitleCase
   3635      * @stable ICU 3.8
   3636      */
   3637     public static final int TITLECASE_NO_LOWERCASE = 0x100;
   3638
   3639     /**
   3640      * Do not adjust the titlecasing indexes from {@code BreakIterator.next()} indexes;
   3641      * titlecase exactly the characters at breaks from the iterator.
   3642      * Option bit for titlecasing APIs that take an options bit set.
   3643      *
   3644      * <p>By default, titlecasing will take each break iterator index,
   3645      * adjust it by looking for the next cased character, and titlecase that one.
   3646      * Other characters are lowercased.
   3647      *
   3648      * <p>This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
   3649      *
   3650      * <p>R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
   3651      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
   3652      * cased character F. If F exists, map F to default_title(F); then map each
   3653      * subsequent character C to default_lower(C).
   3654      *
   3655      * @see #toTitleCase
   3656      * @see #TITLECASE_NO_LOWERCASE
   3657      * @stable ICU 3.8
   3658      */
   3659     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
   3660 
   3661     // public methods ----------------------------------------------------
   3662 
   3663     /**
   3664      * Returns the numeric value of a decimal digit code point.
   3665      * <br>This method observes the semantics of
   3666      * <code>java.lang.Character.digit()</code>.  Note that this
   3667      * will return positive values for code points for which isDigit
   3668      * returns false, just like java.lang.Character.
   3669      * <br><em>Semantic Change:</em> In release 1.3.1 and
   3670      * prior, this did not treat the European letters as having a
   3671      * digit value, and also treated numeric letters and other numbers as
   3672      * digits.
   3673      * This has been changed to conform to the java semantics.
   3674      * <br>A code point is a valid digit if and only if:
   3675      * <ul>
   3676      *   <li>ch is a decimal digit or one of the european letters, and
   3677      *   <li>the value of ch is less than the specified radix.
   3678      * </ul>
   3679      * @param ch the code point to query
   3680      * @param radix the radix; values outside 2..36 are invalid and yield -1
   3681      * @return the numeric value represented by the code point in the
   3682      * specified radix, or -1 if the code point is not a decimal digit
   3683      * or if its value is too large for the radix
   3684      * @stable ICU 2.1
   3685      */
   3686     public static int digit(int ch, int radix)
   3687     {
   3688         if (2 <= radix && radix <= 36) {
   3689             int value = digit(ch);
   3690             if (value < 0) {
   3691                 // ch is not a decimal digit, try latin letters
   3692                 value = UCharacterProperty.getEuropeanDigit(ch);
   3693             }
   3694             return (value < radix) ? value : -1;  // a still-negative value is < radix and is returned as is
   3695         } else {
   3696             return -1;  // invalid radix
   3697         }
   3698     }
   3699 
   3700     /**
   3701      * Returns the numeric value of a decimal digit code point.
   3702      * <br>This is a convenience overload of <code>digit(int, int)</code>
   3703      * that provides a decimal radix.
   3704      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
   3705      * treated numeric letters and other numbers as digits.  This has
   3706      * been changed to conform to the java semantics.
   3707      * @param ch the code point to query
   3708      * @return the numeric value represented by the code point,
   3709      * or -1 if the code point is not a decimal digit or if its
   3710      * value is too large for a decimal radix
   3711      * @stable ICU 2.1
   3712      */
   3713     public static int digit(int ch)
   3714     {
   3715         return UCharacterProperty.INSTANCE.digit(ch);
   3716     }
   3717 
   3718     /**
   3719      * Returns the numeric value of the code point as a nonnegative
   3720      * integer.
   3721      * <br>If the code point does not have a numeric value, then -1 is returned.
   3722      * <br>
   3723      * If the code point has a numeric value that cannot be represented as a
   3724      * nonnegative integer (for example, a fractional value), then -2 is
   3725      * returned.
   3726      * @param ch the code point to query
   3727      * @return the numeric value of the code point, or -1 if it has no numeric
   3728      * value, or -2 if it has a numeric value that cannot be represented as a
   3729      * nonnegative integer
   3730      * @stable ICU 2.1
   3731      */
   3732     public static int getNumericValue(int ch)
   3733     {
   3734         return UCharacterProperty.INSTANCE.getNumericValue(ch);
   3735     }
   3736 
   3737     /**
   3738      * {@icu} Returns the numeric value for a Unicode code point as defined in the
   3739      * Unicode Character Database.
   3740      * <p>A "double" return type is necessary because some numeric values are
   3741      * fractions, negative, or too large for int.
   3742      * <p>For characters without any numeric values in the Unicode Character
   3743      * Database, this function will return NO_NUMERIC_VALUE.
   3744      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
   3745      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
   3746      * return type int and returns -1 when the argument ch does not have a
   3747      * corresponding numeric value. This has been changed to sync with ICU4C.
   3748      *
   3749      * This corresponds to the ICU4C function u_getNumericValue.
   3750      * @param ch Code point to get the numeric value for.
   3751      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
   3752      * @stable ICU 2.4
   3753      */
   3754     public static double getUnicodeNumericValue(int ch)
   3755     {
   3756         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
   3757     }
   3758 
   3759     /**
   3760      * Compatibility override of Java deprecated method.  This
   3761      * method will always remain deprecated.
   3762      * Same as java.lang.Character.isSpace().
   3763      * @param ch the code point
   3764      * @return true if the code point is a space character as
   3765      * defined by java.lang.Character.isSpace.
   3766      * @deprecated ICU 3.4 (Java)
   3767      */
   3768     @Deprecated
   3769     public static boolean isSpace(int ch) {
   3770         return ch <= 0x20 &&
   3771                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
   3772     }
   3773 
   3774     /**
   3775      * Returns a value indicating a code point's Unicode category.
   3776      * Up-to-date Unicode implementation of java.lang.Character.getType()
   3777      * except for the above mentioned code points that had their category
   3778      * changed.<br>
   3779      * Return results are constants from the interface
   3780      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
   3781      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
   3782      * those returned by java.lang.Character.getType.  UCharacterCategory values
   3783      * match the ones used in ICU4C, while java.lang.Character type
   3784      * values, though similar, skip the value 17.
   3785      * @param ch code point whose type is to be determined
   3786      * @return category which is a value of UCharacterCategory
   3787      * @stable ICU 2.1
   3788      */
   3789     public static int getType(int ch)
   3790     {
   3791         return UCharacterProperty.INSTANCE.getType(ch);
   3792     }
   3793 
   3794     /**
   3795      * Determines if a code point has a defined meaning in the up-to-date
   3796      * Unicode standard.
   3797      * E.g. supplementary code points though allocated space are not defined in
   3798      * Unicode yet.<br>
   3799      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
   3800      * @param ch code point to be determined if it is defined in the most
   3801      *        current version of Unicode
   3802      * @return true if this code point is defined in unicode
   3803      * @stable ICU 2.1
   3804      */
   3805     public static boolean isDefined(int ch)
   3806     {
   3807         return getType(ch) != 0;
   3808     }
   3809 
   3810     /**
   3811      * Determines if a code point is a Java digit.
   3812      * <br>This method observes the semantics of
   3813      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
   3814      * digits only.
   3815      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
   3816      * numeric letters and other numbers as digits.
   3817      * This has been changed to conform to the java semantics.
   3818      * @param ch code point to query
   3819      * @return true if this code point is a digit
   3820      * @stable ICU 2.1
   3821      */
   3822     public static boolean isDigit(int ch)
   3823     {
   3824         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
   3825     }
   3826 
   3827     /**
   3828      * Determines if the specified code point is an ISO control character.
   3829      * A code point is considered to be an ISO control character if it is in
   3830      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
   3831      * &#92;u009F.<br>
   3832      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
   3833      * @param ch code point to determine if it is an ISO control character
   3834      * @return true if code point is an ISO control character
   3835      * @stable ICU 2.1
   3836      */
   3837     public static boolean isISOControl(int ch)
   3838     {
   3839         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
   3840                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
   3841     }
   3842 
   3843     /**
   3844      * Determines if the specified code point is a letter.
   3845      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
   3846      * @param ch code point to determine if it is a letter
   3847      * @return true if code point is a letter
   3848      * @stable ICU 2.1
   3849      */
   3850     public static boolean isLetter(int ch)
   3851     {
   3852         // if props == 0, it will just fall through and return false
   3853         return ((1 << getType(ch))
   3854                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   3855                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   3856                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   3857                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   3858                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
   3859     }
   3860 
   3861     /**
   3862      * Determines if the specified code point is a letter or digit.
   3863      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
   3864      * characters 'A' - 'Z' and 'a' - 'z' as digits.
   3865      * @param ch code point to determine if it is a letter or a digit
   3866      * @return true if code point is a letter or a digit
   3867      * @stable ICU 2.1
   3868      */
   3869     public static boolean isLetterOrDigit(int ch)
   3870     {
   3871         return ((1 << getType(ch))
   3872                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   3873                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   3874                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   3875                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   3876                         | (1 << UCharacterCategory.OTHER_LETTER)
   3877                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
   3878     }
   3879 
   3880     /**
   3881      * Compatibility override of Java deprecated method.  This
   3882      * method will always remain deprecated.  Delegates to
   3883      * java.lang.Character.isJavaIdentifierStart.
   3884      * @param cp the code point
   3885      * @return true if the code point can start a java identifier.
   3886      * @deprecated ICU 3.4 (Java)
   3887      */
   3888     @Deprecated
   3889     public static boolean isJavaLetter(int cp) {
   3890         return isJavaIdentifierStart(cp);
   3891     }
   3892 
   3893     /**
   3894      * Compatibility override of Java deprecated method.  This
   3895      * method will always remain deprecated.  Delegates to
   3896      * java.lang.Character.isJavaIdentifierPart.
   3897      * @param cp the code point
   3898      * @return true if the code point can continue a java identifier.
   3899      * @deprecated ICU 3.4 (Java)
   3900      */
   3901     @Deprecated
   3902     public static boolean isJavaLetterOrDigit(int cp) {
   3903         return isJavaIdentifierPart(cp);
   3904     }
   3905 
   3906     /**
   3907      * Compatibility override of Java method, delegates to
   3908      * java.lang.Character.isJavaIdentifierStart.
   3909      * @param cp the code point
   3910      * @return true if the code point can start a java identifier.
   3911      * @stable ICU 3.4
   3912      */
   3913     public static boolean isJavaIdentifierStart(int cp) {
   3914         // note, downcast to char for jdk 1.4 compatibility
   3915         return java.lang.Character.isJavaIdentifierStart((char)cp);
   3916     }
   3917 
   3918     /**
   3919      * Compatibility override of Java method, delegates to
   3920      * java.lang.Character.isJavaIdentifierPart.
   3921      * @param cp the code point
   3922      * @return true if the code point can continue a java identifier.
   3923      * @stable ICU 3.4
   3924      */
   3925     public static boolean isJavaIdentifierPart(int cp) {
   3926         // note, downcast to char for jdk 1.4 compatibility
   3927         return java.lang.Character.isJavaIdentifierPart((char)cp);
   3928     }
   3929 
   3930     /**
   3931      * Determines if the specified code point is a lowercase character.
   3932      * UnicodeData only contains case mappings for code points where they are
   3933      * one-to-one mappings; it also omits information about context-sensitive
   3934      * case mappings.<br> For more information about Unicode case mapping
   3935      * please refer to the
   3936      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
   3937      * #21</a>.<br>
   3938      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
   3939      * @param ch code point to determine if it is in lowercase
   3940      * @return true if code point is a lowercase character
   3941      * @stable ICU 2.1
   3942      */
   3943     public static boolean isLowerCase(int ch)
   3944     {
   3945         // if props == 0, it will just fall through and return false
   3946         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
   3947     }
   3948 
   3949     /**
   3950      * Determines if the specified code point is a white space character.
   3951      * A code point is considered to be a whitespace character if and only
   3952      * if it satisfies one of the following criteria:
   3953      * <ul>
   3954      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
   3955      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
   3956      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
   3957      * <li> It is &#92;u000A, LINE FEED.
   3958      * <li> It is &#92;u000B, VERTICAL TABULATION.
   3959      * <li> It is &#92;u000C, FORM FEED.
   3960      * <li> It is &#92;u000D, CARRIAGE RETURN.
   3961      * <li> It is &#92;u001C, FILE SEPARATOR.
   3962      * <li> It is &#92;u001D, GROUP SEPARATOR.
   3963      * <li> It is &#92;u001E, RECORD SEPARATOR.
   3964      * <li> It is &#92;u001F, UNIT SEPARATOR.
   3965      * </ul>
   3966      *
   3967      * This API tries to sync with the semantics of Java's
   3968      * java.lang.Character.isWhitespace(), but it may not return
   3969      * the exact same results because of the Unicode version
   3970      * difference.
   3971      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
   3972      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
   3973      * See http://www.unicode.org/versions/Unicode4.0.1/
   3974      * @param ch code point to determine if it is a white space
   3975      * @return true if the specified code point is a white space character
   3976      * @stable ICU 2.1
   3977      */
   3978     public static boolean isWhitespace(int ch)
   3979     {
   3980         // exclude no-break spaces
   3981         // if props == 0, it will just fall through and return false
   3982         return ((1 << getType(ch)) &
   3983                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
   3984                         | (1 << UCharacterCategory.LINE_SEPARATOR)
   3985                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
   3986                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
   3987                         // TAB VT LF FF CR FS GS RS US NL are all control characters
   3988                         // that are white spaces.
   3989                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
   3990     }
   3991 
   3992     /**
   3993      * Determines if the specified code point is a Unicode specified space
   3994      * character, i.e. if code point is in the category Zs, Zl and Zp.
   3995      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
   3996      * @param ch code point to determine if it is a space
   3997      * @return true if the specified code point is a space character
   3998      * @stable ICU 2.1
   3999      */
   4000     public static boolean isSpaceChar(int ch)
   4001     {
   4002         // if props == 0, it will just fall through and return false
   4003         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
   4004                 | (1 << UCharacterCategory.LINE_SEPARATOR)
   4005                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
   4006                 != 0;
   4007     }
   4008 
   4009     /**
   4010      * Determines if the specified code point is a titlecase character.
   4011      * UnicodeData only contains case mappings for code points where they are
   4012      * one-to-one mappings; it also omits information about context-sensitive
   4013      * case mappings.<br>
   4014      * For more information about Unicode case mapping please refer to the
   4015      * <a href=http://www.unicode.org/unicode/reports/tr21/>
   4016      * Technical report #21</a>.<br>
   4017      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
   4018      * @param ch code point to determine if it is in title case
   4019      * @return true if the specified code point is a titlecase character
   4020      * @stable ICU 2.1
   4021      */
   4022     public static boolean isTitleCase(int ch)
   4023     {
   4024         // if props == 0, it will just fall through and return false
   4025         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
   4026     }
   4027 
   4028     /**
   4029      * Determines if the specified code point may be any part of a Unicode
   4030      * identifier other than the starting character.
   4031      * A code point may be part of a Unicode identifier if and only if it is
   4032      * one of the following:
   4033      * <ul>
   4034      * <li> Lu Uppercase letter
   4035      * <li> Ll Lowercase letter
   4036      * <li> Lt Titlecase letter
   4037      * <li> Lm Modifier letter
   4038      * <li> Lo Other letter
   4039      * <li> Nl Letter number
   4040      * <li> Pc Connecting punctuation character
   4041      * <li> Nd decimal number
   4042      * <li> Mc Spacing combining mark
   4043      * <li> Mn Non-spacing mark
   4044      * <li> Cf formatting code
   4045      * </ul>
   4046      * Up-to-date Unicode implementation of
   4047      * java.lang.Character.isUnicodeIdentifierPart().<br>
   4048      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
   4049      * @param ch code point to determine if is can be part of a Unicode
   4050      *        identifier
   4051      * @return true if code point is any character belonging a unicode
   4052      *         identifier suffix after the first character
   4053      * @stable ICU 2.1
   4054      */
   4055     public static boolean isUnicodeIdentifierPart(int ch)
   4056     {
   4057         // if props == 0, it will just fall through and return false
   4058         // cat == format
   4059         return ((1 << getType(ch))
   4060                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   4061                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   4062                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   4063                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   4064                         | (1 << UCharacterCategory.OTHER_LETTER)
   4065                         | (1 << UCharacterCategory.LETTER_NUMBER)
   4066                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
   4067                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
   4068                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
   4069                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
   4070                         || isIdentifierIgnorable(ch);
   4071     }
   4072 
   4073     /**
   4074      * Determines if the specified code point is permissible as the first
   4075      * character in a Unicode identifier.
   4076      * A code point may start a Unicode identifier if it is of type either
   4077      * <ul>
   4078      * <li> Lu Uppercase letter
   4079      * <li> Ll Lowercase letter
   4080      * <li> Lt Titlecase letter
   4081      * <li> Lm Modifier letter
   4082      * <li> Lo Other letter
   4083      * <li> Nl Letter number
   4084      * </ul>
   4085      * Up-to-date Unicode implementation of
   4086      * java.lang.Character.isUnicodeIdentifierStart().<br>
   4087      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
   4088      * @param ch code point to determine if it can start a Unicode identifier
   4089      * @return true if code point is the first character belonging a unicode
   4090      *              identifier
   4091      * @stable ICU 2.1
   4092      */
   4093     public static boolean isUnicodeIdentifierStart(int ch)
   4094     {
   4095         /*int cat = getType(ch);*/
   4096         // if props == 0, it will just fall through and return false
   4097         return ((1 << getType(ch))
   4098                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   4099                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   4100                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   4101                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   4102                         | (1 << UCharacterCategory.OTHER_LETTER)
   4103                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
   4104     }
   4105 
   4106     /**
   4107      * Determines if the specified code point should be regarded as an
   4108      * ignorable character in a Java identifier.
   4109      * A character is Java-identifier-ignorable if it has the general category
   4110      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
   4111      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
   4112      * Up-to-date Unicode implementation of
   4113      * java.lang.Character.isIdentifierIgnorable().<br>
   4114      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
   4115      * <p>Note that Unicode just recommends to ignore Cf (format controls).
   4116      * @param ch code point to be determined if it can be ignored in a Unicode
   4117      *        identifier.
   4118      * @return true if the code point is ignorable
   4119      * @stable ICU 2.1
   4120      */
   4121     public static boolean isIdentifierIgnorable(int ch)
   4122     {
   4123         // see java.lang.Character.isIdentifierIgnorable() on range of
   4124         // ignorable characters.
   4125         if (ch <= 0x9f) {
   4126             return isISOControl(ch)
   4127                     && !((ch >= 0x9 && ch <= 0xd)
   4128                             || (ch >= 0x1c && ch <= 0x1f));
   4129         }
   4130         return getType(ch) == UCharacterCategory.FORMAT;
   4131     }
   4132 
   4133     /**
   4134      * Determines if the specified code point is an uppercase character.
   4135      * UnicodeData only contains case mappings for code point where they are
   4136      * one-to-one mappings; it also omits information about context-sensitive
   4137      * case mappings.<br>
   4138      * For language specific case conversion behavior, use
   4139      * toUpperCase(locale, str). <br>
   4140      * For example, the case conversion for dot-less i and dotted I in Turkish,
   4141      * or for final sigma in Greek.
   4142      * For more information about Unicode case mapping please refer to the
   4143      * <a href=http://www.unicode.org/unicode/reports/tr21/>
   4144      * Technical report #21</a>.<br>
   4145      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
   4146      * @param ch code point to determine if it is in uppercase
   4147      * @return true if the code point is an uppercase character
   4148      * @stable ICU 2.1
   4149      */
   4150     public static boolean isUpperCase(int ch)
   4151     {
   4152         // if props == 0, it will just fall through and return false
   4153         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
   4154     }
   4155 
   4156     /**
   4157      * The given code point is mapped to its lowercase equivalent; if the code
   4158      * point has no lowercase equivalent, the code point itself is returned.
   4159      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
   4160      *
   4161      * <p>This function only returns the simple, single-code point case mapping.
   4162      * Full case mappings should be used whenever possible because they produce
   4163      * better results by working on whole strings.
   4164      * They take into account the string context and the language and can map
   4165      * to a result string with a different length as appropriate.
   4166      * Full case mappings are applied by the case mapping functions
   4167      * that take String parameters rather than code points (int).
   4168      * See also the User Guide chapter on C/POSIX migration:
   4169      * http://www.icu-project.org/userguide/posix.html#case_mappings
   4170      *
   4171      * @param ch code point whose lowercase equivalent is to be retrieved
   4172      * @return the lowercase equivalent code point
   4173      * @stable ICU 2.1
   4174      */
   4175     public static int toLowerCase(int ch) {
   4176         return UCaseProps.INSTANCE.tolower(ch);
   4177     }
   4178 
   4179     /**
   4180      * Converts argument code point and returns a String object representing
   4181      * the code point's value in UTF-16 format.
   4182      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
   4183      *
   4184      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
   4185      *
   4186      * @param ch code point
   4187      * @return string representation of the code point, or null if the code point is
   4188      *         outside the range MIN_VALUE..MAX_VALUE
   4189      * @stable ICU 2.1
   4190      */
   4191     public static String toString(int ch)
   4192     {
   4193         if (ch < MIN_VALUE || ch > MAX_VALUE) {
   4194             return null;
   4195         }
   4196 
   4197         if (ch < SUPPLEMENTARY_MIN_VALUE) {
   4198             return String.valueOf((char)ch);
   4199         }
   4200 
   4201         return new String(Character.toChars(ch));
   4202     }
   4203 
   4204     /**
   4205      * Converts the code point argument to titlecase.
   4206      * If no titlecase is available, the uppercase is returned. If no uppercase
   4207      * is available, the code point itself is returned.
   4208      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
   4209      *
   4210      * <p>This function only returns the simple, single-code point case mapping.
   4211      * Full case mappings should be used whenever possible because they produce
   4212      * better results by working on whole strings.
   4213      * They take into account the string context and the language and can map
   4214      * to a result string with a different length as appropriate.
   4215      * Full case mappings are applied by the case mapping functions
   4216      * that take String parameters rather than code points (int).
   4217      * See also the User Guide chapter on C/POSIX migration:
   4218      * http://www.icu-project.org/userguide/posix.html#case_mappings
   4219      *
   4220      * @param ch code point  whose title case is to be retrieved
   4221      * @return titlecase code point
   4222      * @stable ICU 2.1
   4223      */
   4224     public static int toTitleCase(int ch) {
   4225         return UCaseProps.INSTANCE.totitle(ch);
   4226     }
   4227 
   4228     /**
   4229      * Converts the character argument to uppercase.
   4230      * If no uppercase is available, the character itself is returned.
   4231      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
   4232      *
   4233      * <p>This function only returns the simple, single-code point case mapping.
   4234      * Full case mappings should be used whenever possible because they produce
   4235      * better results by working on whole strings.
   4236      * They take into account the string context and the language and can map
   4237      * to a result string with a different length as appropriate.
   4238      * Full case mappings are applied by the case mapping functions
   4239      * that take String parameters rather than code points (int).
   4240      * See also the User Guide chapter on C/POSIX migration:
   4241      * http://www.icu-project.org/userguide/posix.html#case_mappings
   4242      *
   4243      * @param ch code point whose uppercase is to be retrieved
   4244      * @return uppercase code point
   4245      * @stable ICU 2.1
   4246      */
   4247     public static int toUpperCase(int ch) {
   4248         return UCaseProps.INSTANCE.toupper(ch);
   4249     }
   4250 
   4251     // extra methods not in java.lang.Character --------------------------
   4252 
   4253     /**
   4254      * {@icu} Determines if the code point is a supplementary character.
   4255      * A code point is a supplementary character if and only if it lies in the range
   4256      * <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>..MAX_VALUE, both ends inclusive.
   4257      * @param ch code point to be determined if it is in the supplementary
   4258      *        plane
   4259      * @return true if code point is a supplementary character
   4260      * @stable ICU 2.1
   4261      */
   4262     public static boolean isSupplementary(int ch)
   4263     {
   4264         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
   4265                 ch <= UCharacter.MAX_VALUE;
   4266     }
   4267 
   4268     /**
   4269      * {@icu} Determines if the code point is in the BMP plane.
   4270      * @param ch code point to be determined if it is not a supplementary
   4271      *        character
   4272      * @return true if code point is not a supplementary character
   4273      * @stable ICU 2.1
   4274      */
   4275     public static boolean isBMP(int ch)
   4276     {
   4277         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
   4278     }
   4279 
   4280     /**
   4281      * {@icu} Determines whether the specified code point is a printable character
   4282      * according to the Unicode standard.
   4283      * @param ch code point to be determined if it is printable
   4284      * @return true if the code point is a printable character
   4285      * @stable ICU 2.1
   4286      */
   4287     public static boolean isPrintable(int ch)
   4288     {
   4289         int cat = getType(ch);
   4290         // if props == 0, it will just fall through and return false
   4291         return (cat != UCharacterCategory.UNASSIGNED &&
   4292                 cat != UCharacterCategory.CONTROL &&
   4293                 cat != UCharacterCategory.FORMAT &&
   4294                 cat != UCharacterCategory.PRIVATE_USE &&
   4295                 cat != UCharacterCategory.SURROGATE &&
   4296                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
   4297     }
   4298 
   4299     /**
   4300      * {@icu} Determines whether the specified code point is of base form.
   4301      * A code point of base form does not graphically combine with preceding
   4302      * characters, and is neither a control nor a format character.
   4303      * @param ch code point to be determined if it is of base form
   4304      * @return true if the code point is of base form
   4305      * @stable ICU 2.1
   4306      */
   4307     public static boolean isBaseForm(int ch)
   4308     {
   4309         int cat = getType(ch);
   4310         // if props == 0, it will just fall through and return false
   4311         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
   4312                 cat == UCharacterCategory.OTHER_NUMBER ||
   4313                 cat == UCharacterCategory.LETTER_NUMBER ||
   4314                 cat == UCharacterCategory.UPPERCASE_LETTER ||
   4315                 cat == UCharacterCategory.LOWERCASE_LETTER ||
   4316                 cat == UCharacterCategory.TITLECASE_LETTER ||
   4317                 cat == UCharacterCategory.MODIFIER_LETTER ||
   4318                 cat == UCharacterCategory.OTHER_LETTER ||
   4319                 cat == UCharacterCategory.NON_SPACING_MARK ||
   4320                 cat == UCharacterCategory.ENCLOSING_MARK ||
   4321                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
   4322     }
   4323 
   4324     /**
   4325      * {@icu} Returns the Bidirection property of a code point.
   4326      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
   4327      * property.<br>
   4328      * Result returned belongs to the interface
   4329      * <a href=UCharacterDirection.html>UCharacterDirection</a>
   4330      * @param ch the code point to be determined its direction
   4331      * @return direction constant from UCharacterDirection.
   4332      * @stable ICU 2.1
   4333      */
   4334     public static int getDirection(int ch)
   4335     {
   4336         return UBiDiProps.INSTANCE.getClass(ch);
   4337     }
   4338 
   4339     /**
   4340      * Determines whether the code point has the "mirrored" property.
   4341      * This property is set for characters that are commonly used in
   4342      * Right-To-Left contexts and need to be displayed with a "mirrored"
   4343      * glyph.
   4344      * @param ch code point whose mirror is to be determined
   4345      * @return true if the code point has the "mirrored" property
   4346      * @stable ICU 2.1
   4347      */
   4348     public static boolean isMirrored(int ch)
   4349     {
   4350         return UBiDiProps.INSTANCE.isMirrored(ch);
   4351     }
   4352 
   4353     /**
   4354      * {@icu} Maps the specified code point to a "mirror-image" code point.
   4355      * For code points with the "mirrored" property, implementations sometimes
   4356      * need a "poor man's" mapping to another code point such that the default
   4357      * glyph may serve as the mirror-image of the default glyph of the
   4358      * specified code point.<br>
   4359      * This is useful for text conversion to and from codepages with visual
   4360      * order, and for displays without glyph selection capabilities.
   4361      * @param ch code point whose mirror is to be retrieved
   4362      * @return another code point that may serve as a mirror-image substitute,
   4363      *         or ch itself if there is no such mapping or ch does not have the
   4364      *         "mirrored" property
   4365      * @stable ICU 2.1
   4366      */
   4367     public static int getMirror(int ch)
   4368     {
   4369         return UBiDiProps.INSTANCE.getMirror(ch);
   4370     }
   4371 
   4372     /**
   4373      * {@icu} Maps the specified character to its paired bracket character.
   4374      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
   4375      * Otherwise c itself is returned.
   4376      * See http://www.unicode.org/reports/tr9/
   4377      *
   4378      * @param c the code point to be mapped
   4379      * @return the paired bracket code point,
   4380      *         or c itself if there is no such mapping
   4381      *         (Bidi_Paired_Bracket_Type=None)
   4382      *
   4383      * @see UProperty#BIDI_PAIRED_BRACKET
   4384      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
   4385      * @see #getMirror(int)
   4386      * @stable ICU 52
   4387      */
   4388     public static int getBidiPairedBracket(int c) {
   4389         return UBiDiProps.INSTANCE.getPairedBracket(c);
   4390     }
   4391 
   4392     /**
   4393      * {@icu} Returns the combining class of the argument codepoint
   4394      * @param ch code point whose combining is to be retrieved
   4395      * @return the combining class of the codepoint
   4396      * @stable ICU 2.1
   4397      */
   4398     public static int getCombiningClass(int ch)
   4399     {
   4400         return Normalizer2.getNFDInstance().getCombiningClass(ch);
   4401     }
   4402 
   4403     /**
   4404      * {@icu} A code point is illegal if and only if
   4405      * <ul>
   4406      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
   4407      * <li> A surrogate value, 0xD800 to 0xDFFF
   4408      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
   4409      * </ul>
   4410      * Note: legal does not mean that it is assigned in this version of Unicode.
   4411      * @param ch code point to determine if it is a legal code point by itself
   4412      * @return true if and only if legal.
   4413      * @stable ICU 2.1
   4414      */
   4415     public static boolean isLegal(int ch)
   4416     {
   4417         if (ch < MIN_VALUE) {
   4418             return false;
   4419         }
   4420         if (ch < Character.MIN_SURROGATE) {
   4421             return true;
   4422         }
   4423         if (ch <= Character.MAX_SURROGATE) {
   4424             return false;
   4425         }
   4426         if (UCharacterUtility.isNonCharacter(ch)) {
   4427             return false;
   4428         }
   4429         return (ch <= MAX_VALUE);
   4430     }
   4431 
   4432     /**
   4433      * {@icu} A string is legal iff all its code points are legal.
   4434      * A code point is illegal if and only if
   4435      * <ul>
   4436      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
   4437      * <li> A surrogate value, 0xD800 to 0xDFFF
   4438      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
   4439      * </ul>
   4440      * Note: legal does not mean that it is assigned in this version of Unicode.
   4441      * @param str containing code points to examin
   4442      * @return true if and only if legal.
   4443      * @stable ICU 2.1
   4444      */
   4445     public static boolean isLegal(String str)
   4446     {
   4447         int size = str.length();
   4448         int codepoint;
   4449         for (int i = 0; i < size; i += Character.charCount(codepoint))
   4450         {
   4451             codepoint = str.codePointAt(i);
   4452             if (!isLegal(codepoint)) {
   4453                 return false;
   4454             }
   4455         }
   4456         return true;
   4457     }
   4458 
   4459     /**
   4460      * {@icu} Returns the version of Unicode data used.
   4461      * @return the unicode version number used
   4462      * @stable ICU 2.1
   4463      */
   4464     public static VersionInfo getUnicodeVersion()
   4465     {
   4466         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
   4467     }
   4468 
   4469     /**
   4470      * {@icu} Returns the most current Unicode name of the argument code point, or
   4471      * null if the character is unassigned or outside the range
   4472      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
   4473      * <br>
   4474      * Note calling any methods related to code point names, e.g. get*Name*()
   4475      * incurs a one-time initialisation cost to construct the name tables.
   4476      * @param ch the code point for which to get the name
   4477      * @return most current Unicode name
   4478      * @stable ICU 2.1
   4479      */
   4480     public static String getName(int ch)
   4481     {
   4482         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
   4483     }
   4484 
   4485     /**
   4486      * {@icu} Returns the names for each of the characters in a string
   4487      * @param s string to format
   4488      * @param separator string to go between names
   4489      * @return string of names
   4490      * @stable ICU 3.8
   4491      */
   4492     public static String getName(String s, String separator) {
   4493         if (s.length() == 1) { // handle common case
   4494             return getName(s.charAt(0));
   4495         }
   4496         int cp;
   4497         StringBuilder sb = new StringBuilder();
   4498         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
   4499             cp = s.codePointAt(i);
   4500             if (i != 0) sb.append(separator);
   4501             sb.append(UCharacter.getName(cp));
   4502         }
   4503         return sb.toString();
   4504     }
   4505 
   4506     /**
   4507      * {@icu} Returns null.
   4508      * Used to return the Unicode_1_Name property value which was of little practical value.
   4509      * @param ch the code point for which to get the name
   4510      * @return null
   4511      * @deprecated ICU 49
   4512      */
   4513     @Deprecated
   4514     public static String getName1_0(int ch)
   4515     {
   4516         return null;
   4517     }
   4518 
   4519     /**
   4520      * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and
   4521      * getName1_0(int), this method will return a name even for codepoints that
   4522      * are not assigned a name in UnicodeData.txt.
   4523      *
   4524      * <p>The names are returned in the following order.
   4525      * <ul>
   4526      * <li> Most current Unicode name if there is any
   4527      * <li> Unicode 1.0 name if there is any
   4528      * <li> Extended name in the form of
   4529      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
   4530      * </ul>
   4531      * Note calling any methods related to code point names, e.g. get*Name*()
   4532      * incurs a one-time initialisation cost to construct the name tables.
   4533      * @param ch the code point for which to get the name
   4534      * @return a name for the argument codepoint
   4535      * @stable ICU 2.6
   4536      */
   4537     public static String getExtendedName(int ch) {
   4538         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
   4539     }
   4540 
   4541     /**
   4542      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
   4543      * Returns null if the character is unassigned or outside the range
   4544      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
   4545      * <br>
   4546      * Note calling any methods related to code point names, e.g. get*Name*()
   4547      * incurs a one-time initialisation cost to construct the name tables.
   4548      * @param ch the code point for which to get the name alias
   4549      * @return Unicode name alias, or null
   4550      * @stable ICU 4.4
   4551      */
   4552     public static String getNameAlias(int ch)
   4553     {
   4554         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
   4555     }
   4556 
   4557     /**
   4558      * {@icu} Returns null.
   4559      * Used to return the ISO 10646 comment for a character.
   4560      * The Unicode ISO_Comment property is deprecated and has no values.
   4561      *
   4562      * @param ch The code point for which to get the ISO comment.
   4563      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
   4564      * @return null
   4565      * @deprecated ICU 49
   4566      */
   4567     @Deprecated
   4568     public static String getISOComment(int ch)
   4569     {
   4570         return null;
   4571     }
   4572 
   4573     /**
   4574      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
   4575      * return its code point value. All Unicode names are in uppercase.
   4576      * Note calling any methods related to code point names, e.g. get*Name*()
   4577      * incurs a one-time initialisation cost to construct the name tables.
   4578      * @param name most current Unicode character name whose code point is to
   4579      *        be returned
   4580      * @return code point or -1 if name is not found
   4581      * @stable ICU 2.1
   4582      */
   4583     public static int getCharFromName(String name){
   4584         return UCharacterName.INSTANCE.getCharFromName(
   4585                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
   4586     }
   4587 
   4588     /**
   4589      * {@icu} Returns -1.
   4590      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
   4591      * its code point value.
   4592      * @param name Unicode 1.0 code point name whose code point is to be
   4593      *             returned
   4594      * @return -1
   4595      * @deprecated ICU 49
   4596      * @see #getName1_0(int)
   4597      */
   4598     @Deprecated
   4599     public static int getCharFromName1_0(String name){
   4600         return -1;
   4601     }
   4602 
   4603     /**
   4604      * {@icu} <p>Find a Unicode character by either its name and return its code
   4605      * point value. All Unicode names are in uppercase.
   4606      * Extended names are all lowercase except for numbers and are contained
   4607      * within angle brackets.
   4608      * The names are searched in the following order
   4609      * <ul>
   4610      * <li> Most current Unicode name if there is any
   4611      * <li> Unicode 1.0 name if there is any
   4612      * <li> Extended name in the form of
   4613      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
   4614      * </ul>
   4615      * Note calling any methods related to code point names, e.g. get*Name*()
   4616      * incurs a one-time initialisation cost to construct the name tables.
   4617      * @param name codepoint name
   4618      * @return code point associated with the name or -1 if the name is not
   4619      *         found.
   4620      * @stable ICU 2.6
   4621      */
   4622     public static int getCharFromExtendedName(String name){
   4623         return UCharacterName.INSTANCE.getCharFromName(
   4624                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
   4625     }
   4626 
   4627     /**
   4628      * {@icu} <p>Find a Unicode character by its corrected name alias and return
   4629      * its code point value. All Unicode names are in uppercase.
   4630      * Note calling any methods related to code point names, e.g. get*Name*()
   4631      * incurs a one-time initialisation cost to construct the name tables.
   4632      * @param name Unicode name alias whose code point is to be returned
   4633      * @return code point or -1 if name is not found
   4634      * @stable ICU 4.4
   4635      */
   4636     public static int getCharFromNameAlias(String name){
   4637         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
   4638     }
   4639 
   4640     /**
   4641      * {@icu} Return the Unicode name for a given property, as given in the
   4642      * Unicode database file PropertyAliases.txt.  Most properties
   4643      * have more than one name.  The nameChoice determines which one
   4644      * is returned.
   4645      *
   4646      * In addition, this function maps the property
   4647      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
   4648      * "General_Category_Mask".  These names are not in
   4649      * PropertyAliases.txt.
   4650      *
   4651      * @param property UProperty selector.
   4652      *
   4653      * @param nameChoice UProperty.NameChoice selector for which name
   4654      * to get.  All properties have a long name.  Most have a short
   4655      * name, but some do not.  Unicode allows for additional names; if
   4656      * present these will be returned by UProperty.NameChoice.LONG + i,
   4657      * where i=1, 2,...
   4658      *
   4659      * @return a name, or null if Unicode explicitly defines no name
   4660      * ("n/a") for a given property/nameChoice.  If a given nameChoice
   4661      * throws an exception, then all larger values of nameChoice will
   4662      * throw an exception.  If null is returned for a given
   4663      * nameChoice, then other nameChoice values may return non-null
   4664      * results.
   4665      *
   4666      * @exception IllegalArgumentException thrown if property or
   4667      * nameChoice are invalid.
   4668      *
   4669      * @see UProperty
   4670      * @see UProperty.NameChoice
   4671      * @stable ICU 2.4
   4672      */
   4673     public static String getPropertyName(int property,
   4674             int nameChoice) {
   4675         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
   4676     }
   4677 
   4678     /**
   4679      * {@icu} Return the UProperty selector for a given property name, as
   4680      * specified in the Unicode database file PropertyAliases.txt.
   4681      * Short, long, and any other variants are recognized.
   4682      *
   4683      * In addition, this function maps the synthetic names "gcm" /
   4684      * "General_Category_Mask" to the property
   4685      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
   4686      * PropertyAliases.txt.
   4687      *
   4688      * @param propertyAlias the property name to be matched.  The name
   4689      * is compared using "loose matching" as described in
   4690      * PropertyAliases.txt.
   4691      *
   4692      * @return a UProperty enum.
   4693      *
   4694      * @exception IllegalArgumentException thrown if propertyAlias
   4695      * is not recognized.
   4696      *
   4697      * @see UProperty
   4698      * @stable ICU 2.4
   4699      */
   4700     public static int getPropertyEnum(CharSequence propertyAlias) {
   4701         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
   4702         if (propEnum == UProperty.UNDEFINED) {
   4703             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
   4704         }
   4705         return propEnum;
   4706     }
   4707 
   4708     /**
   4709      * {@icu} Return the Unicode name for a given property value, as given in
   4710      * the Unicode database file PropertyValueAliases.txt.  Most
   4711      * values have more than one name.  The nameChoice determines
   4712      * which one is returned.
   4713      *
   4714      * Note: Some of the names in PropertyValueAliases.txt can only be
   4715      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
   4716      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
   4717      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
   4718      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
   4719      *
   4720      * @param property UProperty selector constant.
   4721      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
   4722      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
   4723      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
   4724      * If out of range, null is returned.
   4725      *
   4726      * @param value selector for a value for the given property.  In
   4727      * general, valid values range from 0 up to some maximum.  There
   4728      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
   4729      * non-zero value BASIC_LATIN.getID().  (2.)
   4730      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
   4731      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
   4732      * are mask values produced by left-shifting 1 by
   4733      * UCharacter.getType().  This allows grouped categories such as
   4734      * [:L:] to be represented.  Mask values are non-contiguous.
   4735      *
   4736      * @param nameChoice UProperty.NameChoice selector for which name
   4737      * to get.  All values have a long name.  Most have a short name,
   4738      * but some do not.  Unicode allows for additional names; if
   4739      * present these will be returned by UProperty.NameChoice.LONG + i,
   4740      * where i=1, 2,...
   4741      *
   4742      * @return a name, or null if Unicode explicitly defines no name
   4743      * ("n/a") for a given property/value/nameChoice.  If a given
   4744      * nameChoice throws an exception, then all larger values of
   4745      * nameChoice will throw an exception.  If null is returned for a
   4746      * given nameChoice, then other nameChoice values may return
   4747      * non-null results.
   4748      *
   4749      * @exception IllegalArgumentException thrown if property, value,
   4750      * or nameChoice are invalid.
   4751      *
   4752      * @see UProperty
   4753      * @see UProperty.NameChoice
   4754      * @stable ICU 2.4
   4755      */
   4756     public static String getPropertyValueName(int property,
   4757             int value,
   4758             int nameChoice)
   4759     {
   4760         if ((property == UProperty.CANONICAL_COMBINING_CLASS
   4761                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
   4762                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
   4763                 && value >= UCharacter.getIntPropertyMinValue(
   4764                         UProperty.CANONICAL_COMBINING_CLASS)
   4765                         && value <= UCharacter.getIntPropertyMaxValue(
   4766                                 UProperty.CANONICAL_COMBINING_CLASS)
   4767                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
   4768             // this is hard coded for the valid cc
   4769             // because PropertyValueAliases.txt does not contain all of them
   4770             try {
   4771                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
   4772                         nameChoice);
   4773             }
   4774             catch (IllegalArgumentException e) {
   4775                 return null;
   4776             }
   4777         }
   4778         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
   4779     }
   4780 
   4781     /**
   4782      * {@icu} Return the property value integer for a given value name, as
   4783      * specified in the Unicode database file PropertyValueAliases.txt.
   4784      * Short, long, and any other variants are recognized.
   4785      *
   4786      * Note: Some of the names in PropertyValueAliases.txt will only be
   4787      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
   4788      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
   4789      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
   4790      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
   4791      *
   4792      * @param property UProperty selector constant.
   4793      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
   4794      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
   4795      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
   4796      * Only these properties can be enumerated.
   4797      *
   4798      * @param valueAlias the value name to be matched.  The name is
   4799      * compared using "loose matching" as described in
   4800      * PropertyValueAliases.txt.
   4801      *
   4802      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
   4803      * values are mask values produced by left-shifting 1 by
   4804      * UCharacter.getType().  This allows grouped categories such as
   4805      * [:L:] to be represented.
   4806      *
   4807      * @see UProperty
   4808      * @throws IllegalArgumentException if property is not a valid UProperty
   4809      *         selector or valueAlias is not a value of this property
   4810      * @stable ICU 2.4
   4811      */
   4812     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
   4813         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
   4814         if (propEnum == UProperty.UNDEFINED) {
   4815             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
   4816         }
   4817         return propEnum;
   4818     }
   4819 
   4820     /**
   4821      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
   4822      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
   4823      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
   4824      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
   4825      * @internal
   4826      * @deprecated This API is ICU internal only.
   4827      */
   4828     @Deprecated
   4829     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
   4830         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
   4831     }
   4832 
   4833 
   4834     /**
   4835      * {@icu} Returns a code point corresponding to the two surrogate code units.
   4836      *
   4837      * @param lead the lead char
   4838      * @param trail the trail char
   4839      * @return code point if surrogate characters are valid.
   4840      * @exception IllegalArgumentException thrown when the code units do
   4841      *            not form a valid code point
   4842      * @stable ICU 2.1
   4843      */
   4844     public static int getCodePoint(char lead, char trail)
   4845     {
   4846         if (Character.isSurrogatePair(lead, trail)) {
   4847             return Character.toCodePoint(lead, trail);
   4848         }
   4849         throw new IllegalArgumentException("Illegal surrogate characters");
   4850     }
   4851 
   4852     /**
   4853      * {@icu} Returns the code point corresponding to the BMP code point.
   4854      *
   4855      * @param char16 the BMP code point
   4856      * @return code point if argument is a valid character.
   4857      * @exception IllegalArgumentException thrown when char16 is not a valid
   4858      *            code point
   4859      * @stable ICU 2.1
   4860      */
   4861     public static int getCodePoint(char char16)
   4862     {
   4863         if (UCharacter.isLegal(char16)) {
   4864             return char16;
   4865         }
   4866         throw new IllegalArgumentException("Illegal codepoint");
   4867     }
   4868 
   4869     /**
   4870      * Returns the uppercase version of the argument string.
   4871      * Casing is dependent on the default locale and context-sensitive.
   4872      * @param str source string to be performed on
   4873      * @return uppercase version of the argument string
   4874      * @stable ICU 2.1
   4875      */
   4876     public static String toUpperCase(String str)
   4877     {
   4878         return toUpperCase(getDefaultCaseLocale(), str);
   4879     }
   4880 
   4881     /**
   4882      * Returns the lowercase version of the argument string.
   4883      * Casing is dependent on the default locale and context-sensitive
   4884      * @param str source string to be performed on
   4885      * @return lowercase version of the argument string
   4886      * @stable ICU 2.1
   4887      */
   4888     public static String toLowerCase(String str)
   4889     {
   4890         return toLowerCase(getDefaultCaseLocale(), str);
   4891     }
   4892 
   4893     /**
   4894      * <p>Returns the titlecase version of the argument string.
   4895      * <p>Position for titlecasing is determined by the argument break
   4896      * iterator, hence the user can customize his break iterator for
   4897      * a specialized titlecasing. In this case only the forward iteration
   4898      * needs to be implemented.
   4899      * If the break iterator passed in is null, the default Unicode algorithm
   4900      * will be used to determine the titlecase positions.
   4901      *
   4902      * <p>Only positions returned by the break iterator will be title cased,
   4903      * character in between the positions will all be in lower case.
   4904      * <p>Casing is dependent on the default locale and context-sensitive
   4905      * @param str source string to be performed on
   4906      * @param breakiter break iterator to determine the positions in which
   4907      *        the character should be title cased.
   4908      * @return lowercase version of the argument string
   4909      * @stable ICU 2.6
   4910      */
   4911     public static String toTitleCase(String str, BreakIterator breakiter)
   4912     {
   4913         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
   4914     }
   4915 
   4916     private static int getDefaultCaseLocale() {
   4917         return UCaseProps.getCaseLocale(Locale.getDefault());
   4918     }
   4919 
   4920     private static int getCaseLocale(Locale locale) {
   4921         if (locale == null) {
   4922             locale = Locale.getDefault();
   4923         }
   4924         return UCaseProps.getCaseLocale(locale);
   4925     }
   4926 
   4927     private static int getCaseLocale(ULocale locale) {
   4928         if (locale == null) {
   4929             locale = ULocale.getDefault();
   4930         }
   4931         return UCaseProps.getCaseLocale(locale);
   4932     }
   4933 
   4934     private static String toLowerCase(int caseLocale, String str) {
   4935         if (str.length() <= 100) {
   4936             if (str.isEmpty()) {
   4937                 return str;
   4938             }
   4939             // Collect and apply only changes.
   4940             // Good if no or few changes. Bad (slow) if many changes.
   4941             Edits edits = new Edits();
   4942             StringBuilder replacementChars = CaseMapImpl.toLower(
   4943                     caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
   4944             return applyEdits(str, replacementChars, edits);
   4945         } else {
   4946             return CaseMapImpl.toLower(caseLocale, 0, str,
   4947                     new StringBuilder(str.length()), null).toString();
   4948         }
   4949     }
   4950 
   4951     private static String toUpperCase(int caseLocale, String str) {
   4952         if (str.length() <= 100) {
   4953             if (str.isEmpty()) {
   4954                 return str;
   4955             }
   4956             // Collect and apply only changes.
   4957             // Good if no or few changes. Bad (slow) if many changes.
   4958             Edits edits = new Edits();
   4959             StringBuilder replacementChars = CaseMapImpl.toUpper(
   4960                     caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
   4961             return applyEdits(str, replacementChars, edits);
   4962         } else {
   4963             return CaseMapImpl.toUpper(caseLocale, 0, str,
   4964                     new StringBuilder(str.length()), null).toString();
   4965         }
   4966     }
   4967 
   4968     private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
   4969         if (str.length() <= 100) {
   4970             if (str.isEmpty()) {
   4971                 return str;
   4972             }
   4973             // Collect and apply only changes.
   4974             // Good if no or few changes. Bad (slow) if many changes.
   4975             Edits edits = new Edits();
   4976             StringBuilder replacementChars = CaseMapImpl.toTitle(
   4977                     caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str,
   4978                     new StringBuilder(), edits);
   4979             return applyEdits(str, replacementChars, edits);
   4980         } else {
   4981             return CaseMapImpl.toTitle(caseLocale, options, titleIter, str,
   4982                     new StringBuilder(str.length()), null).toString();
   4983         }
   4984     }
   4985 
   4986     private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
   4987         if (!edits.hasChanges()) {
   4988             return str;
   4989         }
   4990         StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
   4991         for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
   4992             if (ei.hasChange()) {
   4993                 int i = ei.replacementIndex();
   4994                 result.append(replacementChars, i, i + ei.newLength());
   4995             } else {
   4996                 int i = ei.sourceIndex();
   4997                 result.append(str, i, i + ei.oldLength());
   4998             }
   4999         }
   5000         return result.toString();
   5001     }
   5002 
   5003     /**
   5004      * Returns the uppercase version of the argument string.
   5005      * Casing is dependent on the argument locale and context-sensitive.
   5006      * @param locale which string is to be converted in
   5007      * @param str source string to be performed on
   5008      * @return uppercase version of the argument string
   5009      * @stable ICU 2.1
   5010      */
   5011     public static String toUpperCase(Locale locale, String str)
   5012     {
   5013         return toUpperCase(getCaseLocale(locale), str);
   5014     }
   5015 
   5016     /**
   5017      * Returns the uppercase version of the argument string.
   5018      * Casing is dependent on the argument locale and context-sensitive.
   5019      * @param locale which string is to be converted in
   5020      * @param str source string to be performed on
   5021      * @return uppercase version of the argument string
   5022      * @stable ICU 3.2
   5023      */
   5024     public static String toUpperCase(ULocale locale, String str) {
   5025         return toUpperCase(getCaseLocale(locale), str);
   5026     }
   5027 
   5028     /**
   5029      * Returns the lowercase version of the argument string.
   5030      * Casing is dependent on the argument locale and context-sensitive
   5031      * @param locale which string is to be converted in
   5032      * @param str source string to be performed on
   5033      * @return lowercase version of the argument string
   5034      * @stable ICU 2.1
   5035      */
   5036     public static String toLowerCase(Locale locale, String str)
   5037     {
   5038         return toLowerCase(getCaseLocale(locale), str);
   5039     }
   5040 
   5041     /**
   5042      * Returns the lowercase version of the argument string.
   5043      * Casing is dependent on the argument locale and context-sensitive
   5044      * @param locale which string is to be converted in
   5045      * @param str source string to be performed on
   5046      * @return lowercase version of the argument string
   5047      * @stable ICU 3.2
   5048      */
   5049     public static String toLowerCase(ULocale locale, String str) {
   5050         return toLowerCase(getCaseLocale(locale), str);
   5051     }
   5052 
   5053     /**
   5054      * <p>Returns the titlecase version of the argument string.
   5055      * <p>Position for titlecasing is determined by the argument break
   5056      * iterator, hence the user can customize his break iterator for
   5057      * a specialized titlecasing. In this case only the forward iteration
   5058      * needs to be implemented.
   5059      * If the break iterator passed in is null, the default Unicode algorithm
   5060      * will be used to determine the titlecase positions.
   5061      *
   5062      * <p>Only positions returned by the break iterator will be title cased,
   5063      * character in between the positions will all be in lower case.
   5064      * <p>Casing is dependent on the argument locale and context-sensitive
   5065      * @param locale which string is to be converted in
   5066      * @param str source string to be performed on
   5067      * @param breakiter break iterator to determine the positions in which
   5068      *        the character should be title cased.
   5069      * @return lowercase version of the argument string
   5070      * @stable ICU 2.6
   5071      */
   5072     public static String toTitleCase(Locale locale, String str,
   5073             BreakIterator breakiter)
   5074     {
   5075         return toTitleCase(locale, str, breakiter, 0);
   5076     }
   5077 
   5078     /**
   5079      * <p>Returns the titlecase version of the argument string.
   5080      * <p>Position for titlecasing is determined by the argument break
   5081      * iterator, hence the user can customize his break iterator for
   5082      * a specialized titlecasing. In this case only the forward iteration
   5083      * needs to be implemented.
   5084      * If the break iterator passed in is null, the default Unicode algorithm
   5085      * will be used to determine the titlecase positions.
   5086      *
   5087      * <p>Only positions returned by the break iterator will be title cased,
   5088      * character in between the positions will all be in lower case.
   5089      * <p>Casing is dependent on the argument locale and context-sensitive
   5090      * @param locale which string is to be converted in
   5091      * @param str source string to be performed on
   5092      * @param titleIter break iterator to determine the positions in which
   5093      *        the character should be title cased.
   5094      * @return lowercase version of the argument string
   5095      * @stable ICU 3.2
   5096      */
   5097     public static String toTitleCase(ULocale locale, String str,
   5098             BreakIterator titleIter) {
   5099         return toTitleCase(locale, str, titleIter, 0);
   5100     }
   5101 
   5102     /**
   5103      * <p>Returns the titlecase version of the argument string.
   5104      * <p>Position for titlecasing is determined by the argument break
   5105      * iterator, hence the user can customize his break iterator for
   5106      * a specialized titlecasing. In this case only the forward iteration
   5107      * needs to be implemented.
   5108      * If the break iterator passed in is null, the default Unicode algorithm
   5109      * will be used to determine the titlecase positions.
   5110      *
   5111      * <p>Only positions returned by the break iterator will be title cased,
   5112      * character in between the positions will all be in lower case.
   5113      * <p>Casing is dependent on the argument locale and context-sensitive
   5114      * @param locale which string is to be converted in
   5115      * @param str source string to be performed on
   5116      * @param titleIter break iterator to determine the positions in which
   5117      *        the character should be title cased.
   5118      * @param options bit set to modify the titlecasing operation
   5119      * @return lowercase version of the argument string
   5120      * @stable ICU 3.8
   5121      * @see #TITLECASE_NO_LOWERCASE
   5122      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
   5123      */
   5124     public static String toTitleCase(ULocale locale, String str,
   5125             BreakIterator titleIter, int options) {
   5126         if(titleIter == null) {
   5127             if (locale == null) {
   5128                 locale = ULocale.getDefault();
   5129             }
   5130             titleIter = BreakIterator.getWordInstance(locale);
   5131         }
   5132         titleIter.setText(str);
   5133         return toTitleCase(getCaseLocale(locale), options, titleIter, str);
   5134     }
   5135 
   5136 
   5137     private static final int BREAK_MASK =
   5138             (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
   5139             | (1<<UCharacterCategory.OTHER_LETTER)
   5140             | (1<<UCharacterCategory.MODIFIER_LETTER);
   5141 
   5142     /**
   5143      * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
   5144      * and sometimes has no effect at all; the original string is returned whenever casing
   5145      * would not be appropriate for the first word (such as for CJK characters or initial numbers).
   5146      * Initial non-letters are skipped in order to find the character to change.
   5147      * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
   5148      * <p>Examples:
   5149      * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
   5150      * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
   5151      * <tr><td>contact us</td><td>Contact us</td></tr>
   5152      * <tr><td>49ers win!</td><td>49ers win!</td></tr>
   5153      * <tr><td>(abc)</td><td>(abc)</td></tr>
   5154      * <tr><td>ijs</td><td>Ijs</td></tr>
   5155      * <tr><td>ijs</td><td>IJs</td><td>nl-BE</td></tr>
   5156      * <tr><td>ijs</td><td>js</td><td>tr-DE</td></tr>
   5157      * </table>
   5158      * @param locale the locale for accessing exceptional behavior (eg for tr).
   5159      * @param str the source string to change
   5160      * @return the modified string, or the original if no modifications were necessary.
   5161      * @internal
   5162      * @deprecated ICU internal only
   5163      */
   5164     @Deprecated
   5165     public static String toTitleFirst(ULocale locale, String str) {
   5166         int c = 0;
   5167         for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) {
   5168             c = UCharacter.codePointAt(str, i);
   5169             int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK);
   5170             if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK
   5171                 break;
   5172             }
   5173             if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) {
   5174                 continue;
   5175             }
   5176 
   5177             // we now have the first cased character
   5178             // What we really want is something like:
   5179             // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken);
   5180             // That is, just give us the titlecased string, for the locale, at i and following,
   5181             // and tell us how many characters are replaced.
   5182             // The following won't work completely: it needs some more substantial changes to UCaseProps
   5183 
   5184             String substring = str.substring(i, i+UCharacter.charCount(c));
   5185             String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0);
   5186 
   5187             // skip if no change
   5188             if (titled.codePointAt(0) == c) {
   5189                 // Using 0 is safe, since any change in titling will not have first initial character
   5190                 break;
   5191             }
   5192             StringBuilder result = new StringBuilder(str.length()).append(str, 0, i);
   5193             int startOfSuffix;
   5194 
   5195             // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps.
   5196 
   5197             if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') {
   5198                 result.append("IJ");
   5199                 startOfSuffix = 2;
   5200             } else {
   5201                 result.append(titled);
   5202                 startOfSuffix = i + UCharacter.charCount(c);
   5203             }
   5204 
   5205             // add the remainder, and return
   5206             return result.append(str, startOfSuffix, str.length()).toString();
   5207         }
   5208         return str; // no change
   5209     }
   5210 
   5211     /**
   5212      * {@icu} <p>Returns the titlecase version of the argument string.
   5213      * <p>Position for titlecasing is determined by the argument break
   5214      * iterator, hence the user can customize his break iterator for
   5215      * a specialized titlecasing. In this case only the forward iteration
   5216      * needs to be implemented.
   5217      * If the break iterator passed in is null, the default Unicode algorithm
   5218      * will be used to determine the titlecase positions.
   5219      *
   5220      * <p>Only positions returned by the break iterator will be title cased,
   5221      * character in between the positions will all be in lower case.
   5222      * <p>Casing is dependent on the argument locale and context-sensitive
   5223      * @param locale which string is to be converted in
   5224      * @param str source string to be performed on
   5225      * @param titleIter break iterator to determine the positions in which
   5226      *        the character should be title cased.
   5227      * @param options bit set to modify the titlecasing operation
   5228      * @return lowercase version of the argument string
   5229      * @see #TITLECASE_NO_LOWERCASE
   5230      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
   5231      * @stable ICU 54
   5232      */
   5233     public static String toTitleCase(Locale locale, String str,
   5234             BreakIterator titleIter,
   5235             int options) {
   5236         if(titleIter == null) {
   5237             titleIter = BreakIterator.getWordInstance(locale);
   5238         }
   5239         titleIter.setText(str);
   5240         return toTitleCase(getCaseLocale(locale), options, titleIter, str);
   5241     }
   5242 
   5243     /**
   5244      * {@icu} The given character is mapped to its case folding equivalent according
   5245      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
   5246      * folding equivalent, the character itself is returned.
   5247      *
   5248      * <p>This function only returns the simple, single-code point case mapping.
   5249      * Full case mappings should be used whenever possible because they produce
   5250      * better results by working on whole strings.
   5251      * They can map to a result string with a different length as appropriate.
   5252      * Full case mappings are applied by the case mapping functions
   5253      * that take String parameters rather than code points (int).
   5254      * See also the User Guide chapter on C/POSIX migration:
   5255      * http://www.icu-project.org/userguide/posix.html#case_mappings
   5256      *
   5257      * @param ch             the character to be converted
   5258      * @param defaultmapping Indicates whether the default mappings defined in
   5259      *                       CaseFolding.txt are to be used, otherwise the
   5260      *                       mappings for dotted I and dotless i marked with
   5261      *                       'T' in CaseFolding.txt are included.
   5262      * @return               the case folding equivalent of the character, if
   5263      *                       any; otherwise the character itself.
   5264      * @see                  #foldCase(String, boolean)
   5265      * @stable ICU 2.1
   5266      */
   5267     public static int foldCase(int ch, boolean defaultmapping) {
   5268         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
   5269     }
   5270 
   5271     /**
   5272      * {@icu} The given string is mapped to its case folding equivalent according to
   5273      * UnicodeData.txt and CaseFolding.txt; if any character has no case
   5274      * folding equivalent, the character itself is returned.
   5275      * "Full", multiple-code point case folding mappings are returned here.
   5276      * For "simple" single-code point mappings use the API
   5277      * foldCase(int ch, boolean defaultmapping).
   5278      * @param str            the String to be converted
   5279      * @param defaultmapping Indicates whether the default mappings defined in
   5280      *                       CaseFolding.txt are to be used, otherwise the
   5281      *                       mappings for dotted I and dotless i marked with
   5282      *                       'T' in CaseFolding.txt are included.
   5283      * @return               the case folding equivalent of the character, if
   5284      *                       any; otherwise the character itself.
   5285      * @see                  #foldCase(int, boolean)
   5286      * @stable ICU 2.1
   5287      */
   5288     public static String foldCase(String str, boolean defaultmapping) {
   5289         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
   5290     }
   5291 
   5292     /**
   5293      * {@icu} Option value for case folding: use default mappings defined in
   5294      * CaseFolding.txt.
   5295      * @stable ICU 2.6
   5296      */
   5297     public static final int FOLD_CASE_DEFAULT    =      0x0000;
   5298     /**
   5299      * {@icu} Option value for case folding:
   5300      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
   5301      * and dotless i appropriately for Turkic languages (tr, az).
   5302      *
   5303      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
   5304      * are to be included for default mappings and
   5305      * excluded for the Turkic-specific mappings.
   5306      *
   5307      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
   5308      * are to be excluded for default mappings and
   5309      * included for the Turkic-specific mappings.
   5310      *
   5311      * @stable ICU 2.6
   5312      */
   5313     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
   5314 
   5315     /**
   5316      * {@icu} The given character is mapped to its case folding equivalent according
   5317      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
   5318      * folding equivalent, the character itself is returned.
   5319      *
   5320      * <p>This function only returns the simple, single-code point case mapping.
   5321      * Full case mappings should be used whenever possible because they produce
   5322      * better results by working on whole strings.
   5323      * They can map to a result string with a different length as appropriate.
   5324      * Full case mappings are applied by the case mapping functions
   5325      * that take String parameters rather than code points (int).
   5326      * See also the User Guide chapter on C/POSIX migration:
   5327      * http://www.icu-project.org/userguide/posix.html#case_mappings
   5328      *
   5329      * @param ch the character to be converted
   5330      * @param options A bit set for special processing. Currently the recognised options
   5331      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
   5332      * @return the case folding equivalent of the character, if any; otherwise the
   5333      * character itself.
   5334      * @see #foldCase(String, boolean)
   5335      * @stable ICU 2.6
   5336      */
   5337     public static int foldCase(int ch, int options) {
   5338         return UCaseProps.INSTANCE.fold(ch, options);
   5339     }
   5340 
   5341     /**
   5342      * {@icu} The given string is mapped to its case folding equivalent according to
   5343      * UnicodeData.txt and CaseFolding.txt; if any character has no case
   5344      * folding equivalent, the character itself is returned.
   5345      * "Full", multiple-code point case folding mappings are returned here.
   5346      * For "simple" single-code point mappings use the API
   5347      * foldCase(int ch, boolean defaultmapping).
   5348      * @param str the String to be converted
   5349      * @param options A bit set for special processing. Currently the recognised options
   5350      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
   5351      * @return the case folding equivalent of the character, if any; otherwise the
   5352      *         character itself.
   5353      * @see #foldCase(int, boolean)
   5354      * @stable ICU 2.6
   5355      */
   5356     public static final String foldCase(String str, int options) {
   5357         if (str.length() <= 100) {
   5358             if (str.isEmpty()) {
   5359                 return str;
   5360             }
   5361             // Collect and apply only changes.
   5362             // Good if no or few changes. Bad (slow) if many changes.
   5363             Edits edits = new Edits();
   5364             StringBuilder replacementChars = CaseMapImpl.fold(
   5365                     options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
   5366             return applyEdits(str, replacementChars, edits);
   5367         } else {
   5368             return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
   5369         }
   5370     }
   5371 
   5372     /**
   5373      * {@icu} Returns the numeric value of a Han character.
   5374      *
   5375      * <p>This returns the value of Han 'numeric' code points,
   5376      * including those for zero, ten, hundred, thousand, ten thousand,
   5377      * and hundred million.
   5378      * This includes both the standard and 'checkwriting'
   5379      * characters, the 'big circle' zero character, and the standard
   5380      * zero character.
   5381      *
   5382      * <p>Note: The Unicode Standard has numeric values for more
   5383      * Han characters recognized by this method
   5384      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
   5385      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
   5386      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
   5387      *
   5388      * @param ch code point to query
   5389      * @return value if it is a Han 'numeric character,' otherwise return -1.
   5390      * @stable ICU 2.4
   5391      */
   5392     public static int getHanNumericValue(int ch)
   5393     {
   5394         switch(ch)
   5395         {
   5396         case IDEOGRAPHIC_NUMBER_ZERO_ :
   5397         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
   5398             return 0; // Han Zero
   5399         case CJK_IDEOGRAPH_FIRST_ :
   5400         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
   5401             return 1; // Han One
   5402         case CJK_IDEOGRAPH_SECOND_ :
   5403         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
   5404             return 2; // Han Two
   5405         case CJK_IDEOGRAPH_THIRD_ :
   5406         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
   5407             return 3; // Han Three
   5408         case CJK_IDEOGRAPH_FOURTH_ :
   5409         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
   5410             return 4; // Han Four
   5411         case CJK_IDEOGRAPH_FIFTH_ :
   5412         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
   5413             return 5; // Han Five
   5414         case CJK_IDEOGRAPH_SIXTH_ :
   5415         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
   5416             return 6; // Han Six
   5417         case CJK_IDEOGRAPH_SEVENTH_ :
   5418         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
   5419             return 7; // Han Seven
   5420         case CJK_IDEOGRAPH_EIGHTH_ :
   5421         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
   5422             return 8; // Han Eight
   5423         case CJK_IDEOGRAPH_NINETH_ :
   5424         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
   5425             return 9; // Han Nine
   5426         case CJK_IDEOGRAPH_TEN_ :
   5427         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
   5428             return 10;
   5429         case CJK_IDEOGRAPH_HUNDRED_ :
   5430         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
   5431             return 100;
   5432         case CJK_IDEOGRAPH_THOUSAND_ :
   5433         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
   5434             return 1000;
   5435         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
   5436             return 10000;
   5437         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
   5438             return 100000000;
   5439         }
   5440         return -1; // no value
   5441     }
   5442 
   5443     /**
   5444      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.
   5445      * <p>Example of use:<br>
   5446      * <pre>
   5447      * RangeValueIterator iterator = UCharacter.getTypeIterator();
   5448      * RangeValueIterator.Element element = new RangeValueIterator.Element();
   5449      * while (iterator.next(element)) {
   5450      *     System.out.println("Codepoint \\u" +
   5451      *                        Integer.toHexString(element.start) +
   5452      *                        " to codepoint \\u" +
   5453      *                        Integer.toHexString(element.limit - 1) +
   5454      *                        " has the character type " +
   5455      *                        element.value);
   5456      * }
   5457      * </pre>
   5458      * @return an iterator
   5459      * @stable ICU 2.6
   5460      */
   5461     public static RangeValueIterator getTypeIterator()
   5462     {
   5463         return new UCharacterTypeIterator();
   5464     }
   5465 
   5466     private static final class UCharacterTypeIterator implements RangeValueIterator {
   5467         UCharacterTypeIterator() {
   5468             reset();
   5469         }
   5470 
   5471         // implements RangeValueIterator
   5472         @Override
   5473         public boolean next(Element element) {
   5474             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
   5475                 element.start=range.startCodePoint;
   5476                 element.limit=range.endCodePoint+1;
   5477                 element.value=range.value;
   5478                 return true;
   5479             } else {
   5480                 return false;
   5481             }
   5482         }
   5483 
   5484         // implements RangeValueIterator
   5485         @Override
   5486         public void reset() {
   5487             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
   5488         }
   5489 
   5490         private Iterator<Trie2.Range> trieIterator;
   5491         private Trie2.Range range;
   5492 
   5493         private static final class MaskType implements Trie2.ValueMapper {
   5494             // Extracts the general category ("character type") from the trie value.
   5495             @Override
   5496             public int map(int value) {
   5497                 return value & UCharacterProperty.TYPE_MASK;
   5498             }
   5499         }
   5500         private static final MaskType MASK_TYPE=new MaskType();
   5501     }
   5502 
   5503     /**
   5504      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
   5505      * <p>This API only gets the iterator for the modern, most up-to-date
   5506      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
   5507      * for extended names use getExtendedNameIterator().
   5508      * <p>Example of use:<br>
   5509      * <pre>
   5510      * ValueIterator iterator = UCharacter.getNameIterator();
   5511      * ValueIterator.Element element = new ValueIterator.Element();
   5512      * while (iterator.next(element)) {
   5513      *     System.out.println("Codepoint \\u" +
   5514      *                        Integer.toHexString(element.codepoint) +
   5515      *                        " has the name " + (String)element.value);
   5516      * }
   5517      * </pre>
   5518      * <p>The maximal range which the name iterator iterates is from
   5519      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
   5520      * @return an iterator
   5521      * @stable ICU 2.6
   5522      */
   5523     public static ValueIterator getNameIterator(){
   5524         return new UCharacterNameIterator(UCharacterName.INSTANCE,
   5525                 UCharacterNameChoice.UNICODE_CHAR_NAME);
   5526     }
   5527 
   5528     /**
   5529      * {@icu} Returns an empty iterator.
   5530      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
   5531      * @return an empty iterator
   5532      * @deprecated ICU 49
   5533      * @see #getName1_0(int)
   5534      */
   5535     @Deprecated
   5536     public static ValueIterator getName1_0Iterator(){
   5537         return new DummyValueIterator();
   5538     }
   5539 
   5540     private static final class DummyValueIterator implements ValueIterator {
   5541         @Override
   5542         public boolean next(Element element) { return false; }
   5543         @Override
   5544         public void reset() {}
   5545         @Override
   5546         public void setRange(int start, int limit) {}
   5547     }
   5548 
   5549     /**
   5550      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
   5551      * <p>This API only gets the iterator for the extended names.
   5552      * For modern, most up-to-date Unicode names use getNameIterator() or
   5553      * for older 1.0 Unicode names use get1_0NameIterator().
   5554      * <p>Example of use:<br>
   5555      * <pre>
   5556      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
   5557      * ValueIterator.Element element = new ValueIterator.Element();
   5558      * while (iterator.next(element)) {
   5559      *     System.out.println("Codepoint \\u" +
   5560      *                        Integer.toHexString(element.codepoint) +
   5561      *                        " has the name " + (String)element.value);
   5562      * }
   5563      * </pre>
   5564      * <p>The maximal range which the name iterator iterates is from
   5565      * @return an iterator
   5566      * @stable ICU 2.6
   5567      */
   5568     public static ValueIterator getExtendedNameIterator(){
   5569         return new UCharacterNameIterator(UCharacterName.INSTANCE,
   5570                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
   5571     }
   5572 
   5573     /**
   5574      * {@icu} Returns the "age" of the code point.
   5575      * <p>The "age" is the Unicode version when the code point was first
   5576      * designated (as a non-character or for Private Use) or assigned a
   5577      * character.
   5578      * <p>This can be useful to avoid emitting code points to receiving
   5579      * processes that do not accept newer characters.
   5580      * <p>The data is from the UCD file DerivedAge.txt.
   5581      * @param ch The code point.
   5582      * @return the Unicode version number
   5583      * @stable ICU 2.6
   5584      */
   5585     public static VersionInfo getAge(int ch)
   5586     {
   5587         if (ch < MIN_VALUE || ch > MAX_VALUE) {
   5588             throw new IllegalArgumentException("Codepoint out of bounds");
   5589         }
   5590         return UCharacterProperty.INSTANCE.getAge(ch);
   5591     }
   5592 
   5593     /**
   5594      * {@icu} <p>Check a binary Unicode property for a code point.
   5595      * <p>Unicode, especially in version 3.2, defines many more properties
   5596      * than the original set in UnicodeData.txt.
   5597      * <p>This API is intended to reflect Unicode properties as defined in
   5598      * the Unicode Character Database (UCD) and Unicode Technical Reports
   5599      * (UTR).
   5600      * <p>For details about the properties see
   5601      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
   5602      * <p>For names of Unicode properties see the UCD file
   5603      * PropertyAliases.txt.
   5604      * <p>This API does not check the validity of the codepoint.
   5605      * <p>Important: If ICU is built with UCD files from Unicode versions
   5606      * below 3.2, then properties marked with "new" are not or
   5607      * not fully available.
   5608      * @param ch code point to test.
   5609      * @param property selector constant from com.ibm.icu.lang.UProperty,
   5610      *        identifies which binary property to check.
   5611      * @return true or false according to the binary Unicode property value
   5612      *         for ch. Also false if property is out of bounds or if the
   5613      *         Unicode version does not have data for the property at all, or
   5614      *         not for this code point.
   5615      * @see com.ibm.icu.lang.UProperty
   5616      * @stable ICU 2.6
   5617      */
   5618     public static boolean hasBinaryProperty(int ch, int property)
   5619     {
   5620         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
   5621     }
   5622 
   5623     /**
   5624      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.
   5625      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
   5626      * <p>Different from UCharacter.isLetter(ch)!
   5627      * @stable ICU 2.6
   5628      * @param ch codepoint to be tested
   5629      */
   5630     public static boolean isUAlphabetic(int ch)
   5631     {
   5632         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
   5633     }
   5634 
   5635     /**
   5636      * {@icu} <p>Check if a code point has the Lowercase Unicode property.
   5637      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
   5638      * <p>This is different from UCharacter.isLowerCase(ch)!
   5639      * @param ch codepoint to be tested
   5640      * @stable ICU 2.6
   5641      */
   5642     public static boolean isULowercase(int ch)
   5643     {
   5644         return hasBinaryProperty(ch, UProperty.LOWERCASE);
   5645     }
   5646 
   5647     /**
   5648      * {@icu} <p>Check if a code point has the Uppercase Unicode property.
   5649      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
   5650      * <p>This is different from UCharacter.isUpperCase(ch)!
   5651      * @param ch codepoint to be tested
   5652      * @stable ICU 2.6
   5653      */
   5654     public static boolean isUUppercase(int ch)
   5655     {
   5656         return hasBinaryProperty(ch, UProperty.UPPERCASE);
   5657     }
   5658 
   5659     /**
   5660      * {@icu} <p>Check if a code point has the White_Space Unicode property.
   5661      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
   5662      * <p>This is different from both UCharacter.isSpace(ch) and
   5663      * UCharacter.isWhitespace(ch)!
   5664      * @param ch codepoint to be tested
   5665      * @stable ICU 2.6
   5666      */
   5667     public static boolean isUWhiteSpace(int ch)
   5668     {
   5669         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
   5670     }
   5671 
   5672     /**
   5673      * {@icu} <p>Returns the property value for an Unicode property type of a code point.
   5674      * Also returns binary and mask property values.
   5675      * <p>Unicode, especially in version 3.2, defines many more properties than
   5676      * the original set in UnicodeData.txt.
   5677      * <p>The properties APIs are intended to reflect Unicode properties as
   5678      * defined in the Unicode Character Database (UCD) and Unicode Technical
   5679      * Reports (UTR). For details about the properties see
   5680      * http://www.unicode.org/.
   5681      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
   5682      *
   5683      * <pre>
   5684      * Sample usage:
   5685      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
   5686      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
   5687      * boolean b = (ideo == 1) ? true : false;
   5688      * </pre>
   5689      * @param ch code point to test.
   5690      * @param type UProperty selector constant, identifies which binary
   5691      *        property to check. Must be
   5692      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
   5693      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
   5694      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
   5695      * @return numeric value that is directly the property value or,
   5696      *         for enumerated properties, corresponds to the numeric value of
   5697      *         the enumerated constant of the respective property value
   5698      *         enumeration type (cast to enum type if necessary).
   5699      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
   5700      *         Returns a bit-mask for mask properties.
   5701      *         Returns 0 if 'type' is out of bounds or if the Unicode version
   5702      *         does not have data for the property at all, or not for this code
   5703      *         point.
   5704      * @see UProperty
   5705      * @see #hasBinaryProperty
   5706      * @see #getIntPropertyMinValue
   5707      * @see #getIntPropertyMaxValue
   5708      * @see #getUnicodeVersion
   5709      * @stable ICU 2.4
   5710      */
   5711     public static int getIntPropertyValue(int ch, int type)
   5712     {
   5713         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
   5714     }
   5715     /**
   5716      * {@icu} Returns a string version of the property value.
   5717      * @param propertyEnum The property enum value.
   5718      * @param codepoint The codepoint value.
   5719      * @param nameChoice The choice of the name.
   5720      * @return value as string
   5721      * @internal
   5722      * @deprecated This API is ICU internal only.
   5723      */
   5724     @Deprecated
   5725     ///CLOVER:OFF
   5726     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
   5727         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
   5728                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
   5729             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
   5730                     nameChoice);
   5731         }
   5732         if (propertyEnum == UProperty.NUMERIC_VALUE) {
   5733             return String.valueOf(getUnicodeNumericValue(codepoint));
   5734         }
   5735         // otherwise must be string property
   5736         switch (propertyEnum) {
   5737         case UProperty.AGE: return getAge(codepoint).toString();
   5738         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
   5739         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
   5740         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
   5741         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
   5742         case UProperty.NAME: return getName(codepoint);
   5743         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
   5744         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
   5745         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
   5746         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
   5747         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
   5748         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
   5749         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
   5750         }
   5751         throw new IllegalArgumentException("Illegal Property Enum");
   5752     }
   5753     ///CLOVER:ON
   5754 
   5755     /**
   5756      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
   5757      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
   5758      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
   5759      * @param type UProperty selector constant, identifies which binary
   5760      *        property to check. Must be
   5761      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
   5762      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
   5763      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
   5764      *         for a Unicode property. 0 if the property
   5765      *         selector 'type' is out of range.
   5766      * @see UProperty
   5767      * @see #hasBinaryProperty
   5768      * @see #getUnicodeVersion
   5769      * @see #getIntPropertyMaxValue
   5770      * @see #getIntPropertyValue
   5771      * @stable ICU 2.4
   5772      */
   5773     public static int getIntPropertyMinValue(int type){
   5774 
   5775         return 0; // undefined; and: all other properties have a minimum value of 0
   5776     }
   5777 
   5778 
   5779     /**
   5780      * {@icu} Returns the maximum value for an integer/binary Unicode property.
   5781      * Can be used together with UCharacter.getIntPropertyMinValue(int)
   5782      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
   5783      * Examples for min/max values (for Unicode 3.2):
   5784      * <ul>
   5785      * <li> UProperty.BIDI_CLASS:    0/18
   5786      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
   5787      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
   5788      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
   5789      * </ul>
   5790      * For undefined UProperty constant values, min/max values will be 0/-1.
   5791      * @param type UProperty selector constant, identifies which binary
   5792      *        property to check. Must be
   5793      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
   5794      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
   5795      * @return Maximum value returned by UCharacter.getIntPropertyValue for a Unicode
   5796      *         property. &lt;= 0 if the property selector 'type' is out of range.
   5797      * @see UProperty
   5798      * @see #hasBinaryProperty
   5799      * @see #getUnicodeVersion
   5800      * @see #getIntPropertyMinValue
   5801      * @see #getIntPropertyValue
   5802      * @stable ICU 2.4
   5803      */
   5804     public static int getIntPropertyMaxValue(int type)
   5805     {
   5806         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
   5807     }
   5808 
   5809     /**
   5810      * Provide the java.lang.Character forDigit API, for convenience.
   5811      * @stable ICU 3.0
   5812      */
   5813     public static char forDigit(int digit, int radix) {
   5814         return java.lang.Character.forDigit(digit, radix);
   5815     }
   5816 
   5817     // JDK 1.5 API coverage
   5818 
   5819     /**
   5820      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
   5821      *
   5822      * @stable ICU 3.0
   5823      */
   5824     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
   5825 
   5826     /**
   5827      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
   5828      *
   5829      * @stable ICU 3.0
   5830      */
   5831     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
   5832 
   5833     /**
   5834      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
   5835      *
   5836      * @stable ICU 3.0
   5837      */
   5838     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
   5839 
   5840     /**
   5841      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
   5842      *
   5843      * @stable ICU 3.0
   5844      */
   5845     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
   5846 
   5847     /**
   5848      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
   5849      *
   5850      * @stable ICU 3.0
   5851      */
   5852     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
   5853 
   5854     /**
   5855      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
   5856      *
   5857      * @stable ICU 3.0
   5858      */
   5859     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
   5860 
   5861     /**
   5862      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
   5863      *
   5864      * @stable ICU 3.0
   5865      */
   5866     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
   5867 
   5868     /**
   5869      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
   5870      *
   5871      * @stable ICU 3.0
   5872      */
   5873     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
   5874 
   5875     /**
   5876      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
   5877      *
   5878      * @stable ICU 3.0
   5879      */
   5880     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
   5881 
   5882     /**
   5883      * Equivalent to {@link Character#isValidCodePoint}.
   5884      *
   5885      * @param cp the code point to check
   5886      * @return true if cp is a valid code point
   5887      * @stable ICU 3.0
   5888      */
   5889     public static final boolean isValidCodePoint(int cp) {
   5890         return cp >= 0 && cp <= MAX_CODE_POINT;
   5891     }
   5892 
   5893     /**
   5894      * Same as {@link Character#isSupplementaryCodePoint}.
   5895      *
   5896      * @param cp the code point to check
   5897      * @return true if cp is a supplementary code point
   5898      * @stable ICU 3.0
   5899      */
   5900     public static final boolean isSupplementaryCodePoint(int cp) {
   5901         return Character.isSupplementaryCodePoint(cp);
   5902     }
   5903 
   5904     /**
   5905      * Same as {@link Character#isHighSurrogate}.
   5906      *
   5907      * @param ch the char to check
   5908      * @return true if ch is a high (lead) surrogate
   5909      * @stable ICU 3.0
   5910      */
   5911     public static boolean isHighSurrogate(char ch) {
   5912         return Character.isHighSurrogate(ch);
   5913     }
   5914 
   5915     /**
   5916      * Same as {@link Character#isLowSurrogate}.
   5917      *
   5918      * @param ch the char to check
   5919      * @return true if ch is a low (trail) surrogate
   5920      * @stable ICU 3.0
   5921      */
   5922     public static boolean isLowSurrogate(char ch) {
   5923         return Character.isLowSurrogate(ch);
   5924     }
   5925 
   5926     /**
   5927      * Same as {@link Character#isSurrogatePair}.
   5928      *
   5929      * @param high the high (lead) char
   5930      * @param low the low (trail) char
   5931      * @return true if high, low form a surrogate pair
   5932      * @stable ICU 3.0
   5933      */
   5934     public static final boolean isSurrogatePair(char high, char low) {
   5935         return Character.isSurrogatePair(high, low);
   5936     }
   5937 
   5938     /**
   5939      * Same as {@link Character#charCount}.
   5940      * Returns the number of chars needed to represent the code point (1 or 2).
   5941      * This does not check the code point for validity.
   5942      *
   5943      * @param cp the code point to check
   5944      * @return the number of chars needed to represent the code point
   5945      * @stable ICU 3.0
   5946      */
   5947     public static int charCount(int cp) {
   5948         return Character.charCount(cp);
   5949     }
   5950 
   5951     /**
   5952      * Same as {@link Character#toCodePoint}.
   5953      * Returns the code point represented by the two surrogate code units.
   5954      * This does not check the surrogate pair for validity.
   5955      *
   5956      * @param high the high (lead) surrogate
   5957      * @param low the low (trail) surrogate
   5958      * @return the code point formed by the surrogate pair
   5959      * @stable ICU 3.0
   5960      */
   5961     public static final int toCodePoint(char high, char low) {
   5962         return Character.toCodePoint(high, low);
   5963     }
   5964 
   5965     /**
   5966      * Same as {@link Character#codePointAt(CharSequence, int)}.
   5967      * Returns the code point at index.
   5968      * This examines only the characters at index and index+1.
   5969      *
   5970      * @param seq the characters to check
   5971      * @param index the index of the first or only char forming the code point
   5972      * @return the code point at the index
   5973      * @stable ICU 3.0
   5974      */
   5975     public static final int codePointAt(CharSequence seq, int index) {
   5976         char c1 = seq.charAt(index++);
   5977         if (isHighSurrogate(c1)) {
   5978             if (index < seq.length()) {
   5979                 char c2 = seq.charAt(index);
   5980                 if (isLowSurrogate(c2)) {
   5981                     return toCodePoint(c1, c2);
   5982                 }
   5983             }
   5984         }
   5985         return c1;
   5986     }
   5987 
   5988     /**
   5989      * Same as {@link Character#codePointAt(char[], int)}.
   5990      * Returns the code point at index.
   5991      * This examines only the characters at index and index+1.
   5992      *
   5993      * @param text the characters to check
   5994      * @param index the index of the first or only char forming the code point
   5995      * @return the code point at the index
   5996      * @stable ICU 3.0
   5997      */
   5998     public static final int codePointAt(char[] text, int index) {
   5999         char c1 = text[index++];
   6000         if (isHighSurrogate(c1)) {
   6001             if (index < text.length) {
   6002                 char c2 = text[index];
   6003                 if (isLowSurrogate(c2)) {
   6004                     return toCodePoint(c1, c2);
   6005                 }
   6006             }
   6007         }
   6008         return c1;
   6009     }
   6010 
   6011     /**
   6012      * Same as {@link Character#codePointAt(char[], int, int)}.
   6013      * Returns the code point at index.
   6014      * This examines only the characters at index and index+1.
   6015      *
   6016      * @param text the characters to check
   6017      * @param index the index of the first or only char forming the code point
   6018      * @param limit the limit of the valid text
   6019      * @return the code point at the index
   6020      * @stable ICU 3.0
   6021      */
   6022     public static final int codePointAt(char[] text, int index, int limit) {
   6023         if (index >= limit || limit > text.length) {
   6024             throw new IndexOutOfBoundsException();
   6025         }
   6026         char c1 = text[index++];
   6027         if (isHighSurrogate(c1)) {
   6028             if (index < limit) {
   6029                 char c2 = text[index];
   6030                 if (isLowSurrogate(c2)) {
   6031                     return toCodePoint(c1, c2);
   6032                 }
   6033             }
   6034         }
   6035         return c1;
   6036     }
   6037 
   6038     /**
   6039      * Same as {@link Character#codePointBefore(CharSequence, int)}.
   6040      * Return the code point before index.
   6041      * This examines only the characters at index-1 and index-2.
   6042      *
   6043      * @param seq the characters to check
   6044      * @param index the index after the last or only char forming the code point
   6045      * @return the code point before the index
   6046      * @stable ICU 3.0
   6047      */
   6048     public static final int codePointBefore(CharSequence seq, int index) {
   6049         char c2 = seq.charAt(--index);
   6050         if (isLowSurrogate(c2)) {
   6051             if (index > 0) {
   6052                 char c1 = seq.charAt(--index);
   6053                 if (isHighSurrogate(c1)) {
   6054                     return toCodePoint(c1, c2);
   6055                 }
   6056             }
   6057         }
   6058         return c2;
   6059     }
   6060 
   6061     /**
   6062      * Same as {@link Character#codePointBefore(char[], int)}.
   6063      * Returns the code point before index.
   6064      * This examines only the characters at index-1 and index-2.
   6065      *
   6066      * @param text the characters to check
   6067      * @param index the index after the last or only char forming the code point
   6068      * @return the code point before the index
   6069      * @stable ICU 3.0
   6070      */
   6071     public static final int codePointBefore(char[] text, int index) {
   6072         char c2 = text[--index];
   6073         if (isLowSurrogate(c2)) {
   6074             if (index > 0) {
   6075                 char c1 = text[--index];
   6076                 if (isHighSurrogate(c1)) {
   6077                     return toCodePoint(c1, c2);
   6078                 }
   6079             }
   6080         }
   6081         return c2;
   6082     }
   6083 
   6084     /**
   6085      * Same as {@link Character#codePointBefore(char[], int, int)}.
   6086      * Return the code point before index.
   6087      * This examines only the characters at index-1 and index-2.
   6088      *
   6089      * @param text the characters to check
   6090      * @param index the index after the last or only char forming the code point
   6091      * @param limit the start of the valid text
   6092      * @return the code point before the index
   6093      * @stable ICU 3.0
   6094      */
   6095     public static final int codePointBefore(char[] text, int index, int limit) {
   6096         if (index <= limit || limit < 0) {
   6097             throw new IndexOutOfBoundsException();
   6098         }
   6099         char c2 = text[--index];
   6100         if (isLowSurrogate(c2)) {
   6101             if (index > limit) {
   6102                 char c1 = text[--index];
   6103                 if (isHighSurrogate(c1)) {
   6104                     return toCodePoint(c1, c2);
   6105                 }
   6106             }
   6107         }
   6108         return c2;
   6109     }
   6110 
   6111     /**
   6112      * Same as {@link Character#toChars(int, char[], int)}.
   6113      * Writes the chars representing the
   6114      * code point into the destination at the given index.
   6115      *
   6116      * @param cp the code point to convert
   6117      * @param dst the destination array into which to put the char(s) representing the code point
   6118      * @param dstIndex the index at which to put the first (or only) char
   6119      * @return the count of the number of chars written (1 or 2)
   6120      * @throws IllegalArgumentException if cp is not a valid code point
   6121      * @stable ICU 3.0
   6122      */
   6123     public static final int toChars(int cp, char[] dst, int dstIndex) {
   6124         return Character.toChars(cp, dst, dstIndex);
   6125     }
   6126 
   6127     /**
   6128      * Same as {@link Character#toChars(int)}.
   6129      * Returns a char array representing the code point.
   6130      *
   6131      * @param cp the code point to convert
   6132      * @return an array containing the char(s) representing the code point
   6133      * @throws IllegalArgumentException if cp is not a valid code point
   6134      * @stable ICU 3.0
   6135      */
   6136     public static final char[] toChars(int cp) {
   6137         return Character.toChars(cp);
   6138     }
   6139 
   6140     /**
   6141      * Equivalent to the {@link Character#getDirectionality(char)} method, for
   6142      * convenience. Returns a byte representing the directionality of the
   6143      * character.
   6144      *
   6145      * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns
   6146      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
   6147      *
   6148      * {@icunote} The return value must be tested using the constants defined in {@link
   6149      * UCharacterDirection} and its interface {@link
   6150      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
   6151      * defined by <code>java.lang.Character</code>.
   6152      * @param cp the code point to check
   6153      * @return the directionality of the code point
   6154      * @see #getDirection
   6155      * @stable ICU 3.0
   6156      */
   6157     public static byte getDirectionality(int cp)
   6158     {
   6159         return (byte)getDirection(cp);
   6160     }
   6161 
   6162     /**
   6163      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
   6164      * method, for convenience.  Counts the number of code points in the range
   6165      * of text.
   6166      * @param text the characters to check
   6167      * @param start the start of the range
   6168      * @param limit the limit of the range
   6169      * @return the number of code points in the range
   6170      * @stable ICU 3.0
   6171      */
   6172     public static int codePointCount(CharSequence text, int start, int limit) {
   6173         if (start < 0 || limit < start || limit > text.length()) {
   6174             throw new IndexOutOfBoundsException("start (" + start +
   6175                     ") or limit (" + limit +
   6176                     ") invalid or out of range 0, " + text.length());
   6177         }
   6178 
   6179         int len = limit - start;
   6180         while (limit > start) {
   6181             char ch = text.charAt(--limit);
   6182             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
   6183                 ch = text.charAt(--limit);
   6184                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
   6185                     --len;
   6186                     break;
   6187                 }
   6188             }
   6189         }
   6190         return len;
   6191     }
   6192 
   6193     /**
   6194      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
   6195      * convenience. Counts the number of code points in the range of text.
   6196      * @param text the characters to check
   6197      * @param start the start of the range
   6198      * @param limit the limit of the range
   6199      * @return the number of code points in the range
   6200      * @stable ICU 3.0
   6201      */
   6202     public static int codePointCount(char[] text, int start, int limit) {
   6203         if (start < 0 || limit < start || limit > text.length) {
   6204             throw new IndexOutOfBoundsException("start (" + start +
   6205                     ") or limit (" + limit +
   6206                     ") invalid or out of range 0, " + text.length);
   6207         }
   6208 
   6209         int len = limit - start;
   6210         while (limit > start) {
   6211             char ch = text[--limit];
   6212             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
   6213                 ch = text[--limit];
   6214                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
   6215                     --len;
   6216                     break;
   6217                 }
   6218             }
   6219         }
   6220         return len;
   6221     }
   6222 
   6223     /**
   6224      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
   6225      * method, for convenience.  Adjusts the char index by a code point offset.
   6226      * @param text the characters to check
   6227      * @param index the index to adjust
   6228      * @param codePointOffset the number of code points by which to offset the index
   6229      * @return the adjusted index
   6230      * @stable ICU 3.0
   6231      */
   6232     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
   6233         if (index < 0 || index > text.length()) {
   6234             throw new IndexOutOfBoundsException("index ( " + index +
   6235                     ") out of range 0, " + text.length());
   6236         }
   6237 
   6238         if (codePointOffset < 0) {
   6239             while (++codePointOffset <= 0) {
   6240                 char ch = text.charAt(--index);
   6241                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
   6242                     ch = text.charAt(--index);
   6243                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
   6244                         if (++codePointOffset > 0) {
   6245                             return index+1;
   6246                         }
   6247                     }
   6248                 }
   6249             }
   6250         } else {
   6251             int limit = text.length();
   6252             while (--codePointOffset >= 0) {
   6253                 char ch = text.charAt(index++);
   6254                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
   6255                     ch = text.charAt(index++);
   6256                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
   6257                         if (--codePointOffset < 0) {
   6258                             return index-1;
   6259                         }
   6260                     }
   6261                 }
   6262             }
   6263         }
   6264 
   6265         return index;
   6266     }
   6267 
   6268     /**
   6269      * Equivalent to the
   6270      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
   6271      * method, for convenience.  Adjusts the char index by a code point offset.
   6272      * @param text the characters to check
   6273      * @param start the start of the range to check
   6274      * @param count the length of the range to check
   6275      * @param index the index to adjust
   6276      * @param codePointOffset the number of code points by which to offset the index
   6277      * @return the adjusted index
   6278      * @stable ICU 3.0
   6279      */
   6280     public static int offsetByCodePoints(char[] text, int start, int count, int index,
   6281             int codePointOffset) {
   6282         int limit = start + count;
   6283         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
   6284             throw new IndexOutOfBoundsException("index ( " + index +
   6285                     ") out of range " + start +
   6286                     ", " + limit +
   6287                     " in array 0, " + text.length);
   6288         }
   6289 
   6290         if (codePointOffset < 0) {
   6291             while (++codePointOffset <= 0) {
   6292                 char ch = text[--index];
   6293                 if (index < start) {
   6294                     throw new IndexOutOfBoundsException("index ( " + index +
   6295                             ") < start (" + start +
   6296                             ")");
   6297                 }
   6298                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
   6299                     ch = text[--index];
   6300                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
   6301                         if (++codePointOffset > 0) {
   6302                             return index+1;
   6303                         }
   6304                     }
   6305                 }
   6306             }
   6307         } else {
   6308             while (--codePointOffset >= 0) {
   6309                 char ch = text[index++];
   6310                 if (index > limit) {
   6311                     throw new IndexOutOfBoundsException("index ( " + index +
   6312                             ") > limit (" + limit +
   6313                             ")");
   6314                 }
   6315                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
   6316                     ch = text[index++];
   6317                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
   6318                         if (--codePointOffset < 0) {
   6319                             return index-1;
   6320                         }
   6321                     }
   6322                 }
   6323             }
   6324         }
   6325 
   6326         return index;
   6327     }
   6328 
   6329     // private variables -------------------------------------------------
   6330 
   6331     /**
   6332      * To get the last character out from a data type
   6333      */
   6334     private static final int LAST_CHAR_MASK_ = 0xFFFF;
   6335 
   6336     //    /**
   6337     //     * To get the last byte out from a data type
   6338     //     */
   6339     //    private static final int LAST_BYTE_MASK_ = 0xFF;
   6340     //
   6341     //    /**
   6342     //     * Shift 16 bits
   6343     //     */
   6344     //    private static final int SHIFT_16_ = 16;
   6345     //
   6346     //    /**
   6347     //     * Shift 24 bits
   6348     //     */
   6349     //    private static final int SHIFT_24_ = 24;
   6350     //
   6351     //    /**
   6352     //     * Decimal radix
   6353     //     */
   6354     //    private static final int DECIMAL_RADIX_ = 10;
   6355 
   6356     /**
   6357      * No break space code point
   6358      */
   6359     private static final int NO_BREAK_SPACE_ = 0xA0;
   6360 
   6361     /**
   6362      * Figure space code point
   6363      */
   6364     private static final int FIGURE_SPACE_ = 0x2007;
   6365 
   6366     /**
   6367      * Narrow no break space code point
   6368      */
   6369     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
   6370 
   6371     /**
   6372      * Ideographic number zero code point
   6373      */
   6374     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
   6375 
   6376     /**
   6377      * CJK Ideograph, First code point
   6378      */
   6379     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
   6380 
   6381     /**
   6382      * CJK Ideograph, Second code point
   6383      */
   6384     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
   6385 
   6386     /**
   6387      * CJK Ideograph, Third code point
   6388      */
   6389     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
   6390 
   6391     /**
   6392      * CJK Ideograph, Fourth code point
   6393      */
   6394     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
   6395 
   6396     /**
   6397      * CJK Ideograph, FIFTH code point
   6398      */
   6399     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
   6400 
   6401     /**
   6402      * CJK Ideograph, Sixth code point
   6403      */
   6404     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
   6405 
   6406     /**
   6407      * CJK Ideograph, Seventh code point
   6408      */
   6409     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
   6410 
   6411     /**
   6412      * CJK Ideograph, Eighth code point
   6413      */
   6414     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
   6415 
   6416     /**
   6417      * CJK Ideograph, Nineth code point
   6418      */
   6419     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
   6420 
   6421     /**
   6422      * Application Program command code point
   6423      */
   6424     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
   6425 
   6426     /**
   6427      * Unit separator code point
   6428      */
   6429     private static final int UNIT_SEPARATOR_ = 0x001F;
   6430 
   6431     /**
   6432      * Delete code point
   6433      */
   6434     private static final int DELETE_ = 0x007F;
   6435 
   6436     /**
   6437      * Han digit characters
   6438      */
   6439     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
   6440     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
   6441     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
   6442     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
   6443     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
   6444     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
   6445     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
   6446     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
   6447     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
   6448     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
   6449     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
   6450     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
   6451     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
   6452     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
   6453     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
   6454     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
   6455     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
   6456     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
   6457 
   6458     // private constructor -----------------------------------------------
   6459     ///CLOVER:OFF
   6460     /**
   6461      * Private constructor to prevent instantiation
   6462      */
   6463     private UCharacter()
   6464     {
   6465     }
   6466     ///CLOVER:ON
   6467 }
   6468