Home | History | Annotate | Download | only in lang
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /**
      5  *******************************************************************************
      6  * Copyright (C) 1996-2016, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 
     11 package android.icu.lang;
     12 
     13 import java.lang.ref.SoftReference;
     14 import java.util.HashMap;
     15 import java.util.Iterator;
     16 import java.util.Locale;
     17 import java.util.Map;
     18 
     19 import android.icu.impl.CaseMapImpl;
     20 import android.icu.impl.IllegalIcuArgumentException;
     21 import android.icu.impl.Trie2;
     22 import android.icu.impl.UBiDiProps;
     23 import android.icu.impl.UCaseProps;
     24 import android.icu.impl.UCharacterName;
     25 import android.icu.impl.UCharacterNameChoice;
     26 import android.icu.impl.UCharacterProperty;
     27 import android.icu.impl.UCharacterUtility;
     28 import android.icu.impl.UPropertyAliases;
     29 import android.icu.lang.UCharacterEnums.ECharacterCategory;
     30 import android.icu.lang.UCharacterEnums.ECharacterDirection;
     31 import android.icu.text.BreakIterator;
     32 import android.icu.text.Normalizer2;
     33 import android.icu.util.RangeValueIterator;
     34 import android.icu.util.ULocale;
     35 import android.icu.util.ValueIterator;
     36 import android.icu.util.VersionInfo;
     37 
     38 /**
     39  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
     40  *
     41  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
     42  * These extensions provide support for more Unicode properties.
     43  * Each ICU release supports the latest version of Unicode available at that time.
     44  *
     45  * <p>For some time before Java 5 added support for supplementary Unicode code points,
     46  * the ICU UCharacter class and many other ICU classes already supported them.
     47  * Some UCharacter methods and constants were widened slightly differently than
     48  * how the Character class methods and constants were widened later.
     49  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
     50  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
     51  *
     52  * <p>Code points are represented in these API using ints. While it would be
     53  * more convenient in Java to have a separate primitive datatype for them,
     54  * ints suffice in the meantime.
     55  *
     56  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
     57  * properties, the main differences between UCharacter and Character are:
     58  * <ul>
     59  * <li> UCharacter is not designed to be a char wrapper and does not have
     60  *      APIs that involve management of that single char.<br>
     61  *      These include:
     62  *      <ul>
     63  *        <li> char charValue(),
     64  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
     65  *      </ul>
     66  * <li> UCharacter does not include Character APIs that are deprecated, nor
     67  *      does it include the Java-specific character information, such as
     68  *      boolean isJavaIdentifierPart(char ch).
     69  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
     70  *      values '10' - '35'. UCharacter also does this in digit and
     71  *      getNumericValue, to adhere to the java semantics of these
     72  *      methods.  New methods unicodeDigit, and
     73  *      getUnicodeNumericValue do not treat the above code points
     74  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
     75  * </ul>
     76  * <p>
     77  * In addition to Java compatibility functions, which calculate derived properties,
     78  * this API provides low-level access to the Unicode Character Database.
     79  * <p>
     80  * Unicode assigns each code point (not just assigned characters) values for
     81  * many properties.
     82  * Most of them are simple boolean flags, or constants from a small enumerated list.
     83  * For some properties, values are strings or other relatively more complex types.
     84  * <p>
     85  * For more information see
     86  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
     87  * (http://www.unicode.org/ucd/)
     88  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
     89  * User Guide chapter on Properties</a>
     90  * (http://www.icu-project.org/userguide/properties.html).
     91  * <p>
     92  * There are also functions that provide easy migration from C/POSIX functions
     93  * like isblank(). Their use is generally discouraged because the C/POSIX
     94  * standards do not define their semantics beyond the ASCII range, which means
     95  * that different implementations exhibit very different behavior.
     96  * Instead, Unicode properties should be used directly.
     97  * <p>
     98  * There are also only a few, broad C/POSIX character classes, and they tend
     99  * to be used for conflicting purposes. For example, the "isalpha()" class
    100  * is sometimes used to determine word boundaries, while a more sophisticated
    101  * approach would at least distinguish initial letters from continuation
    102  * characters (the latter including combining marks).
    103  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
    104  * Another example: There is no "istitle()" class for titlecase characters.
    105  * <p>
    106  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
    107  * ICU implements them according to the Standard Recommendations in
    108  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
    109  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
    110  * <p>
    111  * API access for C/POSIX character classes is as follows:
    112  * <pre>{@code
    113  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
    114  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
    115  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
    116  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
    117  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
    118  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
    119  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
    120  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
    121  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
    122  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
    123  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
    124  * - cntrl:     getType(c)==CONTROL
    125  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
    126  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
    127  * <p>
    128  * The C/POSIX character classes are also available in UnicodeSet patterns,
    129  * using patterns like [:graph:] or \p{graph}.
    130  *
    131  * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions.
    132  * Comparison:<ul>
    133  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
    134  *       most of general categories "Z" (separators) + most whitespace ISO controls
    135  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
    136  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
    137  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
    138  *
    139  * <p>
    140  * This class is not subclassable.
    141  *
    142  * @author Syn Wee Quek
    143  * @see android.icu.lang.UCharacterEnums
    144  */
    145 
    146 public final class UCharacter implements ECharacterCategory, ECharacterDirection
    147 {
    148     // public inner classes ----------------------------------------------
    149 
    150     /**
    151      * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
    152      *
    153      * A family of character subsets representing the character blocks in the
    154      * Unicode specification, generated from Unicode Data file Blocks.txt.
    155      * Character blocks generally define characters used for a specific script
    156      * or purpose. A character is contained by at most one Unicode block.
    157      *
    158      * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU.
    159      */
    160     public static final class UnicodeBlock extends Character.Subset
    161     {
    162         // block id corresponding to icu4c -----------------------------------
    163 
    164         /**
    165          */
    166         public static final int INVALID_CODE_ID = -1;
    167         /**
    168          */
    169         public static final int BASIC_LATIN_ID = 1;
    170         /**
    171          */
    172         public static final int LATIN_1_SUPPLEMENT_ID = 2;
    173         /**
    174          */
    175         public static final int LATIN_EXTENDED_A_ID = 3;
    176         /**
    177          */
    178         public static final int LATIN_EXTENDED_B_ID = 4;
    179         /**
    180          */
    181         public static final int IPA_EXTENSIONS_ID = 5;
    182         /**
    183          */
    184         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
    185         /**
    186          */
    187         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
    188         /**
    189          * Unicode 3.2 renames this block to "Greek and Coptic".
    190          */
    191         public static final int GREEK_ID = 8;
    192         /**
    193          */
    194         public static final int CYRILLIC_ID = 9;
    195         /**
    196          */
    197         public static final int ARMENIAN_ID = 10;
    198         /**
    199          */
    200         public static final int HEBREW_ID = 11;
    201         /**
    202          */
    203         public static final int ARABIC_ID = 12;
    204         /**
    205          */
    206         public static final int SYRIAC_ID = 13;
    207         /**
    208          */
    209         public static final int THAANA_ID = 14;
    210         /**
    211          */
    212         public static final int DEVANAGARI_ID = 15;
    213         /**
    214          */
    215         public static final int BENGALI_ID = 16;
    216         /**
    217          */
    218         public static final int GURMUKHI_ID = 17;
    219         /**
    220          */
    221         public static final int GUJARATI_ID = 18;
    222         /**
    223          */
    224         public static final int ORIYA_ID = 19;
    225         /**
    226          */
    227         public static final int TAMIL_ID = 20;
    228         /**
    229          */
    230         public static final int TELUGU_ID = 21;
    231         /**
    232          */
    233         public static final int KANNADA_ID = 22;
    234         /**
    235          */
    236         public static final int MALAYALAM_ID = 23;
    237         /**
    238          */
    239         public static final int SINHALA_ID = 24;
    240         /**
    241          */
    242         public static final int THAI_ID = 25;
    243         /**
    244          */
    245         public static final int LAO_ID = 26;
    246         /**
    247          */
    248         public static final int TIBETAN_ID = 27;
    249         /**
    250          */
    251         public static final int MYANMAR_ID = 28;
    252         /**
    253          */
    254         public static final int GEORGIAN_ID = 29;
    255         /**
    256          */
    257         public static final int HANGUL_JAMO_ID = 30;
    258         /**
    259          */
    260         public static final int ETHIOPIC_ID = 31;
    261         /**
    262          */
    263         public static final int CHEROKEE_ID = 32;
    264         /**
    265          */
    266         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
    267         /**
    268          */
    269         public static final int OGHAM_ID = 34;
    270         /**
    271          */
    272         public static final int RUNIC_ID = 35;
    273         /**
    274          */
    275         public static final int KHMER_ID = 36;
    276         /**
    277          */
    278         public static final int MONGOLIAN_ID = 37;
    279         /**
    280          */
    281         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
    282         /**
    283          */
    284         public static final int GREEK_EXTENDED_ID = 39;
    285         /**
    286          */
    287         public static final int GENERAL_PUNCTUATION_ID = 40;
    288         /**
    289          */
    290         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
    291         /**
    292          */
    293         public static final int CURRENCY_SYMBOLS_ID = 42;
    294         /**
    295          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
    296          * Symbols".
    297          */
    298         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
    299         /**
    300          */
    301         public static final int LETTERLIKE_SYMBOLS_ID = 44;
    302         /**
    303          */
    304         public static final int NUMBER_FORMS_ID = 45;
    305         /**
    306          */
    307         public static final int ARROWS_ID = 46;
    308         /**
    309          */
    310         public static final int MATHEMATICAL_OPERATORS_ID = 47;
    311         /**
    312          */
    313         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
    314         /**
    315          */
    316         public static final int CONTROL_PICTURES_ID = 49;
    317         /**
    318          */
    319         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
    320         /**
    321          */
    322         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
    323         /**
    324          */
    325         public static final int BOX_DRAWING_ID = 52;
    326         /**
    327          */
    328         public static final int BLOCK_ELEMENTS_ID = 53;
    329         /**
    330          */
    331         public static final int GEOMETRIC_SHAPES_ID = 54;
    332         /**
    333          */
    334         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
    335         /**
    336          */
    337         public static final int DINGBATS_ID = 56;
    338         /**
    339          */
    340         public static final int BRAILLE_PATTERNS_ID = 57;
    341         /**
    342          */
    343         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
    344         /**
    345          */
    346         public static final int KANGXI_RADICALS_ID = 59;
    347         /**
    348          */
    349         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
    350         /**
    351          */
    352         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
    353         /**
    354          */
    355         public static final int HIRAGANA_ID = 62;
    356         /**
    357          */
    358         public static final int KATAKANA_ID = 63;
    359         /**
    360          */
    361         public static final int BOPOMOFO_ID = 64;
    362         /**
    363          */
    364         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
    365         /**
    366          */
    367         public static final int KANBUN_ID = 66;
    368         /**
    369          */
    370         public static final int BOPOMOFO_EXTENDED_ID = 67;
    371         /**
    372          */
    373         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
    374         /**
    375          */
    376         public static final int CJK_COMPATIBILITY_ID = 69;
    377         /**
    378          */
    379         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
    380         /**
    381          */
    382         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
    383         /**
    384          */
    385         public static final int YI_SYLLABLES_ID = 72;
    386         /**
    387          */
    388         public static final int YI_RADICALS_ID = 73;
    389         /**
    390          */
    391         public static final int HANGUL_SYLLABLES_ID = 74;
    392         /**
    393          */
    394         public static final int HIGH_SURROGATES_ID = 75;
    395         /**
    396          */
    397         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
    398         /**
    399          */
    400         public static final int LOW_SURROGATES_ID = 77;
    401         /**
    402          * Same as public static final int PRIVATE_USE_ID.
    403          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
    404          * and multiple code point ranges had this block.
    405          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
    406          * and adds separate blocks for the supplementary PUAs.
    407          */
    408         public static final int PRIVATE_USE_AREA_ID = 78;
    409         /**
    410          * Same as public static final int PRIVATE_USE_AREA_ID.
    411          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
    412          * and multiple code point ranges had this block.
    413          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
    414          * and adds separate blocks for the supplementary PUAs.
    415          */
    416         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
    417         /**
    418          */
    419         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
    420         /**
    421          */
    422         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
    423         /**
    424          */
    425         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
    426         /**
    427          */
    428         public static final int COMBINING_HALF_MARKS_ID = 82;
    429         /**
    430          */
    431         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
    432         /**
    433          */
    434         public static final int SMALL_FORM_VARIANTS_ID = 84;
    435         /**
    436          */
    437         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
    438         /**
    439          */
    440         public static final int SPECIALS_ID = 86;
    441         /**
    442          */
    443         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
    444         /**
    445          */
    446         public static final int OLD_ITALIC_ID = 88;
    447         /**
    448          */
    449         public static final int GOTHIC_ID = 89;
    450         /**
    451          */
    452         public static final int DESERET_ID = 90;
    453         /**
    454          */
    455         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
    456         /**
    457          */
    458         public static final int MUSICAL_SYMBOLS_ID = 92;
    459         /**
    460          */
    461         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
    462         /**
    463          */
    464         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
    465         /**
    466          */
    467         public static final int
    468         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
    469         /**
    470          */
    471         public static final int TAGS_ID = 96;
    472 
    473         // New blocks in Unicode 3.2
    474 
    475         /**
    476          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
    477          */
    478         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
    479         /**
    480          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
    481          */
    482 
    483         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
    484         /**
    485          */
    486         public static final int TAGALOG_ID = 98;
    487         /**
    488          */
    489         public static final int HANUNOO_ID = 99;
    490         /**
    491          */
    492         public static final int BUHID_ID = 100;
    493         /**
    494          */
    495         public static final int TAGBANWA_ID = 101;
    496         /**
    497          */
    498         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
    499         /**
    500          */
    501         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
    502         /**
    503          */
    504         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
    505         /**
    506          */
    507         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
    508         /**
    509          */
    510         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
    511         /**
    512          */
    513         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
    514         /**
    515          */
    516         public static final int VARIATION_SELECTORS_ID = 108;
    517         /**
    518          */
    519         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
    520         /**
    521          */
    522         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
    523 
    524         /**
    525          */
    526         public static final int LIMBU_ID = 111; /*[1900]*/
    527         /**
    528          */
    529         public static final int TAI_LE_ID = 112; /*[1950]*/
    530         /**
    531          */
    532         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
    533         /**
    534          */
    535         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
    536         /**
    537          */
    538         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
    539         /**
    540          */
    541         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
    542         /**
    543          */
    544         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
    545         /**
    546          */
    547         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
    548         /**
    549          */
    550         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
    551         /**
    552          */
    553         public static final int UGARITIC_ID = 120; /*[10380]*/
    554         /**
    555          */
    556         public static final int SHAVIAN_ID = 121; /*[10450]*/
    557         /**
    558          */
    559         public static final int OSMANYA_ID = 122; /*[10480]*/
    560         /**
    561          */
    562         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
    563         /**
    564          */
    565         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
    566         /**
    567          */
    568         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
    569 
    570         /* New blocks in Unicode 4.1 */
    571 
    572         /**
    573          */
    574         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
    575 
    576         /**
    577          */
    578         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
    579 
    580         /**
    581          */
    582         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
    583 
    584         /**
    585          */
    586         public static final int BUGINESE_ID = 129; /*[1A00]*/
    587 
    588         /**
    589          */
    590         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
    591 
    592         /**
    593          */
    594         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
    595 
    596         /**
    597          */
    598         public static final int COPTIC_ID = 132; /*[2C80]*/
    599 
    600         /**
    601          */
    602         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
    603 
    604         /**
    605          */
    606         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
    607 
    608         /**
    609          */
    610         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
    611 
    612         /**
    613          */
    614         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
    615 
    616         /**
    617          */
    618         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
    619 
    620         /**
    621          */
    622         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
    623 
    624         /**
    625          */
    626         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
    627 
    628         /**
    629          */
    630         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
    631 
    632         /**
    633          */
    634         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
    635 
    636         /**
    637          */
    638         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
    639 
    640         /**
    641          */
    642         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
    643 
    644         /**
    645          */
    646         public static final int TIFINAGH_ID = 144; /*[2D30]*/
    647 
    648         /**
    649          */
    650         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
    651 
    652         /* New blocks in Unicode 5.0 */
    653 
    654         /**
    655          */
    656         public static final int NKO_ID = 146; /*[07C0]*/
    657         /**
    658          */
    659         public static final int BALINESE_ID = 147; /*[1B00]*/
    660         /**
    661          */
    662         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
    663         /**
    664          */
    665         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
    666         /**
    667          */
    668         public static final int PHAGS_PA_ID = 150; /*[A840]*/
    669         /**
    670          */
    671         public static final int PHOENICIAN_ID = 151; /*[10900]*/
    672         /**
    673          */
    674         public static final int CUNEIFORM_ID = 152; /*[12000]*/
    675         /**
    676          */
    677         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
    678         /**
    679          */
    680         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
    681 
    682         /**
    683          */
    684         public static final int SUNDANESE_ID = 155; /* [1B80] */
    685 
    686         /**
    687          */
    688         public static final int LEPCHA_ID = 156; /* [1C00] */
    689 
    690         /**
    691          */
    692         public static final int OL_CHIKI_ID = 157; /* [1C50] */
    693 
    694         /**
    695          */
    696         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
    697 
    698         /**
    699          */
    700         public static final int VAI_ID = 159; /* [A500] */
    701 
    702         /**
    703          */
    704         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
    705 
    706         /**
    707          */
    708         public static final int SAURASHTRA_ID = 161; /* [A880] */
    709 
    710         /**
    711          */
    712         public static final int KAYAH_LI_ID = 162; /* [A900] */
    713 
    714         /**
    715          */
    716         public static final int REJANG_ID = 163; /* [A930] */
    717 
    718         /**
    719          */
    720         public static final int CHAM_ID = 164; /* [AA00] */
    721 
    722         /**
    723          */
    724         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
    725 
    726         /**
    727          */
    728         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
    729 
    730         /**
    731          */
    732         public static final int LYCIAN_ID = 167; /* [10280] */
    733 
    734         /**
    735          */
    736         public static final int CARIAN_ID = 168; /* [102A0] */
    737 
    738         /**
    739          */
    740         public static final int LYDIAN_ID = 169; /* [10920] */
    741 
    742         /**
    743          */
    744         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
    745 
    746         /**
    747          */
    748         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
    749 
    750         /* New blocks in Unicode 5.2 */
    751 
    752         /***/
    753         public static final int SAMARITAN_ID = 172; /*[0800]*/
    754         /***/
    755         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
    756         /***/
    757         public static final int TAI_THAM_ID = 174; /*[1A20]*/
    758         /***/
    759         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
    760         /***/
    761         public static final int LISU_ID = 176; /*[A4D0]*/
    762         /***/
    763         public static final int BAMUM_ID = 177; /*[A6A0]*/
    764         /***/
    765         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
    766         /***/
    767         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
    768         /***/
    769         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
    770         /***/
    771         public static final int JAVANESE_ID = 181; /*[A980]*/
    772         /***/
    773         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
    774         /***/
    775         public static final int TAI_VIET_ID = 183; /*[AA80]*/
    776         /***/
    777         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
    778         /***/
    779         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
    780         /***/
    781         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
    782         /***/
    783         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
    784         /***/
    785         public static final int AVESTAN_ID = 188; /*[10B00]*/
    786         /***/
    787         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
    788         /***/
    789         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
    790         /***/
    791         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
    792         /***/
    793         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
    794         /***/
    795         public static final int KAITHI_ID = 193; /*[11080]*/
    796         /***/
    797         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
    798         /***/
    799         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
    800         /***/
    801         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
    802         /***/
    803         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
    804 
    805         /* New blocks in Unicode 6.0 */
    806 
    807         /***/
    808         public static final int MANDAIC_ID = 198; /*[0840]*/
    809         /***/
    810         public static final int BATAK_ID = 199; /*[1BC0]*/
    811         /***/
    812         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
    813         /***/
    814         public static final int BRAHMI_ID = 201; /*[11000]*/
    815         /***/
    816         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
    817         /***/
    818         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
    819         /***/
    820         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
    821         /***/
    822         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
    823         /***/
    824         public static final int EMOTICONS_ID = 206; /*[1F600]*/
    825         /***/
    826         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
    827         /***/
    828         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
    829         /***/
    830         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
    831 
    832         /* New blocks in Unicode 6.1 */
    833 
    834         /***/
    835         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
    836         /***/
    837         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
    838         /***/
    839         public static final int CHAKMA_ID = 212; /*[11100]*/
    840         /***/
    841         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
    842         /***/
    843         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
    844         /***/
    845         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
    846         /***/
    847         public static final int MIAO_ID = 216; /*[16F00]*/
    848         /***/
    849         public static final int SHARADA_ID = 217; /*[11180]*/
    850         /***/
    851         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
    852         /***/
    853         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
    854         /***/
    855         public static final int TAKRI_ID = 220; /*[11680]*/
    856 
    857         /* New blocks in Unicode 7.0 */
    858 
    859         /***/
    860         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
    861         /***/
    862         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
    863         /***/
    864         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
    865         /***/
    866         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
    867         /***/
    868         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
    869         /***/
    870         public static final int ELBASAN_ID = 226; /*[10500]*/
    871         /***/
    872         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
    873         /***/
    874         public static final int GRANTHA_ID = 228; /*[11300]*/
    875         /***/
    876         public static final int KHOJKI_ID = 229; /*[11200]*/
    877         /***/
    878         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
    879         /***/
    880         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
    881         /***/
    882         public static final int LINEAR_A_ID = 232; /*[10600]*/
    883         /***/
    884         public static final int MAHAJANI_ID = 233; /*[11150]*/
    885         /***/
    886         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
    887         /***/
    888         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
    889         /***/
    890         public static final int MODI_ID = 236; /*[11600]*/
    891         /***/
    892         public static final int MRO_ID = 237; /*[16A40]*/
    893         /***/
    894         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
    895         /***/
    896         public static final int NABATAEAN_ID = 239; /*[10880]*/
    897         /***/
    898         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
    899         /***/
    900         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
    901         /***/
    902         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
    903         /***/
    904         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
    905         /***/
    906         public static final int PALMYRENE_ID = 244; /*[10860]*/
    907         /***/
    908         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
    909         /***/
    910         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
    911         /***/
    912         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
    913         /***/
    914         public static final int SIDDHAM_ID = 248; /*[11580]*/
    915         /***/
    916         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
    917         /***/
    918         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
    919         /***/
    920         public static final int TIRHUTA_ID = 251; /*[11480]*/
    921         /***/
    922         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
    923 
    924         /* New blocks in Unicode 8.0 */
    925 
    926         /***/
    927         public static final int AHOM_ID = 253; /*[11700]*/
    928         /***/
    929         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
    930         /***/
    931         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
    932         /***/
    933         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
    934         /***/
    935         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
    936         /***/
    937         public static final int HATRAN_ID = 258; /*[108E0]*/
    938         /***/
    939         public static final int MULTANI_ID = 259; /*[11280]*/
    940         /***/
    941         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
    942         /***/
    943         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
    944         /***/
    945         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
    946 
    947         /* New blocks in Unicode 9.0 */
    948 
    949         /***/
    950         public static final int ADLAM_ID = 263; /*[1E900]*/
    951         /***/
    952         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
    953         /***/
    954         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
    955         /***/
    956         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
    957         /***/
    958         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
    959         /***/
    960         public static final int MARCHEN_ID = 268; /*[11C70]*/
    961         /***/
    962         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
    963         /***/
    964         public static final int NEWA_ID = 270; /*[11400]*/
    965         /***/
    966         public static final int OSAGE_ID = 271; /*[104B0]*/
    967         /***/
    968         public static final int TANGUT_ID = 272; /*[17000]*/
    969         /***/
    970         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
    971 
    972         // New blocks in Unicode 10.0
    973 
    974         /***/
    975         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
    976         /***/
    977         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
    978         /***/
    979         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
    980         /***/
    981         public static final int NUSHU_ID = 277; /*[1B170]*/
    982         /***/
    983         public static final int SOYOMBO_ID = 278; /*[11A50]*/
    984         /***/
    985         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
    986         /***/
    987         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
    988 
    989         /**
    990          * One more than the highest normal UnicodeBlock value.
    991          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
    992          *
    993          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    994          * @hide unsupported on Android
    995          */
    996         @Deprecated
    997         public static final int COUNT = 281;
    998 
    999         // blocks objects ---------------------------------------------------
   1000 
   1001         /**
   1002          * Array of UnicodeBlocks, sized by COUNT, for easy access in getInstance(int)
   1003          */
   1004         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
   1005 
   1006         /** Constructed with the name "NO_BLOCK" and a literal ID of 0;
   1007          *  most sibling blocks pass a named *_ID constant instead. */
   1008         public static final UnicodeBlock NO_BLOCK
   1009         = new UnicodeBlock("NO_BLOCK", 0);
   1010 
   1011         /**
   1012          */
   1013         public static final UnicodeBlock BASIC_LATIN
   1014         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
   1015         /**
   1016          */
   1017         public static final UnicodeBlock LATIN_1_SUPPLEMENT
   1018         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
   1019         /**
   1020          */
   1021         public static final UnicodeBlock LATIN_EXTENDED_A
   1022         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
   1023         /**
   1024          */
   1025         public static final UnicodeBlock LATIN_EXTENDED_B
   1026         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
   1027         /**
   1028          */
   1029         public static final UnicodeBlock IPA_EXTENSIONS
   1030         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
   1031         /**
   1032          */
   1033         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
   1034         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
   1035         /**
   1036          */
   1037         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
   1038         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
   1039         /**
   1040          * Unicode 3.2 renames this block to "Greek and Coptic".
   1041          */
   1042         public static final UnicodeBlock GREEK
   1043         = new UnicodeBlock("GREEK", GREEK_ID);
   1044         /**
   1045          */
   1046         public static final UnicodeBlock CYRILLIC
   1047         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
   1048         /**
   1049          */
   1050         public static final UnicodeBlock ARMENIAN
   1051         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
   1052         /**
   1053          */
   1054         public static final UnicodeBlock HEBREW
   1055         = new UnicodeBlock("HEBREW", HEBREW_ID);
   1056         /**
   1057          */
   1058         public static final UnicodeBlock ARABIC
   1059         = new UnicodeBlock("ARABIC", ARABIC_ID);
   1060         /**
   1061          */
   1062         public static final UnicodeBlock SYRIAC
   1063         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
   1064         /**
   1065          */
   1066         public static final UnicodeBlock THAANA
   1067         = new UnicodeBlock("THAANA", THAANA_ID);
   1068         /**
   1069          */
   1070         public static final UnicodeBlock DEVANAGARI
   1071         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
   1072         /**
   1073          */
   1074         public static final UnicodeBlock BENGALI
   1075         = new UnicodeBlock("BENGALI", BENGALI_ID);
   1076         /**
   1077          */
   1078         public static final UnicodeBlock GURMUKHI
   1079         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
   1080         /**
   1081          */
   1082         public static final UnicodeBlock GUJARATI
   1083         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
   1084         /**
   1085          */
   1086         public static final UnicodeBlock ORIYA
   1087         = new UnicodeBlock("ORIYA", ORIYA_ID);
   1088         /**
   1089          */
   1090         public static final UnicodeBlock TAMIL
   1091         = new UnicodeBlock("TAMIL", TAMIL_ID);
   1092         /**
   1093          */
   1094         public static final UnicodeBlock TELUGU
   1095         = new UnicodeBlock("TELUGU", TELUGU_ID);
   1096         /**
   1097          */
   1098         public static final UnicodeBlock KANNADA
   1099         = new UnicodeBlock("KANNADA", KANNADA_ID);
   1100         /**
   1101          */
   1102         public static final UnicodeBlock MALAYALAM
   1103         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
   1104         /**
   1105          */
   1106         public static final UnicodeBlock SINHALA
   1107         = new UnicodeBlock("SINHALA", SINHALA_ID);
   1108         /**
   1109          */
   1110         public static final UnicodeBlock THAI
   1111         = new UnicodeBlock("THAI", THAI_ID);
   1112         /**
   1113          */
   1114         public static final UnicodeBlock LAO
   1115         = new UnicodeBlock("LAO", LAO_ID);
   1116         /**
   1117          */
   1118         public static final UnicodeBlock TIBETAN
   1119         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
   1120         /**
   1121          */
   1122         public static final UnicodeBlock MYANMAR
   1123         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
   1124         /**
   1125          */
   1126         public static final UnicodeBlock GEORGIAN
   1127         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
   1128         /**
   1129          */
   1130         public static final UnicodeBlock HANGUL_JAMO
   1131         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
   1132         /**
   1133          */
   1134         public static final UnicodeBlock ETHIOPIC
   1135         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
   1136         /**
   1137          */
   1138         public static final UnicodeBlock CHEROKEE
   1139         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
   1140         /**
   1141          */
   1142         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
   1143         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
   1144                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
   1145         /**
   1146          */
   1147         public static final UnicodeBlock OGHAM
   1148         = new UnicodeBlock("OGHAM", OGHAM_ID);
   1149         /**
   1150          */
   1151         public static final UnicodeBlock RUNIC
   1152         = new UnicodeBlock("RUNIC", RUNIC_ID);
   1153         /**
   1154          */
   1155         public static final UnicodeBlock KHMER
   1156         = new UnicodeBlock("KHMER", KHMER_ID);
   1157         /**
   1158          */
   1159         public static final UnicodeBlock MONGOLIAN
   1160         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
   1161         /**
   1162          */
   1163         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
   1164         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
   1165         /**
   1166          */
   1167         public static final UnicodeBlock GREEK_EXTENDED
   1168         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
   1169         /**
   1170          */
   1171         public static final UnicodeBlock GENERAL_PUNCTUATION
   1172         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
   1173         /**
   1174          */
   1175         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
   1176         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
   1177         /**
   1178          */
   1179         public static final UnicodeBlock CURRENCY_SYMBOLS
   1180         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
   1181         /**
   1182          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
   1183          * Symbols".
   1184          */
   1185         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
   1186         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
   1187         /**
   1188          */
   1189         public static final UnicodeBlock LETTERLIKE_SYMBOLS
   1190         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
   1191         /**
   1192          */
   1193         public static final UnicodeBlock NUMBER_FORMS
   1194         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
   1195         /**
   1196          */
   1197         public static final UnicodeBlock ARROWS
   1198         = new UnicodeBlock("ARROWS", ARROWS_ID);
   1199         /**
   1200          */
   1201         public static final UnicodeBlock MATHEMATICAL_OPERATORS
   1202         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
   1203         /**
   1204          */
   1205         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
   1206         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
   1207         /**
   1208          */
   1209         public static final UnicodeBlock CONTROL_PICTURES
   1210         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
   1211         /**
   1212          */
   1213         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
   1214         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
   1215         /**
   1216          */
   1217         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
   1218         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
   1219         /**
   1220          */
   1221         public static final UnicodeBlock BOX_DRAWING
   1222         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
   1223         /**
   1224          */
   1225         public static final UnicodeBlock BLOCK_ELEMENTS
   1226         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
   1227         /**
   1228          */
   1229         public static final UnicodeBlock GEOMETRIC_SHAPES
   1230         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
   1231         /**
   1232          */
   1233         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
   1234         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
   1235         /**
   1236          */
   1237         public static final UnicodeBlock DINGBATS
   1238         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
   1239         /**
   1240          */
   1241         public static final UnicodeBlock BRAILLE_PATTERNS
   1242         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
   1243         /**
   1244          */
   1245         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
   1246         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
   1247         /**
   1248          */
   1249         public static final UnicodeBlock KANGXI_RADICALS
   1250         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
   1251         /**
   1252          */
   1253         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
   1254         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
   1255                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
   1256         /**
   1257          */
   1258         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
   1259         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
   1260         /**
   1261          */
   1262         public static final UnicodeBlock HIRAGANA
   1263         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
   1264         /**
   1265          */
   1266         public static final UnicodeBlock KATAKANA
   1267         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
   1268         /**
   1269          */
   1270         public static final UnicodeBlock BOPOMOFO
   1271         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
   1272         /**
   1273          */
   1274         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
   1275         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
   1276         /**
   1277          */
   1278         public static final UnicodeBlock KANBUN
   1279         = new UnicodeBlock("KANBUN", KANBUN_ID);
   1280         /**
   1281          */
   1282         public static final UnicodeBlock BOPOMOFO_EXTENDED
   1283         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
   1284         /**
   1285          */
   1286         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
   1287         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
   1288                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
   1289         /**
   1290          */
   1291         public static final UnicodeBlock CJK_COMPATIBILITY
   1292         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
   1293         /**
   1294          */
   1295         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
   1296         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
   1297                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
   1298         /**
   1299          */
   1300         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
   1301         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
   1302         /**
   1303          */
   1304         public static final UnicodeBlock YI_SYLLABLES
   1305         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
   1306         /**
   1307          */
   1308         public static final UnicodeBlock YI_RADICALS
   1309         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
   1310         /**
   1311          */
   1312         public static final UnicodeBlock HANGUL_SYLLABLES
   1313         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
   1314         /**
   1315          */
   1316         public static final UnicodeBlock HIGH_SURROGATES
   1317         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
   1318         /**
   1319          */
   1320         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
   1321         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
   1322         /**
   1323          */
   1324         public static final UnicodeBlock LOW_SURROGATES
   1325         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
   1326         /**
   1327          * Same object as {@link #PRIVATE_USE}, which is declared as an alias of this field.
   1328          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
   1329          * and multiple code point ranges had this block.
   1330          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
   1331          * and adds separate blocks for the supplementary PUAs.
   1332          */
   1333         public static final UnicodeBlock PRIVATE_USE_AREA
   1334         = new UnicodeBlock("PRIVATE_USE_AREA",  78); // NOTE(review): literal 78 where siblings use a *_ID constant; confirm
   1335         /**
   1336          * Alias for {@link #PRIVATE_USE_AREA}; this field refers to the same UnicodeBlock object.
   1337          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
   1338          * and multiple code point ranges had this block.
   1339          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
   1340          * and adds separate blocks for the supplementary PUAs.
   1341          */
   1342         public static final UnicodeBlock PRIVATE_USE
   1343         = PRIVATE_USE_AREA;
   1344         /**
   1345          */
   1346         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
   1347         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
   1348         /**
   1349          */
   1350         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
   1351         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
   1352         /**
   1353          */
   1354         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
   1355         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
   1356         /**
   1357          */
   1358         public static final UnicodeBlock COMBINING_HALF_MARKS
   1359         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
   1360         /**
   1361          */
   1362         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
   1363         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
   1364         /**
   1365          */
   1366         public static final UnicodeBlock SMALL_FORM_VARIANTS
   1367         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
   1368         /**
   1369          */
   1370         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
   1371         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
   1372         /**
   1373          */
   1374         public static final UnicodeBlock SPECIALS
   1375         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
   1376         /**
   1377          */
   1378         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
   1379         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
   1380         /**
   1381          */
   1382         public static final UnicodeBlock OLD_ITALIC
   1383         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
   1384         /**
   1385          */
   1386         public static final UnicodeBlock GOTHIC
   1387         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
   1388         /**
   1389          */
   1390         public static final UnicodeBlock DESERET
   1391         = new UnicodeBlock("DESERET", DESERET_ID);
   1392         /**
   1393          */
   1394         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
   1395         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
   1396         /**
   1397          */
   1398         public static final UnicodeBlock MUSICAL_SYMBOLS
   1399         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
   1400         /**
   1401          */
   1402         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
   1403         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
   1404                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
   1405         /**
   1406          */
   1407         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
   1408         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
   1409                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
   1410         /**
   1411          */
   1412         public static final UnicodeBlock
   1413         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
   1414         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
   1415                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
   1416         /**
   1417          */
   1418         public static final UnicodeBlock TAGS
   1419         = new UnicodeBlock("TAGS", TAGS_ID);
   1420 
   1421         // New blocks in Unicode 3.2
   1422 
   1423         /**
   1424          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
   1425          */
   1426         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
   1427         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
   1428         /**
   1429          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
   1430          */
   1431         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
   1432         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
   1433         /**
   1434          * The "Tagalog" Unicode block. */
   1435         public static final UnicodeBlock TAGALOG
   1436         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
   1437         /**
   1438          * The "Hanunoo" Unicode block. */
   1439         public static final UnicodeBlock HANUNOO
   1440         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
   1441         /**
   1442          * The "Buhid" Unicode block. */
   1443         public static final UnicodeBlock BUHID
   1444         = new UnicodeBlock("BUHID", BUHID_ID);
   1445         /**
   1446          * The "Tagbanwa" Unicode block. */
   1447         public static final UnicodeBlock TAGBANWA
   1448         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
   1449         /**
   1450          * The "Miscellaneous Mathematical Symbols-A" Unicode block. */
   1451         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
   1452         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
   1453                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
   1454         /**
   1455          * The "Supplemental Arrows-A" Unicode block. */
   1456         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
   1457         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
   1458         /**
   1459          * The "Supplemental Arrows-B" Unicode block. */
   1460         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
   1461         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
   1462         /**
   1463          * The "Miscellaneous Mathematical Symbols-B" Unicode block. */
   1464         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
   1465         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
   1466                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
   1467         /**
   1468          * The "Supplemental Mathematical Operators" Unicode block. */
   1469         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
   1470         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
   1471                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
   1472         /**
   1473          * The "Katakana Phonetic Extensions" Unicode block. */
   1474         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
   1475         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
   1476         /**
   1477          * The "Variation Selectors" Unicode block. */
   1478         public static final UnicodeBlock VARIATION_SELECTORS
   1479         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
   1480         /**
   1481          * The "Supplementary Private Use Area-A" Unicode block. */
   1482         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
   1483         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
   1484                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
   1485         /**
   1486          * The "Supplementary Private Use Area-B" Unicode block. */
   1487         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
   1488         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
   1489                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
   1490 
   1491         /**
   1492          * The "Limbu" Unicode block. */
   1493         public static final UnicodeBlock LIMBU
   1494         = new UnicodeBlock("LIMBU", LIMBU_ID);
   1495         /**
   1496          * The "Tai Le" Unicode block. */
   1497         public static final UnicodeBlock TAI_LE
   1498         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
   1499         /**
   1500          * The "Khmer Symbols" Unicode block. */
   1501         public static final UnicodeBlock KHMER_SYMBOLS
   1502         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
   1503 
   1504         /**
   1505          * The "Phonetic Extensions" Unicode block. */
   1506         public static final UnicodeBlock PHONETIC_EXTENSIONS
   1507         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
   1508 
   1509         /**
   1510          * The "Miscellaneous Symbols and Arrows" Unicode block. */
   1511         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
   1512         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
   1513                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
   1514         /**
   1515          * The "Yijing Hexagram Symbols" Unicode block. */
   1516         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
   1517         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
   1518         /**
   1519          * The "Linear B Syllabary" Unicode block. */
   1520         public static final UnicodeBlock LINEAR_B_SYLLABARY
   1521         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
   1522         /**
   1523          * The "Linear B Ideograms" Unicode block. */
   1524         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
   1525         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
   1526         /**
   1527          * The "Aegean Numbers" Unicode block. */
   1528         public static final UnicodeBlock AEGEAN_NUMBERS
   1529         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
   1530         /**
   1531          * The "Ugaritic" Unicode block. */
   1532         public static final UnicodeBlock UGARITIC
   1533         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
   1534         /**
   1535          * The "Shavian" Unicode block. */
   1536         public static final UnicodeBlock SHAVIAN
   1537         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
   1538         /**
   1539          * The "Osmanya" Unicode block. */
   1540         public static final UnicodeBlock OSMANYA
   1541         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
   1542         /**
   1543          * The "Cypriot Syllabary" Unicode block. */
   1544         public static final UnicodeBlock CYPRIOT_SYLLABARY
   1545         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
   1546         /**
   1547          * The "Tai Xuan Jing Symbols" Unicode block. */
   1548         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
   1549         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
   1550 
   1551         /**
   1552          * The "Variation Selectors Supplement" Unicode block. */
   1553         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
   1554         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
   1555 
   1556         /* New blocks in Unicode 4.1 */
   1557 
   1558         /**
   1559          * The "Ancient Greek Musical Notation" Unicode block; first code point U+1D200. */
   1560         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
   1561                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
   1562                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
   1563 
   1564         /**
   1565          * The "Ancient Greek Numbers" Unicode block; first code point U+10140. */
   1566         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
   1567                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
   1568 
   1569         /**
   1570          * The "Arabic Supplement" Unicode block; first code point U+0750. */
   1571         public static final UnicodeBlock ARABIC_SUPPLEMENT =
   1572                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
   1573 
   1574         /**
   1575          * The "Buginese" Unicode block; first code point U+1A00. */
   1576         public static final UnicodeBlock BUGINESE =
   1577                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
   1578 
   1579         /**
   1580          * The "CJK Strokes" Unicode block; first code point U+31C0. */
   1581         public static final UnicodeBlock CJK_STROKES =
   1582                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
   1583 
   1584         /**
   1585          * The "Combining Diacritical Marks Supplement" Unicode block; first code point U+1DC0. */
   1586         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
   1587                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
   1588                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
   1589 
   1590         /**
   1591          * The "Coptic" Unicode block; first code point U+2C80. */
   1592         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
   1593 
   1594         /**
   1595          * The "Ethiopic Extended" Unicode block; first code point U+2D80. */
   1596         public static final UnicodeBlock ETHIOPIC_EXTENDED =
   1597                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
   1598 
   1599         /**
   1600          * The "Ethiopic Supplement" Unicode block; first code point U+1380. */
   1601         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
   1602                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
   1603 
   1604         /**
   1605          * The "Georgian Supplement" Unicode block; first code point U+2D00. */
   1606         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
   1607                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
   1608 
   1609         /**
   1610          * The "Glagolitic" Unicode block; first code point U+2C00. */
   1611         public static final UnicodeBlock GLAGOLITIC =
   1612                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
   1613 
   1614         /**
   1615          * The "Kharoshthi" Unicode block; first code point U+10A00. */
   1616         public static final UnicodeBlock KHAROSHTHI =
   1617                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
   1618 
   1619         /**
   1620          * The "Modifier Tone Letters" Unicode block; first code point U+A700. */
   1621         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
   1622                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
   1623 
   1624         /**
   1625          * The "New Tai Lue" Unicode block; first code point U+1980. */
   1626         public static final UnicodeBlock NEW_TAI_LUE =
   1627                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
   1628 
   1629         /**
   1630          * The "Old Persian" Unicode block; first code point U+103A0. */
   1631         public static final UnicodeBlock OLD_PERSIAN =
   1632                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
   1633 
   1634         /**
   1635          * The "Phonetic Extensions Supplement" Unicode block; first code point U+1D80. */
   1636         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
   1637                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
   1638                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
   1639 
   1640         /**
   1641          * The "Supplemental Punctuation" Unicode block; first code point U+2E00. */
   1642         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
   1643                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
   1644 
   1645         /**
   1646          * The "Syloti Nagri" Unicode block; first code point U+A800. */
   1647         public static final UnicodeBlock SYLOTI_NAGRI =
   1648                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
   1649 
   1650         /**
   1651          * The "Tifinagh" Unicode block; first code point U+2D30. */
   1652         public static final UnicodeBlock TIFINAGH =
   1653                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
   1654 
   1655         /**
   1656          * The "Vertical Forms" Unicode block; first code point U+FE10. */
   1657         public static final UnicodeBlock VERTICAL_FORMS =
   1658                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
   1659 
   1660         /**
   1661          * The "NKo" Unicode block; first code point U+07C0. */
   1662         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
   1663         /**
   1664          * The "Balinese" Unicode block; first code point U+1B00. */
   1665         public static final UnicodeBlock BALINESE =
   1666                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
   1667         /**
   1668          * The "Latin Extended-C" Unicode block; first code point U+2C60. */
   1669         public static final UnicodeBlock LATIN_EXTENDED_C =
   1670                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
   1671         /**
   1672          * The "Latin Extended-D" Unicode block; first code point U+A720. */
   1673         public static final UnicodeBlock LATIN_EXTENDED_D =
   1674                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
   1675         /**
   1676          * The "Phags-pa" Unicode block; first code point U+A840. */
   1677         public static final UnicodeBlock PHAGS_PA =
   1678                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
   1679         /**
   1680          * The "Phoenician" Unicode block; first code point U+10900. */
   1681         public static final UnicodeBlock PHOENICIAN =
   1682                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
   1683         /**
   1684          * The "Cuneiform" Unicode block; first code point U+12000. */
   1685         public static final UnicodeBlock CUNEIFORM =
   1686                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
   1687         /**
   1688          * The "Cuneiform Numbers and Punctuation" Unicode block; first code point U+12400. */
   1689         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
   1690                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
   1691                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
   1692         /**
   1693          * The "Counting Rod Numerals" Unicode block; first code point U+1D360. */
   1694         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
   1695                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
   1696 
   1697         /**
   1698          * The "Sundanese" Unicode block; first code point U+1B80. */
   1699         public static final UnicodeBlock SUNDANESE =
   1700                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
   1701 
   1702         /**
   1703          * The "Lepcha" Unicode block; first code point U+1C00. */
   1704         public static final UnicodeBlock LEPCHA =
   1705                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
   1706 
   1707         /**
   1708          * The "Ol Chiki" Unicode block; first code point U+1C50. */
   1709         public static final UnicodeBlock OL_CHIKI =
   1710                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
   1711 
   1712         /**
   1713          * The "Cyrillic Extended-A" Unicode block; first code point U+2DE0. */
   1714         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
   1715                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
   1716 
   1717         /**
   1718          * The "Vai" Unicode block; first code point U+A500. */
   1719         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
   1720 
   1721         /**
   1722          * The "Cyrillic Extended-B" Unicode block; first code point U+A640. */
   1723         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
   1724                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
   1725 
   1726         /**
   1727          * The "Saurashtra" Unicode block; first code point U+A880. */
   1728         public static final UnicodeBlock SAURASHTRA =
   1729                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
   1730 
   1731         /**
   1732          * The "Kayah Li" Unicode block; first code point U+A900. */
   1733         public static final UnicodeBlock KAYAH_LI =
   1734                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
   1735 
   1736         /**
   1737          * The "Rejang" Unicode block; first code point U+A930. */
   1738         public static final UnicodeBlock REJANG =
   1739                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
   1740 
   1741         /**
   1742          * The "Cham" Unicode block; first code point U+AA00. */
   1743         public static final UnicodeBlock CHAM =
   1744                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
   1745 
   1746         /**
   1747          * The "Ancient Symbols" Unicode block; first code point U+10190. */
   1748         public static final UnicodeBlock ANCIENT_SYMBOLS =
   1749                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
   1750 
   1751         /**
   1752          * The "Phaistos Disc" Unicode block; first code point U+101D0. */
   1753         public static final UnicodeBlock PHAISTOS_DISC =
   1754                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
   1755 
   1756         /**
   1757          * The "Lycian" Unicode block; first code point U+10280. */
   1758         public static final UnicodeBlock LYCIAN =
   1759                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
   1760 
   1761         /**
   1762          * The "Carian" Unicode block; first code point U+102A0. */
   1763         public static final UnicodeBlock CARIAN =
   1764                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
   1765 
   1766         /**
   1767          * The "Lydian" Unicode block; first code point U+10920. */
   1768         public static final UnicodeBlock LYDIAN =
   1769                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
   1770 
   1771         /**
   1772          * The "Mahjong Tiles" Unicode block; first code point U+1F000. */
   1773         public static final UnicodeBlock MAHJONG_TILES =
   1774                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
   1775 
   1776         /**
   1777          * The "Domino Tiles" Unicode block; first code point U+1F030. */
   1778         public static final UnicodeBlock DOMINO_TILES =
   1779                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
   1780 
   1781         /* New blocks in Unicode 5.2 */
   1782 
   1783         /** The "Samaritan" Unicode block; first code point U+0800. */
   1784         public static final UnicodeBlock SAMARITAN =
   1785                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
   1786         /** The "Unified Canadian Aboriginal Syllabics Extended" Unicode block; first code point U+18B0. */
   1787         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
   1788                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
   1789                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
   1790         /** The "Tai Tham" Unicode block; first code point U+1A20. */
   1791         public static final UnicodeBlock TAI_THAM =
   1792                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
   1793         /** The "Vedic Extensions" Unicode block; first code point U+1CD0. */
   1794         public static final UnicodeBlock VEDIC_EXTENSIONS =
   1795                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
   1796         /** The "Lisu" Unicode block; first code point U+A4D0. */
   1797         public static final UnicodeBlock LISU =
   1798                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
   1799         /** The "Bamum" Unicode block; first code point U+A6A0. */
   1800         public static final UnicodeBlock BAMUM =
   1801                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
   1802         /** The "Common Indic Number Forms" Unicode block; first code point U+A830. */
   1803         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
   1804                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
   1805         /** The "Devanagari Extended" Unicode block; first code point U+A8E0. */
   1806         public static final UnicodeBlock DEVANAGARI_EXTENDED =
   1807                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
   1808         /** The "Hangul Jamo Extended-A" Unicode block; first code point U+A960. */
   1809         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
   1810                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
   1811         /** The "Javanese" Unicode block; first code point U+A980. */
   1812         public static final UnicodeBlock JAVANESE =
   1813                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
   1814         /** The "Myanmar Extended-A" Unicode block; first code point U+AA60. */
   1815         public static final UnicodeBlock MYANMAR_EXTENDED_A =
   1816                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
   1817         /** The "Tai Viet" Unicode block; first code point U+AA80. */
   1818         public static final UnicodeBlock TAI_VIET =
   1819                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
   1820         /** The "Meetei Mayek" Unicode block; first code point U+ABC0. */
   1821         public static final UnicodeBlock MEETEI_MAYEK =
   1822                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
   1823         /** The "Hangul Jamo Extended-B" Unicode block; first code point U+D7B0. */
   1824         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
   1825                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
   1826         /** The "Imperial Aramaic" Unicode block; first code point U+10840. */
   1827         public static final UnicodeBlock IMPERIAL_ARAMAIC =
   1828                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
   1829         /** The "Old South Arabian" Unicode block; first code point U+10A60. */
   1830         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
   1831                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
   1832         /** The "Avestan" Unicode block; first code point U+10B00. */
   1833         public static final UnicodeBlock AVESTAN =
   1834                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
   1835         /** The "Inscriptional Parthian" Unicode block; first code point U+10B40. */
   1836         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
   1837                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
   1838         /** The "Inscriptional Pahlavi" Unicode block; first code point U+10B60. */
   1839         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
   1840                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
   1841         /** The "Old Turkic" Unicode block; first code point U+10C00. */
   1842         public static final UnicodeBlock OLD_TURKIC =
   1843                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
   1844         /** The "Rumi Numeral Symbols" Unicode block; first code point U+10E60. */
   1845         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
   1846                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
   1847         /** The "Kaithi" Unicode block; first code point U+11080. */
   1848         public static final UnicodeBlock KAITHI =
   1849                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
   1850         /** The "Egyptian Hieroglyphs" Unicode block; first code point U+13000. */
   1851         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
   1852                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
   1853         /** The "Enclosed Alphanumeric Supplement" Unicode block; first code point U+1F100. */
   1854         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
   1855                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
   1856                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
   1857         /** The "Enclosed Ideographic Supplement" Unicode block; first code point U+1F200. */
   1858         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
   1859                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
   1860                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
   1861         /** The "CJK Unified Ideographs Extension C" Unicode block; first code point U+2A700. */
   1862         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
   1863                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
   1864                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
   1865 
   1866         /* New blocks in Unicode 6.0 */
   1867 
   1868         /** The "Mandaic" Unicode block; first code point U+0840. */
   1869         public static final UnicodeBlock MANDAIC =
   1870                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
   1871         /** The "Batak" Unicode block; first code point U+1BC0. */
   1872         public static final UnicodeBlock BATAK =
   1873                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
   1874         /** The "Ethiopic Extended-A" Unicode block; first code point U+AB00. */
   1875         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
   1876                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
   1877         /** The "Brahmi" Unicode block; first code point U+11000. */
   1878         public static final UnicodeBlock BRAHMI =
   1879                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
   1880         /** The "Bamum Supplement" Unicode block; first code point U+16800. */
   1881         public static final UnicodeBlock BAMUM_SUPPLEMENT =
   1882                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
   1883         /** The "Kana Supplement" Unicode block; first code point U+1B000. */
   1884         public static final UnicodeBlock KANA_SUPPLEMENT =
   1885                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
   1886         /** The "Playing Cards" Unicode block; first code point U+1F0A0. */
   1887         public static final UnicodeBlock PLAYING_CARDS =
   1888                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
   1889         /** The "Miscellaneous Symbols and Pictographs" Unicode block; first code point U+1F300. */
   1890         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
   1891                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
   1892                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
   1893         /** The "Emoticons" Unicode block; first code point U+1F600. */
   1894         public static final UnicodeBlock EMOTICONS =
   1895                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
   1896         /** The "Transport and Map Symbols" Unicode block; first code point U+1F680. */
   1897         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
   1898                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
   1899         /** The "Alchemical Symbols" Unicode block; first code point U+1F700. */
   1900         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
   1901                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
   1902         /** The "CJK Unified Ideographs Extension D" Unicode block; first code point U+2B740. */
   1903         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
   1904                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
   1905                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
   1906 
   1907         /* New blocks in Unicode 6.1 */
   1908 
   1909         /** The "Arabic Extended-A" Unicode block; first code point U+08A0. */
   1910         public static final UnicodeBlock ARABIC_EXTENDED_A =
   1911                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
   1912         /** The "Arabic Mathematical Alphabetic Symbols" Unicode block; first code point U+1EE00. */
   1913         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
   1914                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
   1915         /** The "Chakma" Unicode block; first code point U+11100. */
   1916         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
   1917         /** The "Meetei Mayek Extensions" Unicode block; first code point U+AAE0. */
   1918         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
   1919                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
   1920         /** The "Meroitic Cursive" Unicode block; first code point U+109A0. */
   1921         public static final UnicodeBlock MEROITIC_CURSIVE =
   1922                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
   1923         /** The "Meroitic Hieroglyphs" Unicode block; first code point U+10980. */
   1924         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
   1925                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
   1926         /** The "Miao" Unicode block; first code point U+16F00. */
   1927         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
   1928         /** The "Sharada" Unicode block; first code point U+11180. */
   1929         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
   1930         /** The "Sora Sompeng" Unicode block; first code point U+110D0. */
   1931         public static final UnicodeBlock SORA_SOMPENG =
   1932                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
   1933         /** The "Sundanese Supplement" Unicode block; first code point U+1CC0. */
   1934         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
   1935                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
   1936         /** The "Takri" Unicode block; first code point U+11680. */
   1937         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
   1938 
   1939         /* New blocks in Unicode 7.0 */
   1940 
   1941         /** The "Bassa Vah" Unicode block; first code point U+16AD0. */
   1942         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
   1943         /** The "Caucasian Albanian" Unicode block; first code point U+10530. */
   1944         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
   1945                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
   1946         /** The "Coptic Epact Numbers" Unicode block; first code point U+102E0. */
   1947         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
   1948                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
   1949         /** The "Combining Diacritical Marks Extended" Unicode block; first code point U+1AB0. */
   1950         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
   1951                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
   1952         /** The "Duployan" Unicode block; first code point U+1BC00. */
   1953         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
   1954         /** The "Elbasan" Unicode block; first code point U+10500. */
   1955         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
   1956         /** The "Geometric Shapes Extended" Unicode block; first code point U+1F780. */
   1957         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
   1958                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
   1959         /** The "Grantha" Unicode block; first code point U+11300. */
   1960         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
   1961         /** The "Khojki" Unicode block; first code point U+11200. */
   1962         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
   1963         /** The "Khudawadi" Unicode block; first code point U+112B0. */
   1964         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
   1965         /** The "Latin Extended-E" Unicode block; first code point U+AB30. */
   1966         public static final UnicodeBlock LATIN_EXTENDED_E =
   1967                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
   1968         /** The "Linear A" Unicode block; first code point U+10600. */
   1969         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
   1970         /** The "Mahajani" Unicode block; first code point U+11150. */
   1971         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
   1972         /** The "Manichaean" Unicode block; first code point U+10AC0. */
   1973         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
   1974         /** The "Mende Kikakui" Unicode block; first code point U+1E800. */
   1975         public static final UnicodeBlock MENDE_KIKAKUI =
   1976                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
   1977         /** The "Modi" Unicode block; first code point U+11600. */
   1978         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
   1979         /** The "Mro" Unicode block; first code point U+16A40. */
   1980         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
   1981         /** The "Myanmar Extended-B" Unicode block; first code point U+A9E0. */
   1982         public static final UnicodeBlock MYANMAR_EXTENDED_B =
   1983                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
   1984         /** The "Nabataean" Unicode block; first code point U+10880. */
   1985         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
   1986         /** The "Old North Arabian" Unicode block; first code point U+10A80. */
   1987         public static final UnicodeBlock OLD_NORTH_ARABIAN =
   1988                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
   1989         /** The "Old Permic" Unicode block; first code point U+10350. */
   1990         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
   1991         /** The "Ornamental Dingbats" Unicode block; first code point U+1F650. */
   1992         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
   1993                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
   1994         /** The "Pahawh Hmong" Unicode block; first code point U+16B00. */
   1995         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
   1996         /** The "Palmyrene" Unicode block; first code point U+10860. */
   1997         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
   1998         /** The "Pau Cin Hau" Unicode block; first code point U+11AC0. */
   1999         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
   2000         /** The "Psalter Pahlavi" Unicode block; first code point U+10B80. */
   2001         public static final UnicodeBlock PSALTER_PAHLAVI =
   2002                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
   2003         /** The "Shorthand Format Controls" Unicode block; first code point U+1BCA0. */
   2004         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
   2005                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
   2006         /** The "Siddham" Unicode block; first code point U+11580. */
   2007         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
   2008         /** The "Sinhala Archaic Numbers" Unicode block; first code point U+111E0. */
   2009         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
   2010                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
   2011         /** The "Supplemental Arrows-C" Unicode block; first code point U+1F800. */
   2012         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
   2013                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
   2014         /** The "Tirhuta" Unicode block; first code point U+11480. */
   2015         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
   2016         /** The "Warang Citi" Unicode block; first code point U+118A0. */
   2017         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
   2018 
   2019         /* New blocks in Unicode 8.0 */
   2020 
   2021         /** The "Ahom" Unicode block; first code point U+11700. */
   2022         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
   2023         /** The "Anatolian Hieroglyphs" Unicode block; first code point U+14400. */
   2024         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
   2025                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
   2026         /** The "Cherokee Supplement" Unicode block; first code point U+AB70. */
   2027         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
   2028                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
   2029         /** The "CJK Unified Ideographs Extension E" Unicode block; first code point U+2B820. */
   2030         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
   2031                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
   2032                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
   2033         /** The "Early Dynastic Cuneiform" Unicode block; first code point U+12480. */
   2034         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
   2035                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
   2036         /** The "Hatran" Unicode block; first code point U+108E0. */
   2037         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
   2038         /** The "Multani" Unicode block; first code point U+11280. */
   2039         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
   2040         /** The "Old Hungarian" Unicode block; first code point U+10C80. */
   2041         public static final UnicodeBlock OLD_HUNGARIAN =
   2042                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
   2043         /** The "Supplemental Symbols and Pictographs" Unicode block; first code point U+1F900. */
   2044         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
   2045                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
   2046                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
   2047         /** The "Sutton SignWriting" Unicode block; first code point U+1D800. */
   2048         public static final UnicodeBlock SUTTON_SIGNWRITING =
   2049                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
   2050 
   2051         /* New blocks in Unicode 9.0 */
   2052 
   2053         /** The "Adlam" Unicode block; first code point U+1E900. */
   2054         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
   2055         /** The "Bhaiksuki" Unicode block; first code point U+11C00. */
   2056         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
   2057         /** The "Cyrillic Extended-C" Unicode block; first code point U+1C80. */
   2058         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
   2059                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
   2060         /** The "Glagolitic Supplement" Unicode block; first code point U+1E000. */
   2061         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
   2062                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
   2063         /** The "Ideographic Symbols and Punctuation" Unicode block; first code point U+16FE0. */
   2064         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
   2065                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
   2066         /** The "Marchen" Unicode block; first code point U+11C70. */
   2067         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
   2068         /** The "Mongolian Supplement" Unicode block; first code point U+11660. */
   2069         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
   2070                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
   2071         /** The "Newa" Unicode block; first code point U+11400. */
   2072         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
   2073         /** The "Osage" Unicode block; first code point U+104B0. */
   2074         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
   2075         /** The "Tangut" Unicode block; first code point U+17000. */
   2076         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
   2077         /** The "Tangut Components" Unicode block; first code point U+18800. */
   2078         public static final UnicodeBlock TANGUT_COMPONENTS =
   2079                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
   2080 
   2081         // New blocks in Unicode 10.0
   2082 
   2083         /** The "CJK Unified Ideographs Extension F" Unicode block; first code point U+2CEB0. */
   2084         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
   2085                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
   2086         /** The "Kana Extended-A" Unicode block; first code point U+1B100. */
   2087         public static final UnicodeBlock KANA_EXTENDED_A =
   2088                 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
   2089         /** The "Masaram Gondi" Unicode block; first code point U+11D00. */
   2090         public static final UnicodeBlock MASARAM_GONDI =
   2091                 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
   2092         /** The "Nushu" Unicode block; first code point U+1B170. */
   2093         public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
   2094         /** The "Soyombo" Unicode block; first code point U+11A50. */
   2095         public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
   2096         /** The "Syriac Supplement" Unicode block; first code point U+0860. */
   2097         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
   2098                 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
   2099         /** The "Zanabazar Square" Unicode block; first code point U+11A00. */
   2100         public static final UnicodeBlock ZANABAZAR_SQUARE =
   2101                 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
   2102 
   2103         /**
   2104          */
   2105         public static final UnicodeBlock INVALID_CODE
   2106         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
   2107 
   2108         static {
   2109             for (int blockId = 0; blockId < COUNT; ++blockId) {
   2110                 if (BLOCKS_[blockId] == null) {
   2111                     throw new java.lang.IllegalStateException(
   2112                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
   2113                 }
   2114             }
   2115         }
   2116 
   2117         // public methods --------------------------------------------------
   2118 
   2119         /**
   2120          * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID.
   2121          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
   2122          * @param id UnicodeBlock ID
   2123          * @return the only instance of the UnicodeBlock with the argument ID
   2124          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
   2125          *         returned.
   2126          */
   2127         public static UnicodeBlock getInstance(int id)
   2128         {
   2129             if (id >= 0 && id < BLOCKS_.length) {
   2130                 return BLOCKS_[id];
   2131             }
   2132             return INVALID_CODE;
   2133         }
   2134 
   2135         /**
   2136          * Returns the Unicode allocation block that contains the code point,
   2137          * or null if the code point is not a member of a defined block.
   2138          * @param ch code point to be tested
   2139          * @return the Unicode allocation block that contains the code point
   2140          */
   2141         public static UnicodeBlock of(int ch)
   2142         {
   2143             if (ch > MAX_VALUE) {
   2144                 return INVALID_CODE;
   2145             }
   2146 
   2147             return UnicodeBlock.getInstance(
   2148                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
   2149         }
   2150 
   2151         /**
   2152          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
   2153          * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike
   2154          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
   2155          * against the official UCD name and the Java block name
   2156          * (ignoring case).
   2157          * @param blockName the name of the block to match
   2158          * @return the UnicodeBlock with that name
   2159          * @throws IllegalArgumentException if the blockName could not be matched
   2160          */
   2161         public static final UnicodeBlock forName(String blockName) {
   2162             Map<String, UnicodeBlock> m = null;
   2163             if (mref != null) {
   2164                 m = mref.get();
   2165             }
   2166             if (m == null) {
   2167                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
   2168                 for (int i = 0; i < BLOCKS_.length; ++i) {
   2169                     UnicodeBlock b = BLOCKS_[i];
   2170                     String name = trimBlockName(
   2171                             getPropertyValueName(UProperty.BLOCK, b.getID(),
   2172                                     UProperty.NameChoice.LONG));
   2173                     m.put(name, b);
   2174                 }
   2175                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
   2176             }
   2177             UnicodeBlock b = m.get(trimBlockName(blockName));
   2178             if (b == null) {
   2179                 throw new IllegalArgumentException();
   2180             }
   2181             return b;
   2182         }
   2183         private static SoftReference<Map<String, UnicodeBlock>> mref;
   2184 
   2185         private static String trimBlockName(String name) {
   2186             String upper = name.toUpperCase(Locale.ENGLISH);
   2187             StringBuilder result = new StringBuilder(upper.length());
   2188             for (int i = 0; i < upper.length(); i++) {
   2189                 char c = upper.charAt(i);
   2190                 if (c != ' ' && c != '_' && c != '-') {
   2191                     result.append(c);
   2192                 }
   2193             }
   2194             return result.toString();
   2195         }
   2196 
   2197         /**
   2198          * {icu} Returns the type ID of this Unicode block
   2199          * @return integer type ID of this Unicode block
   2200          */
   2201         public int getID()
   2202         {
   2203             return m_id_;
   2204         }
   2205 
   2206         // private data members ---------------------------------------------
   2207 
        /**
         * Identification code for this UnicodeBlock; set by the constructor and
         * returned by {@link #getID()}.
         */
        private int m_id_;
   2212 
   2213         // private constructor ----------------------------------------------
   2214 
   2215         /**
   2216          * UnicodeBlock constructor
   2217          * @param name name of this UnicodeBlock
   2218          * @param id unique id of this UnicodeBlock
   2219          * @exception NullPointerException if name is <code>null</code>
   2220          */
   2221         private UnicodeBlock(String name, int id)
   2222         {
   2223             super(name);
   2224             m_id_ = id;
   2225             if (id >= 0) {
   2226                 BLOCKS_[id] = this;
   2227             }
   2228         }
   2229     }
   2230 
   2231     /**
   2232      * East Asian Width constants.
   2233      * @see UProperty#EAST_ASIAN_WIDTH
   2234      * @see UCharacter#getIntPropertyValue
   2235      */
   2236     public static interface EastAsianWidth
   2237     {
   2238         /**
   2239          */
   2240         public static final int NEUTRAL = 0;
   2241         /**
   2242          */
   2243         public static final int AMBIGUOUS = 1;
   2244         /**
   2245          */
   2246         public static final int HALFWIDTH = 2;
   2247         /**
   2248          */
   2249         public static final int FULLWIDTH = 3;
   2250         /**
   2251          */
   2252         public static final int NARROW = 4;
   2253         /**
   2254          */
   2255         public static final int WIDE = 5;
   2256         /**
   2257          * One more than the highest normal EastAsianWidth value.
   2258          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
   2259          *
   2260          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2261          * @hide unsupported on Android
   2262          */
   2263         @Deprecated
   2264         public static final int COUNT = 6;
   2265     }
   2266 
   2267     /**
   2268      * Decomposition Type constants.
   2269      * @see UProperty#DECOMPOSITION_TYPE
   2270      */
   2271     public static interface DecompositionType
   2272     {
   2273         /**
   2274          */
   2275         public static final int NONE = 0;
   2276         /**
   2277          */
   2278         public static final int CANONICAL = 1;
   2279         /**
   2280          */
   2281         public static final int COMPAT = 2;
   2282         /**
   2283          */
   2284         public static final int CIRCLE = 3;
   2285         /**
   2286          */
   2287         public static final int FINAL = 4;
   2288         /**
   2289          */
   2290         public static final int FONT = 5;
   2291         /**
   2292          */
   2293         public static final int FRACTION = 6;
   2294         /**
   2295          */
   2296         public static final int INITIAL = 7;
   2297         /**
   2298          */
   2299         public static final int ISOLATED = 8;
   2300         /**
   2301          */
   2302         public static final int MEDIAL = 9;
   2303         /**
   2304          */
   2305         public static final int NARROW = 10;
   2306         /**
   2307          */
   2308         public static final int NOBREAK = 11;
   2309         /**
   2310          */
   2311         public static final int SMALL = 12;
   2312         /**
   2313          */
   2314         public static final int SQUARE = 13;
   2315         /**
   2316          */
   2317         public static final int SUB = 14;
   2318         /**
   2319          */
   2320         public static final int SUPER = 15;
   2321         /**
   2322          */
   2323         public static final int VERTICAL = 16;
   2324         /**
   2325          */
   2326         public static final int WIDE = 17;
   2327         /**
   2328          * One more than the highest normal DecompositionType value.
   2329          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
   2330          *
   2331          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2332          * @hide unsupported on Android
   2333          */
   2334         @Deprecated
   2335         public static final int COUNT = 18;
   2336     }
   2337 
   2338     /**
   2339      * Joining Type constants.
   2340      * @see UProperty#JOINING_TYPE
   2341      */
   2342     public static interface JoiningType
   2343     {
   2344         /**
   2345          */
   2346         public static final int NON_JOINING = 0;
   2347         /**
   2348          */
   2349         public static final int JOIN_CAUSING = 1;
   2350         /**
   2351          */
   2352         public static final int DUAL_JOINING = 2;
   2353         /**
   2354          */
   2355         public static final int LEFT_JOINING = 3;
   2356         /**
   2357          */
   2358         public static final int RIGHT_JOINING = 4;
   2359         /**
   2360          */
   2361         public static final int TRANSPARENT = 5;
   2362         /**
   2363          * One more than the highest normal JoiningType value.
   2364          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
   2365          *
   2366          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2367          * @hide unsupported on Android
   2368          */
   2369         @Deprecated
   2370         public static final int COUNT = 6;
   2371     }
   2372 
   2373     /**
   2374      * Joining Group constants.
   2375      * @see UProperty#JOINING_GROUP
   2376      */
   2377     public static interface JoiningGroup
   2378     {
   2379         /**
   2380          */
   2381         public static final int NO_JOINING_GROUP = 0;
   2382         /**
   2383          */
   2384         public static final int AIN = 1;
   2385         /**
   2386          */
   2387         public static final int ALAPH = 2;
   2388         /**
   2389          */
   2390         public static final int ALEF = 3;
   2391         /**
   2392          */
   2393         public static final int BEH = 4;
   2394         /**
   2395          */
   2396         public static final int BETH = 5;
   2397         /**
   2398          */
   2399         public static final int DAL = 6;
   2400         /**
   2401          */
   2402         public static final int DALATH_RISH = 7;
   2403         /**
   2404          */
   2405         public static final int E = 8;
   2406         /**
   2407          */
   2408         public static final int FEH = 9;
   2409         /**
   2410          */
   2411         public static final int FINAL_SEMKATH = 10;
   2412         /**
   2413          */
   2414         public static final int GAF = 11;
   2415         /**
   2416          */
   2417         public static final int GAMAL = 12;
   2418         /**
   2419          */
   2420         public static final int HAH = 13;
   2421         /***/
   2422         public static final int TEH_MARBUTA_GOAL = 14;
   2423         /**
   2424          */
   2425         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
   2426         /**
   2427          */
   2428         public static final int HE = 15;
   2429         /**
   2430          */
   2431         public static final int HEH = 16;
   2432         /**
   2433          */
   2434         public static final int HEH_GOAL = 17;
   2435         /**
   2436          */
   2437         public static final int HETH = 18;
   2438         /**
   2439          */
   2440         public static final int KAF = 19;
   2441         /**
   2442          */
   2443         public static final int KAPH = 20;
   2444         /**
   2445          */
   2446         public static final int KNOTTED_HEH = 21;
   2447         /**
   2448          */
   2449         public static final int LAM = 22;
   2450         /**
   2451          */
   2452         public static final int LAMADH = 23;
   2453         /**
   2454          */
   2455         public static final int MEEM = 24;
   2456         /**
   2457          */
   2458         public static final int MIM = 25;
   2459         /**
   2460          */
   2461         public static final int NOON = 26;
   2462         /**
   2463          */
   2464         public static final int NUN = 27;
   2465         /**
   2466          */
   2467         public static final int PE = 28;
   2468         /**
   2469          */
   2470         public static final int QAF = 29;
   2471         /**
   2472          */
   2473         public static final int QAPH = 30;
   2474         /**
   2475          */
   2476         public static final int REH = 31;
   2477         /**
   2478          */
   2479         public static final int REVERSED_PE = 32;
   2480         /**
   2481          */
   2482         public static final int SAD = 33;
   2483         /**
   2484          */
   2485         public static final int SADHE = 34;
   2486         /**
   2487          */
   2488         public static final int SEEN = 35;
   2489         /**
   2490          */
   2491         public static final int SEMKATH = 36;
   2492         /**
   2493          */
   2494         public static final int SHIN = 37;
   2495         /**
   2496          */
   2497         public static final int SWASH_KAF = 38;
   2498         /**
   2499          */
   2500         public static final int SYRIAC_WAW = 39;
   2501         /**
   2502          */
   2503         public static final int TAH = 40;
   2504         /**
   2505          */
   2506         public static final int TAW = 41;
   2507         /**
   2508          */
   2509         public static final int TEH_MARBUTA = 42;
   2510         /**
   2511          */
   2512         public static final int TETH = 43;
   2513         /**
   2514          */
   2515         public static final int WAW = 44;
   2516         /**
   2517          */
   2518         public static final int YEH = 45;
   2519         /**
   2520          */
   2521         public static final int YEH_BARREE = 46;
   2522         /**
   2523          */
   2524         public static final int YEH_WITH_TAIL = 47;
   2525         /**
   2526          */
   2527         public static final int YUDH = 48;
   2528         /**
   2529          */
   2530         public static final int YUDH_HE = 49;
   2531         /**
   2532          */
   2533         public static final int ZAIN = 50;
   2534         /**
   2535          */
   2536         public static final int FE = 51;
   2537         /**
   2538          */
   2539         public static final int KHAPH = 52;
   2540         /**
   2541          */
   2542         public static final int ZHAIN = 53;
   2543         /**
   2544          */
   2545         public static final int BURUSHASKI_YEH_BARREE = 54;
   2546         /***/
   2547         public static final int FARSI_YEH = 55;
   2548         /***/
   2549         public static final int NYA = 56;
   2550         /***/
   2551         public static final int ROHINGYA_YEH = 57;
   2552 
   2553         /***/
   2554         public static final int MANICHAEAN_ALEPH = 58;
   2555         /***/
   2556         public static final int MANICHAEAN_AYIN = 59;
   2557         /***/
   2558         public static final int MANICHAEAN_BETH = 60;
   2559         /***/
   2560         public static final int MANICHAEAN_DALETH = 61;
   2561         /***/
   2562         public static final int MANICHAEAN_DHAMEDH = 62;
   2563         /***/
   2564         public static final int MANICHAEAN_FIVE = 63;
   2565         /***/
   2566         public static final int MANICHAEAN_GIMEL = 64;
   2567         /***/
   2568         public static final int MANICHAEAN_HETH = 65;
   2569         /***/
   2570         public static final int MANICHAEAN_HUNDRED = 66;
   2571         /***/
   2572         public static final int MANICHAEAN_KAPH = 67;
   2573         /***/
   2574         public static final int MANICHAEAN_LAMEDH = 68;
   2575         /***/
   2576         public static final int MANICHAEAN_MEM = 69;
   2577         /***/
   2578         public static final int MANICHAEAN_NUN = 70;
   2579         /***/
   2580         public static final int MANICHAEAN_ONE = 71;
   2581         /***/
   2582         public static final int MANICHAEAN_PE = 72;
   2583         /***/
   2584         public static final int MANICHAEAN_QOPH = 73;
   2585         /***/
   2586         public static final int MANICHAEAN_RESH = 74;
   2587         /***/
   2588         public static final int MANICHAEAN_SADHE = 75;
   2589         /***/
   2590         public static final int MANICHAEAN_SAMEKH = 76;
   2591         /***/
   2592         public static final int MANICHAEAN_TAW = 77;
   2593         /***/
   2594         public static final int MANICHAEAN_TEN = 78;
   2595         /***/
   2596         public static final int MANICHAEAN_TETH = 79;
   2597         /***/
   2598         public static final int MANICHAEAN_THAMEDH = 80;
   2599         /***/
   2600         public static final int MANICHAEAN_TWENTY = 81;
   2601         /***/
   2602         public static final int MANICHAEAN_WAW = 82;
   2603         /***/
   2604         public static final int MANICHAEAN_YODH = 83;
   2605         /***/
   2606         public static final int MANICHAEAN_ZAYIN = 84;
   2607         /***/
   2608         public static final int STRAIGHT_WAW = 85;
   2609 
   2610         /***/
   2611         public static final int AFRICAN_FEH = 86;
   2612         /***/
   2613         public static final int AFRICAN_NOON = 87;
   2614         /***/
   2615         public static final int AFRICAN_QAF = 88;
   2616 
   2617         /***/
   2618         public static final int MALAYALAM_BHA = 89;
   2619         /***/
   2620         public static final int MALAYALAM_JA = 90;
   2621         /***/
   2622         public static final int MALAYALAM_LLA = 91;
   2623         /***/
   2624         public static final int MALAYALAM_LLLA = 92;
   2625         /***/
   2626         public static final int MALAYALAM_NGA = 93;
   2627         /***/
   2628         public static final int MALAYALAM_NNA = 94;
   2629         /***/
   2630         public static final int MALAYALAM_NNNA = 95;
   2631         /***/
   2632         public static final int MALAYALAM_NYA = 96;
   2633         /***/
   2634         public static final int MALAYALAM_RA = 97;
   2635         /***/
   2636         public static final int MALAYALAM_SSA = 98;
   2637         /***/
   2638         public static final int MALAYALAM_TTA = 99;
   2639 
   2640         /**
   2641          * One more than the highest normal JoiningGroup value.
   2642          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup).
   2643          *
   2644          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2645          * @hide unsupported on Android
   2646          */
   2647         @Deprecated
   2648         public static final int COUNT = 100;
   2649     }
   2650 
   2651     /**
   2652      * Grapheme Cluster Break constants.
   2653      * @see UProperty#GRAPHEME_CLUSTER_BREAK
   2654      */
   2655     public static interface GraphemeClusterBreak {
   2656         /**
   2657          */
   2658         public static final int OTHER = 0;
   2659         /**
   2660          */
   2661         public static final int CONTROL = 1;
   2662         /**
   2663          */
   2664         public static final int CR = 2;
   2665         /**
   2666          */
   2667         public static final int EXTEND = 3;
   2668         /**
   2669          */
   2670         public static final int L = 4;
   2671         /**
   2672          */
   2673         public static final int LF = 5;
   2674         /**
   2675          */
   2676         public static final int LV = 6;
   2677         /**
   2678          */
   2679         public static final int LVT = 7;
   2680         /**
   2681          */
   2682         public static final int T = 8;
   2683         /**
   2684          */
   2685         public static final int V = 9;
   2686         /**
   2687          */
   2688         public static final int SPACING_MARK = 10;
   2689         /**
   2690          */
   2691         public static final int PREPEND = 11;
   2692         /***/
   2693         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
   2694         /***/
   2695         public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
   2696         /***/
   2697         public static final int E_BASE_GAZ = 14;      /*[EBG]*/
   2698         /***/
   2699         public static final int E_MODIFIER = 15;      /*[EM]*/
   2700         /***/
   2701         public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
   2702         /***/
   2703         public static final int ZWJ = 17;             /*[ZWJ]*/
   2704         /**
   2705          * One more than the highest normal GraphemeClusterBreak value.
   2706          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
   2707          *
   2708          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2709          * @hide unsupported on Android
   2710          */
   2711         @Deprecated
   2712         public static final int COUNT = 18;
   2713     }
   2714 
   2715     /**
   2716      * Word Break constants.
   2717      * @see UProperty#WORD_BREAK
   2718      */
   2719     public static interface WordBreak {
   2720         /**
   2721          */
   2722         public static final int OTHER = 0;
   2723         /**
   2724          */
   2725         public static final int ALETTER = 1;
   2726         /**
   2727          */
   2728         public static final int FORMAT = 2;
   2729         /**
   2730          */
   2731         public static final int KATAKANA = 3;
   2732         /**
   2733          */
   2734         public static final int MIDLETTER = 4;
   2735         /**
   2736          */
   2737         public static final int MIDNUM = 5;
   2738         /**
   2739          */
   2740         public static final int NUMERIC = 6;
   2741         /**
   2742          */
   2743         public static final int EXTENDNUMLET = 7;
   2744         /**
   2745          */
   2746         public static final int CR = 8;
   2747         /**
   2748          */
   2749         public static final int EXTEND = 9;
   2750         /**
   2751          */
   2752         public static final int LF = 10;
   2753         /**
   2754          */
   2755         public static final int MIDNUMLET = 11;
   2756         /**
   2757          */
   2758         public static final int NEWLINE = 12;
   2759         /***/
   2760         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
   2761         /***/
   2762         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
   2763         /***/
   2764         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
   2765         /***/
   2766         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
   2767         /***/
   2768         public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
   2769         /***/
   2770         public static final int E_BASE_GAZ = 18;       /*[EBG]*/
   2771         /***/
   2772         public static final int E_MODIFIER = 19;       /*[EM]*/
   2773         /***/
   2774         public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
   2775         /***/
   2776         public static final int ZWJ = 21;              /*[ZWJ]*/
   2777         /**
   2778          * One more than the highest normal WordBreak value.
   2779          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
   2780          *
   2781          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2782          * @hide unsupported on Android
   2783          */
   2784         @Deprecated
   2785         public static final int COUNT = 22;
   2786     }
   2787 
   2788     /**
   2789      * Sentence Break constants.
   2790      * @see UProperty#SENTENCE_BREAK
   2791      */
   2792     public static interface SentenceBreak {
   2793         /**
   2794          */
   2795         public static final int OTHER = 0;
   2796         /**
   2797          */
   2798         public static final int ATERM = 1;
   2799         /**
   2800          */
   2801         public static final int CLOSE = 2;
   2802         /**
   2803          */
   2804         public static final int FORMAT = 3;
   2805         /**
   2806          */
   2807         public static final int LOWER = 4;
   2808         /**
   2809          */
   2810         public static final int NUMERIC = 5;
   2811         /**
   2812          */
   2813         public static final int OLETTER = 6;
   2814         /**
   2815          */
   2816         public static final int SEP = 7;
   2817         /**
   2818          */
   2819         public static final int SP = 8;
   2820         /**
   2821          */
   2822         public static final int STERM = 9;
   2823         /**
   2824          */
   2825         public static final int UPPER = 10;
   2826         /**
   2827          */
   2828         public static final int CR = 11;
   2829         /**
   2830          */
   2831         public static final int EXTEND = 12;
   2832         /**
   2833          */
   2834         public static final int LF = 13;
   2835         /**
   2836          */
   2837         public static final int SCONTINUE = 14;
   2838         /**
   2839          * One more than the highest normal SentenceBreak value.
   2840          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
   2841          *
   2842          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2843          * @hide unsupported on Android
   2844          */
   2845         @Deprecated
   2846         public static final int COUNT = 15;
   2847     }
   2848 
   2849     /**
   2850      * Line Break constants.
   2851      * @see UProperty#LINE_BREAK
   2852      */
   2853     public static interface LineBreak
   2854     {
   2855         /**
   2856          */
   2857         public static final int UNKNOWN = 0;
   2858         /**
   2859          */
   2860         public static final int AMBIGUOUS = 1;
   2861         /**
   2862          */
   2863         public static final int ALPHABETIC = 2;
   2864         /**
   2865          */
   2866         public static final int BREAK_BOTH = 3;
   2867         /**
   2868          */
   2869         public static final int BREAK_AFTER = 4;
   2870         /**
   2871          */
   2872         public static final int BREAK_BEFORE = 5;
   2873         /**
   2874          */
   2875         public static final int MANDATORY_BREAK = 6;
   2876         /**
   2877          */
   2878         public static final int CONTINGENT_BREAK = 7;
   2879         /**
   2880          */
   2881         public static final int CLOSE_PUNCTUATION = 8;
   2882         /**
   2883          */
   2884         public static final int COMBINING_MARK = 9;
   2885         /**
   2886          */
   2887         public static final int CARRIAGE_RETURN = 10;
   2888         /**
   2889          */
   2890         public static final int EXCLAMATION = 11;
   2891         /**
   2892          */
   2893         public static final int GLUE = 12;
   2894         /**
   2895          */
   2896         public static final int HYPHEN = 13;
   2897         /**
   2898          */
   2899         public static final int IDEOGRAPHIC = 14;
   2900         /**
   2901          * @see #INSEPARABLE
   2902          */
   2903         public static final int INSEPERABLE = 15;
   2904         /**
   2905          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
   2906          */
   2907         public static final int INSEPARABLE = 15;
   2908         /**
   2909          */
   2910         public static final int INFIX_NUMERIC = 16;
   2911         /**
   2912          */
   2913         public static final int LINE_FEED = 17;
   2914         /**
   2915          */
   2916         public static final int NONSTARTER = 18;
   2917         /**
   2918          */
   2919         public static final int NUMERIC = 19;
   2920         /**
   2921          */
   2922         public static final int OPEN_PUNCTUATION = 20;
   2923         /**
   2924          */
   2925         public static final int POSTFIX_NUMERIC = 21;
   2926         /**
   2927          */
   2928         public static final int PREFIX_NUMERIC = 22;
   2929         /**
   2930          */
   2931         public static final int QUOTATION = 23;
   2932         /**
   2933          */
   2934         public static final int COMPLEX_CONTEXT = 24;
   2935         /**
   2936          */
   2937         public static final int SURROGATE = 25;
   2938         /**
   2939          */
   2940         public static final int SPACE = 26;
   2941         /**
   2942          */
   2943         public static final int BREAK_SYMBOLS = 27;
   2944         /**
   2945          */
   2946         public static final int ZWSPACE = 28;
   2947         /**
   2948          */
   2949         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
   2950         /**
   2951          */
   2952         public static final int WORD_JOINER = 30;      /*[WJ]*/
   2953         /**
   2954          */
   2955         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
   2956         /**
   2957          */
   2958         public static final int H3 = 32;
   2959         /**
   2960          */
   2961         public static final int JL = 33;
   2962         /**
   2963          */
   2964         public static final int JT = 34;
   2965         /**
   2966          */
   2967         public static final int JV = 35;
   2968         /***/
   2969         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
   2970         /***/
   2971         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
   2972         /***/
   2973         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
   2974         /***/
   2975         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
   2976         /***/
   2977         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
   2978         /***/
   2979         public static final int E_MODIFIER = 41;  /*[EM]*/
   2980         /***/
   2981         public static final int ZWJ = 42;  /*[ZWJ]*/
   2982         /**
   2983          * One more than the highest normal LineBreak value.
   2984          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
   2985          *
   2986          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   2987          * @hide unsupported on Android
   2988          */
   2989         @Deprecated
   2990         public static final int COUNT = 43;
   2991     }
   2992 
   2993     /**
   2994      * Numeric Type constants.
   2995      * @see UProperty#NUMERIC_TYPE
   2996      */
   2997     public static interface NumericType
   2998     {
   2999         /**
   3000          */
   3001         public static final int NONE = 0;
   3002         /**
   3003          */
   3004         public static final int DECIMAL = 1;
   3005         /**
   3006          */
   3007         public static final int DIGIT = 2;
   3008         /**
   3009          */
   3010         public static final int NUMERIC = 3;
   3011         /**
   3012          * One more than the highest normal NumericType value.
   3013          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
   3014          *
   3015          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3016          * @hide unsupported on Android
   3017          */
   3018         @Deprecated
   3019         public static final int COUNT = 4;
   3020     }
   3021 
   3022     /**
   3023      * Hangul Syllable Type constants.
   3024      *
   3025      * @see UProperty#HANGUL_SYLLABLE_TYPE
   3026      */
   3027     public static interface HangulSyllableType
   3028     {
   3029         /**
   3030          */
   3031         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
   3032         /**
   3033          */
   3034         public static final int LEADING_JAMO        = 1;   /*[L]*/
   3035         /**
   3036          */
   3037         public static final int VOWEL_JAMO          = 2;   /*[V]*/
   3038         /**
   3039          */
   3040         public static final int TRAILING_JAMO       = 3;   /*[T]*/
   3041         /**
   3042          */
   3043         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
   3044         /**
   3045          */
   3046         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
   3047         /**
   3048          * One more than the highest normal HangulSyllableType value.
   3049          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
   3050          *
   3051          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3052          * @hide unsupported on Android
   3053          */
   3054         @Deprecated
   3055         public static final int COUNT               = 6;
   3056     }
   3057 
   3058     /**
   3059      * Bidi Paired Bracket Type constants.
   3060      *
   3061      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
   3062      */
   3063     public static interface BidiPairedBracketType {
   3064         /**
   3065          * Not a paired bracket.
   3066          */
   3067         public static final int NONE = 0;
   3068         /**
   3069          * Open paired bracket.
   3070          */
   3071         public static final int OPEN = 1;
   3072         /**
   3073          * Close paired bracket.
   3074          */
   3075         public static final int CLOSE = 2;
   3076         /**
   3077          * One more than the highest normal BidiPairedBracketType value.
   3078          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
   3079          *
   3080          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   3081          * @hide unsupported on Android
   3082          */
   3083         @Deprecated
   3084         public static final int COUNT = 3;
   3085     }
   3086 
   3087     // public data members -----------------------------------------------
   3088 
   3089     /**
   3090      * The lowest Unicode code point value, constant 0.
   3091      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
   3092      */
   3093     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
   3094 
   3095     /**
   3096      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
   3097      * Same as {@link Character#MAX_CODE_POINT}.
   3098      *
   3099      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
   3100      * which is still a char with the value U+FFFF.
   3101      */
   3102     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
   3103 
   3104     /**
   3105      * The minimum value for Supplementary code points, constant U+10000.
   3106      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
   3107      */
   3108     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
   3109 
   3110     /**
   3111      * Unicode value used when translating into Unicode encoding form and there
   3112      * is no existing character.
   3113      */
   3114     public static final int REPLACEMENT_CHAR = '\uFFFD';
   3115 
   3116     /**
   3117      * Special value that is returned by getUnicodeNumericValue(int) when no
   3118      * numeric value is defined for a code point.
   3119      * @see #getUnicodeNumericValue
   3120      */
   3121     public static final double NO_NUMERIC_VALUE = -123456789;
   3122 
   3123     /**
   3124      * Compatibility constant for Java Character's MIN_RADIX.
   3125      */
   3126     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
   3127 
   3128     /**
   3129      * Compatibility constant for Java Character's MAX_RADIX.
   3130      */
   3131     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
   3132 
   3133     /**
   3134      * Do not lowercase non-initial parts of words when titlecasing.
   3135      * Option bit for titlecasing APIs that take an options bit set.
   3136      *
   3137      * By default, titlecasing will titlecase the first cased character
   3138      * of a word and lowercase all other characters.
   3139      * With this option, the other characters will not be modified.
   3140      *
   3141      * @see #toTitleCase
   3142      */
   3143     public static final int TITLECASE_NO_LOWERCASE = 0x100;
   3144 
   3145     /**
   3146      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
   3147      * titlecase exactly the characters at breaks from the iterator.
   3148      * Option bit for titlecasing APIs that take an options bit set.
   3149      *
   3150      * By default, titlecasing will take each break iterator index,
   3151      * adjust it by looking for the next cased character, and titlecase that one.
   3152      * Other characters are lowercased.
   3153      *
   3154      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
   3155      *
   3156      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
   3157      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
   3158      * cased character F. If F exists, map F to default_title(F); then map each
   3159      * subsequent character C to default_lower(C).
   3160      *
   3161      * @see #toTitleCase
   3162      * @see #TITLECASE_NO_LOWERCASE
   3163      */
   3164     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
   3165 
   3166     // public methods ----------------------------------------------------
   3167 
   3168     /**
   3169      * Returnss the numeric value of a decimal digit code point.
   3170      * <br>This method observes the semantics of
   3171      * <code>java.lang.Character.digit()</code>.  Note that this
   3172      * will return positive values for code points for which isDigit
   3173      * returns false, just like java.lang.Character.
   3174      * <br><em>Semantic Change:</em> In release 1.3.1 and
   3175      * prior, this did not treat the European letters as having a
   3176      * digit value, and also treated numeric letters and other numbers as
   3177      * digits.
   3178      * This has been changed to conform to the java semantics.
   3179      * <br>A code point is a valid digit if and only if:
   3180      * <ul>
   3181      *   <li>ch is a decimal digit or one of the european letters, and
   3182      *   <li>the value of ch is less than the specified radix.
   3183      * </ul>
   3184      * @param ch the code point to query
   3185      * @param radix the radix
   3186      * @return the numeric value represented by the code point in the
   3187      * specified radix, or -1 if the code point is not a decimal digit
   3188      * or if its value is too large for the radix
   3189      */
   3190     public static int digit(int ch, int radix)
   3191     {
   3192         if (2 <= radix && radix <= 36) {
   3193             int value = digit(ch);
   3194             if (value < 0) {
   3195                 // ch is not a decimal digit, try latin letters
   3196                 value = UCharacterProperty.getEuropeanDigit(ch);
   3197             }
   3198             return (value < radix) ? value : -1;
   3199         } else {
   3200             return -1;  // invalid radix
   3201         }
   3202     }
   3203 
   3204     /**
   3205      * Returnss the numeric value of a decimal digit code point.
   3206      * <br>This is a convenience overload of <code>digit(int, int)</code>
   3207      * that provides a decimal radix.
   3208      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
   3209      * treated numeric letters and other numbers as digits.  This has
   3210      * been changed to conform to the java semantics.
   3211      * @param ch the code point to query
   3212      * @return the numeric value represented by the code point,
   3213      * or -1 if the code point is not a decimal digit or if its
   3214      * value is too large for a decimal radix
   3215      */
   3216     public static int digit(int ch)
   3217     {
   3218         return UCharacterProperty.INSTANCE.digit(ch);
   3219     }
   3220 
   3221     /**
   3222      * Returns the numeric value of the code point as a nonnegative
   3223      * integer.
   3224      * <br>If the code point does not have a numeric value, then -1 is returned.
   3225      * <br>
   3226      * If the code point has a numeric value that cannot be represented as a
   3227      * nonnegative integer (for example, a fractional value), then -2 is
   3228      * returned.
   3229      * @param ch the code point to query
   3230      * @return the numeric value of the code point, or -1 if it has no numeric
   3231      * value, or -2 if it has a numeric value that cannot be represented as a
   3232      * nonnegative integer
   3233      */
   3234     public static int getNumericValue(int ch)
   3235     {
   3236         return UCharacterProperty.INSTANCE.getNumericValue(ch);
   3237     }
   3238 
   3239     /**
   3240      * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the
   3241      * Unicode Character Database.
   3242      * <p>A "double" return type is necessary because some numeric values are
   3243      * fractions, negative, or too large for int.
   3244      * <p>For characters without any numeric values in the Unicode Character
   3245      * Database, this function will return NO_NUMERIC_VALUE.
   3246      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
   3247      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
   3248      * return type int and returns -1 when the argument ch does not have a
   3249      * corresponding numeric value. This has been changed to synch with ICU4C
   3250      *
   3251      * This corresponds to the ICU4C function u_getNumericValue.
   3252      * @param ch Code point to get the numeric value for.
   3253      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
   3254      */
   3255     public static double getUnicodeNumericValue(int ch)
   3256     {
   3257         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
   3258     }
   3259 
   3260     /**
   3261      * Compatibility override of Java deprecated method.  This
   3262      * method will always remain deprecated.
   3263      * Same as java.lang.Character.isSpace().
   3264      * @param ch the code point
   3265      * @return true if the code point is a space character as
   3266      * defined by java.lang.Character.isSpace.
   3267      * @deprecated ICU 3.4 (Java)
   3268      * @hide original deprecated declaration
   3269      */
   3270     @Deprecated
   3271     public static boolean isSpace(int ch) {
   3272         return ch <= 0x20 &&
   3273                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
   3274     }
   3275 
   3276     /**
   3277      * Returns a value indicating a code point's Unicode category.
   3278      * Up-to-date Unicode implementation of java.lang.Character.getType()
   3279      * except for the above mentioned code points that had their category
   3280      * changed.<br>
   3281      * Return results are constants from the interface
   3282      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
   3283      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
   3284      * those returned by java.lang.Character.getType.  UCharacterCategory values
   3285      * match the ones used in ICU4C, while java.lang.Character type
   3286      * values, though similar, skip the value 17.
   3287      * @param ch code point whose type is to be determined
   3288      * @return category which is a value of UCharacterCategory
   3289      */
   3290     public static int getType(int ch)
   3291     {
   3292         return UCharacterProperty.INSTANCE.getType(ch);
   3293     }
   3294 
   3295     /**
   3296      * Determines if a code point has a defined meaning in the up-to-date
   3297      * Unicode standard.
   3298      * E.g. supplementary code points though allocated space are not defined in
   3299      * Unicode yet.<br>
   3300      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
   3301      * @param ch code point to be determined if it is defined in the most
   3302      *        current version of Unicode
   3303      * @return true if this code point is defined in unicode
   3304      */
   3305     public static boolean isDefined(int ch)
   3306     {
   3307         return getType(ch) != 0;
   3308     }
   3309 
   3310     /**
   3311      * Determines if a code point is a Java digit.
   3312      * <br>This method observes the semantics of
   3313      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
   3314      * digits only.
   3315      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
   3316      * numeric letters and other numbers as digits.
   3317      * This has been changed to conform to the java semantics.
   3318      * @param ch code point to query
   3319      * @return true if this code point is a digit
   3320      */
   3321     public static boolean isDigit(int ch)
   3322     {
   3323         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
   3324     }
   3325 
   3326     /**
   3327      * Determines if the specified code point is an ISO control character.
   3328      * A code point is considered to be an ISO control character if it is in
   3329      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
   3330      * &#92;u009F.<br>
   3331      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
   3332      * @param ch code point to determine if it is an ISO control character
   3333      * @return true if code point is a ISO control character
   3334      */
   3335     public static boolean isISOControl(int ch)
   3336     {
   3337         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
   3338                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
   3339     }
   3340 
   3341     /**
   3342      * Determines if the specified code point is a letter.
   3343      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
   3344      * @param ch code point to determine if it is a letter
   3345      * @return true if code point is a letter
   3346      */
   3347     public static boolean isLetter(int ch)
   3348     {
   3349         // if props == 0, it will just fall through and return false
   3350         return ((1 << getType(ch))
   3351                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   3352                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   3353                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   3354                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   3355                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
   3356     }
   3357 
   3358     /**
   3359      * Determines if the specified code point is a letter or digit.
   3360      * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii
   3361      * characters 'A' - 'Z' and 'a' - 'z' as digits.
   3362      * @param ch code point to determine if it is a letter or a digit
   3363      * @return true if code point is a letter or a digit
   3364      */
   3365     public static boolean isLetterOrDigit(int ch)
   3366     {
   3367         return ((1 << getType(ch))
   3368                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   3369                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   3370                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   3371                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   3372                         | (1 << UCharacterCategory.OTHER_LETTER)
   3373                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
   3374     }
   3375 
   3376     /**
   3377      * Compatibility override of Java deprecated method.  This
   3378      * method will always remain deprecated.  Delegates to
   3379      * java.lang.Character.isJavaIdentifierStart.
   3380      * @param cp the code point
   3381      * @return true if the code point can start a java identifier.
   3382      * @deprecated ICU 3.4 (Java)
   3383      * @hide original deprecated declaration
   3384      */
   3385     @Deprecated
   3386     public static boolean isJavaLetter(int cp) {
   3387         return isJavaIdentifierStart(cp);
   3388     }
   3389 
   3390     /**
   3391      * Compatibility override of Java deprecated method.  This
   3392      * method will always remain deprecated.  Delegates to
   3393      * java.lang.Character.isJavaIdentifierPart.
   3394      * @param cp the code point
   3395      * @return true if the code point can continue a java identifier.
   3396      * @deprecated ICU 3.4 (Java)
   3397      * @hide original deprecated declaration
   3398      */
   3399     @Deprecated
   3400     public static boolean isJavaLetterOrDigit(int cp) {
   3401         return isJavaIdentifierPart(cp);
   3402     }
   3403 
   3404     /**
   3405      * Compatibility override of Java method, delegates to
   3406      * java.lang.Character.isJavaIdentifierStart.
   3407      * @param cp the code point
   3408      * @return true if the code point can start a java identifier.
   3409      */
   3410     public static boolean isJavaIdentifierStart(int cp) {
   3411         // note, downcast to char for jdk 1.4 compatibility
   3412         return java.lang.Character.isJavaIdentifierStart((char)cp);
   3413     }
   3414 
   3415     /**
   3416      * Compatibility override of Java method, delegates to
   3417      * java.lang.Character.isJavaIdentifierPart.
   3418      * @param cp the code point
   3419      * @return true if the code point can continue a java identifier.
   3420      */
   3421     public static boolean isJavaIdentifierPart(int cp) {
   3422         // note, downcast to char for jdk 1.4 compatibility
   3423         return java.lang.Character.isJavaIdentifierPart((char)cp);
   3424     }
   3425 
   3426     /**
   3427      * Determines if the specified code point is a lowercase character.
   3428      * UnicodeData only contains case mappings for code points where they are
   3429      * one-to-one mappings; it also omits information about context-sensitive
   3430      * case mappings.<br> For more information about Unicode case mapping
   3431      * please refer to the
   3432      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
   3433      * #21</a>.<br>
   3434      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
   3435      * @param ch code point to determine if it is in lowercase
   3436      * @return true if code point is a lowercase character
   3437      */
   3438     public static boolean isLowerCase(int ch)
   3439     {
   3440         // if props == 0, it will just fall through and return false
   3441         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
   3442     }
   3443 
   3444     /**
   3445      * Determines if the specified code point is a white space character.
   3446      * A code point is considered to be an whitespace character if and only
   3447      * if it satisfies one of the following criteria:
   3448      * <ul>
   3449      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
   3450      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
   3451      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
   3452      * <li> It is &#92;u000A, LINE FEED.
   3453      * <li> It is &#92;u000B, VERTICAL TABULATION.
   3454      * <li> It is &#92;u000C, FORM FEED.
   3455      * <li> It is &#92;u000D, CARRIAGE RETURN.
   3456      * <li> It is &#92;u001C, FILE SEPARATOR.
   3457      * <li> It is &#92;u001D, GROUP SEPARATOR.
   3458      * <li> It is &#92;u001E, RECORD SEPARATOR.
   3459      * <li> It is &#92;u001F, UNIT SEPARATOR.
   3460      * </ul>
   3461      *
   3462      * This API tries to sync with the semantics of Java's
   3463      * java.lang.Character.isWhitespace(), but it may not return
   3464      * the exact same results because of the Unicode version
   3465      * difference.
   3466      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
   3467      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
   3468      * See http://www.unicode.org/versions/Unicode4.0.1/
   3469      * @param ch code point to determine if it is a white space
   3470      * @return true if the specified code point is a white space character
   3471      */
   3472     public static boolean isWhitespace(int ch)
   3473     {
   3474         // exclude no-break spaces
   3475         // if props == 0, it will just fall through and return false
   3476         return ((1 << getType(ch)) &
   3477                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
   3478                         | (1 << UCharacterCategory.LINE_SEPARATOR)
   3479                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
   3480                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
   3481                         // TAB VT LF FF CR FS GS RS US NL are all control characters
   3482                         // that are white spaces.
   3483                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
   3484     }
   3485 
   3486     /**
   3487      * Determines if the specified code point is a Unicode specified space
   3488      * character, i.e. if code point is in the category Zs, Zl and Zp.
   3489      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
   3490      * @param ch code point to determine if it is a space
   3491      * @return true if the specified code point is a space character
   3492      */
   3493     public static boolean isSpaceChar(int ch)
   3494     {
   3495         // if props == 0, it will just fall through and return false
   3496         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
   3497                 | (1 << UCharacterCategory.LINE_SEPARATOR)
   3498                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
   3499                 != 0;
   3500     }
   3501 
   3502     /**
   3503      * Determines if the specified code point is a titlecase character.
   3504      * UnicodeData only contains case mappings for code points where they are
   3505      * one-to-one mappings; it also omits information about context-sensitive
   3506      * case mappings.<br>
   3507      * For more information about Unicode case mapping please refer to the
   3508      * <a href=http://www.unicode.org/unicode/reports/tr21/>
   3509      * Technical report #21</a>.<br>
   3510      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
   3511      * @param ch code point to determine if it is in title case
   3512      * @return true if the specified code point is a titlecase character
   3513      */
   3514     public static boolean isTitleCase(int ch)
   3515     {
   3516         // if props == 0, it will just fall through and return false
   3517         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
   3518     }
   3519 
   3520     /**
   3521      * Determines if the specified code point may be any part of a Unicode
   3522      * identifier other than the starting character.
   3523      * A code point may be part of a Unicode identifier if and only if it is
   3524      * one of the following:
   3525      * <ul>
   3526      * <li> Lu Uppercase letter
   3527      * <li> Ll Lowercase letter
   3528      * <li> Lt Titlecase letter
   3529      * <li> Lm Modifier letter
   3530      * <li> Lo Other letter
   3531      * <li> Nl Letter number
   3532      * <li> Pc Connecting punctuation character
   3533      * <li> Nd decimal number
   3534      * <li> Mc Spacing combining mark
   3535      * <li> Mn Non-spacing mark
   3536      * <li> Cf formatting code
   3537      * </ul>
   3538      * Up-to-date Unicode implementation of
   3539      * java.lang.Character.isUnicodeIdentifierPart().<br>
   3540      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
   3541      * @param ch code point to determine if is can be part of a Unicode
   3542      *        identifier
   3543      * @return true if code point is any character belonging a unicode
   3544      *         identifier suffix after the first character
   3545      */
   3546     public static boolean isUnicodeIdentifierPart(int ch)
   3547     {
   3548         // if props == 0, it will just fall through and return false
   3549         // cat == format
   3550         return ((1 << getType(ch))
   3551                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   3552                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   3553                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   3554                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   3555                         | (1 << UCharacterCategory.OTHER_LETTER)
   3556                         | (1 << UCharacterCategory.LETTER_NUMBER)
   3557                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
   3558                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
   3559                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
   3560                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
   3561                         || isIdentifierIgnorable(ch);
   3562     }
   3563 
   3564     /**
   3565      * Determines if the specified code point is permissible as the first
   3566      * character in a Unicode identifier.
   3567      * A code point may start a Unicode identifier if it is of type either
   3568      * <ul>
   3569      * <li> Lu Uppercase letter
   3570      * <li> Ll Lowercase letter
   3571      * <li> Lt Titlecase letter
   3572      * <li> Lm Modifier letter
   3573      * <li> Lo Other letter
   3574      * <li> Nl Letter number
   3575      * </ul>
   3576      * Up-to-date Unicode implementation of
   3577      * java.lang.Character.isUnicodeIdentifierStart().<br>
   3578      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
   3579      * @param ch code point to determine if it can start a Unicode identifier
   3580      * @return true if code point is the first character belonging a unicode
   3581      *              identifier
   3582      */
   3583     public static boolean isUnicodeIdentifierStart(int ch)
   3584     {
   3585         /*int cat = getType(ch);*/
   3586         // if props == 0, it will just fall through and return false
   3587         return ((1 << getType(ch))
   3588                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
   3589                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
   3590                         | (1 << UCharacterCategory.TITLECASE_LETTER)
   3591                         | (1 << UCharacterCategory.MODIFIER_LETTER)
   3592                         | (1 << UCharacterCategory.OTHER_LETTER)
   3593                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
   3594     }
   3595 
   3596     /**
   3597      * Determines if the specified code point should be regarded as an
   3598      * ignorable character in a Java identifier.
   3599      * A character is Java-identifier-ignorable if it has the general category
   3600      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
   3601      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
   3602      * Up-to-date Unicode implementation of
   3603      * java.lang.Character.isIdentifierIgnorable().<br>
   3604      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
   3605      * <p>Note that Unicode just recommends to ignore Cf (format controls).
   3606      * @param ch code point to be determined if it can be ignored in a Unicode
   3607      *        identifier.
   3608      * @return true if the code point is ignorable
   3609      */
   3610     public static boolean isIdentifierIgnorable(int ch)
   3611     {
   3612         // see java.lang.Character.isIdentifierIgnorable() on range of
   3613         // ignorable characters.
   3614         if (ch <= 0x9f) {
   3615             return isISOControl(ch)
   3616                     && !((ch >= 0x9 && ch <= 0xd)
   3617                             || (ch >= 0x1c && ch <= 0x1f));
   3618         }
   3619         return getType(ch) == UCharacterCategory.FORMAT;
   3620     }
   3621 
   3622     /**
   3623      * Determines if the specified code point is an uppercase character.
   3624      * UnicodeData only contains case mappings for code point where they are
   3625      * one-to-one mappings; it also omits information about context-sensitive
   3626      * case mappings.<br>
   3627      * For language specific case conversion behavior, use
   3628      * toUpperCase(locale, str). <br>
   3629      * For example, the case conversion for dot-less i and dotted I in Turkish,
   3630      * or for final sigma in Greek.
   3631      * For more information about Unicode case mapping please refer to the
   3632      * <a href=http://www.unicode.org/unicode/reports/tr21/>
   3633      * Technical report #21</a>.<br>
   3634      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
   3635      * @param ch code point to determine if it is in uppercase
   3636      * @return true if the code point is an uppercase character
   3637      */
   3638     public static boolean isUpperCase(int ch)
   3639     {
   3640         // if props == 0, it will just fall through and return false
   3641         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
   3642     }
   3643 
   3644     /**
   3645      * The given code point is mapped to its lowercase equivalent; if the code
   3646      * point has no lowercase equivalent, the code point itself is returned.
   3647      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
   3648      *
   3649      * <p>This function only returns the simple, single-code point case mapping.
   3650      * Full case mappings should be used whenever possible because they produce
   3651      * better results by working on whole strings.
   3652      * They take into account the string context and the language and can map
   3653      * to a result string with a different length as appropriate.
   3654      * Full case mappings are applied by the case mapping functions
   3655      * that take String parameters rather than code points (int).
   3656      * See also the User Guide chapter on C/POSIX migration:
   3657      * http://www.icu-project.org/userguide/posix.html#case_mappings
   3658      *
   3659      * @param ch code point whose lowercase equivalent is to be retrieved
   3660      * @return the lowercase equivalent code point
   3661      */
   3662     public static int toLowerCase(int ch) {
   3663         return UCaseProps.INSTANCE.tolower(ch);
   3664     }
   3665 
   3666     /**
   3667      * Converts argument code point and returns a String object representing
   3668      * the code point's value in UTF-16 format.
   3669      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
   3670      *
   3671      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
   3672      *
   3673      * @param ch code point
   3674      * @return string representation of the code point, null if code point is not
   3675      *         defined in unicode
   3676      */
   3677     public static String toString(int ch)
   3678     {
   3679         if (ch < MIN_VALUE || ch > MAX_VALUE) {
   3680             return null;
   3681         }
   3682 
   3683         if (ch < SUPPLEMENTARY_MIN_VALUE) {
   3684             return String.valueOf((char)ch);
   3685         }
   3686 
   3687         return new String(Character.toChars(ch));
   3688     }
   3689 
   3690     /**
   3691      * Converts the code point argument to titlecase.
   3692      * If no titlecase is available, the uppercase is returned. If no uppercase
   3693      * is available, the code point itself is returned.
   3694      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
   3695      *
   3696      * <p>This function only returns the simple, single-code point case mapping.
   3697      * Full case mappings should be used whenever possible because they produce
   3698      * better results by working on whole strings.
   3699      * They take into account the string context and the language and can map
   3700      * to a result string with a different length as appropriate.
   3701      * Full case mappings are applied by the case mapping functions
   3702      * that take String parameters rather than code points (int).
   3703      * See also the User Guide chapter on C/POSIX migration:
   3704      * http://www.icu-project.org/userguide/posix.html#case_mappings
   3705      *
   3706      * @param ch code point  whose title case is to be retrieved
   3707      * @return titlecase code point
   3708      */
   3709     public static int toTitleCase(int ch) {
   3710         return UCaseProps.INSTANCE.totitle(ch);
   3711     }
   3712 
   3713     /**
   3714      * Converts the character argument to uppercase.
   3715      * If no uppercase is available, the character itself is returned.
   3716      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
   3717      *
   3718      * <p>This function only returns the simple, single-code point case mapping.
   3719      * Full case mappings should be used whenever possible because they produce
   3720      * better results by working on whole strings.
   3721      * They take into account the string context and the language and can map
   3722      * to a result string with a different length as appropriate.
   3723      * Full case mappings are applied by the case mapping functions
   3724      * that take String parameters rather than code points (int).
   3725      * See also the User Guide chapter on C/POSIX migration:
   3726      * http://www.icu-project.org/userguide/posix.html#case_mappings
   3727      *
   3728      * @param ch code point whose uppercase is to be retrieved
   3729      * @return uppercase code point
   3730      */
   3731     public static int toUpperCase(int ch) {
   3732         return UCaseProps.INSTANCE.toupper(ch);
   3733     }
   3734 
   3735     // extra methods not in java.lang.Character --------------------------
   3736 
   3737     /**
   3738      * <strong>[icu]</strong> Determines if the code point is a supplementary character.
   3739      * A code point is a supplementary character if and only if it is greater
   3740      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
   3741      * @param ch code point to be determined if it is in the supplementary
   3742      *        plane
   3743      * @return true if code point is a supplementary character
   3744      */
   3745     public static boolean isSupplementary(int ch)
   3746     {
   3747         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
   3748                 ch <= UCharacter.MAX_VALUE;
   3749     }
   3750 
   3751     /**
   3752      * <strong>[icu]</strong> Determines if the code point is in the BMP plane.
   3753      * @param ch code point to be determined if it is not a supplementary
   3754      *        character
   3755      * @return true if code point is not a supplementary character
   3756      */
   3757     public static boolean isBMP(int ch)
   3758     {
   3759         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
   3760     }
   3761 
   3762     /**
   3763      * <strong>[icu]</strong> Determines whether the specified code point is a printable character
   3764      * according to the Unicode standard.
   3765      * @param ch code point to be determined if it is printable
   3766      * @return true if the code point is a printable character
   3767      */
   3768     public static boolean isPrintable(int ch)
   3769     {
   3770         int cat = getType(ch);
   3771         // if props == 0, it will just fall through and return false
   3772         return (cat != UCharacterCategory.UNASSIGNED &&
   3773                 cat != UCharacterCategory.CONTROL &&
   3774                 cat != UCharacterCategory.FORMAT &&
   3775                 cat != UCharacterCategory.PRIVATE_USE &&
   3776                 cat != UCharacterCategory.SURROGATE &&
   3777                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
   3778     }
   3779 
   3780     /**
   3781      * <strong>[icu]</strong> Determines whether the specified code point is of base form.
   3782      * A code point of base form does not graphically combine with preceding
   3783      * characters, and is neither a control nor a format character.
   3784      * @param ch code point to be determined if it is of base form
   3785      * @return true if the code point is of base form
   3786      */
   3787     public static boolean isBaseForm(int ch)
   3788     {
   3789         int cat = getType(ch);
   3790         // if props == 0, it will just fall through and return false
   3791         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
   3792                 cat == UCharacterCategory.OTHER_NUMBER ||
   3793                 cat == UCharacterCategory.LETTER_NUMBER ||
   3794                 cat == UCharacterCategory.UPPERCASE_LETTER ||
   3795                 cat == UCharacterCategory.LOWERCASE_LETTER ||
   3796                 cat == UCharacterCategory.TITLECASE_LETTER ||
   3797                 cat == UCharacterCategory.MODIFIER_LETTER ||
   3798                 cat == UCharacterCategory.OTHER_LETTER ||
   3799                 cat == UCharacterCategory.NON_SPACING_MARK ||
   3800                 cat == UCharacterCategory.ENCLOSING_MARK ||
   3801                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
   3802     }
   3803 
   3804     /**
   3805      * <strong>[icu]</strong> Returns the Bidirection property of a code point.
   3806      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
   3807      * property.<br>
   3808      * Result returned belongs to the interface
   3809      * <a href=UCharacterDirection.html>UCharacterDirection</a>
   3810      * @param ch the code point to be determined its direction
   3811      * @return direction constant from UCharacterDirection.
   3812      */
   3813     public static int getDirection(int ch)
   3814     {
   3815         return UBiDiProps.INSTANCE.getClass(ch);
   3816     }
   3817 
   3818     /**
   3819      * Determines whether the code point has the "mirrored" property.
   3820      * This property is set for characters that are commonly used in
   3821      * Right-To-Left contexts and need to be displayed with a "mirrored"
   3822      * glyph.
   3823      * @param ch code point whose mirror is to be determined
   3824      * @return true if the code point has the "mirrored" property
   3825      */
   3826     public static boolean isMirrored(int ch)
   3827     {
   3828         return UBiDiProps.INSTANCE.isMirrored(ch);
   3829     }
   3830 
   3831     /**
   3832      * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point.
   3833      * For code points with the "mirrored" property, implementations sometimes
   3834      * need a "poor man's" mapping to another code point such that the default
   3835      * glyph may serve as the mirror-image of the default glyph of the
   3836      * specified code point.<br>
   3837      * This is useful for text conversion to and from codepages with visual
   3838      * order, and for displays without glyph selection capabilities.
   3839      * @param ch code point whose mirror is to be retrieved
   3840      * @return another code point that may serve as a mirror-image substitute,
   3841      *         or ch itself if there is no such mapping or ch does not have the
   3842      *         "mirrored" property
   3843      */
   3844     public static int getMirror(int ch)
   3845     {
   3846         return UBiDiProps.INSTANCE.getMirror(ch);
   3847     }
   3848 
   3849     /**
   3850      * <strong>[icu]</strong> Maps the specified character to its paired bracket character.
   3851      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
   3852      * Otherwise c itself is returned.
   3853      * See http://www.unicode.org/reports/tr9/
   3854      *
   3855      * @param c the code point to be mapped
   3856      * @return the paired bracket code point,
   3857      *         or c itself if there is no such mapping
   3858      *         (Bidi_Paired_Bracket_Type=None)
   3859      *
   3860      * @see UProperty#BIDI_PAIRED_BRACKET
   3861      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
   3862      * @see #getMirror(int)
   3863      */
   3864     public static int getBidiPairedBracket(int c) {
   3865         return UBiDiProps.INSTANCE.getPairedBracket(c);
   3866     }
   3867 
   3868     /**
   3869      * <strong>[icu]</strong> Returns the combining class of the argument codepoint
   3870      * @param ch code point whose combining is to be retrieved
   3871      * @return the combining class of the codepoint
   3872      */
   3873     public static int getCombiningClass(int ch)
   3874     {
   3875         return Normalizer2.getNFDInstance().getCombiningClass(ch);
   3876     }
   3877 
   3878     /**
   3879      * <strong>[icu]</strong> A code point is illegal if and only if
   3880      * <ul>
   3881      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
   3882      * <li> A surrogate value, 0xD800 to 0xDFFF
   3883      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
   3884      * </ul>
   3885      * Note: legal does not mean that it is assigned in this version of Unicode.
   3886      * @param ch code point to determine if it is a legal code point by itself
   3887      * @return true if and only if legal.
   3888      */
   3889     public static boolean isLegal(int ch)
   3890     {
   3891         if (ch < MIN_VALUE) {
   3892             return false;
   3893         }
   3894         if (ch < Character.MIN_SURROGATE) {
   3895             return true;
   3896         }
   3897         if (ch <= Character.MAX_SURROGATE) {
   3898             return false;
   3899         }
   3900         if (UCharacterUtility.isNonCharacter(ch)) {
   3901             return false;
   3902         }
   3903         return (ch <= MAX_VALUE);
   3904     }
   3905 
   3906     /**
   3907      * <strong>[icu]</strong> A string is legal iff all its code points are legal.
   3908      * A code point is illegal if and only if
   3909      * <ul>
   3910      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
   3911      * <li> A surrogate value, 0xD800 to 0xDFFF
   3912      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
   3913      * </ul>
   3914      * Note: legal does not mean that it is assigned in this version of Unicode.
   3915      * @param str containing code points to examin
   3916      * @return true if and only if legal.
   3917      */
   3918     public static boolean isLegal(String str)
   3919     {
   3920         int size = str.length();
   3921         int codepoint;
   3922         for (int i = 0; i < size; i += Character.charCount(codepoint))
   3923         {
   3924             codepoint = str.codePointAt(i);
   3925             if (!isLegal(codepoint)) {
   3926                 return false;
   3927             }
   3928         }
   3929         return true;
   3930     }
   3931 
   3932     /**
   3933      * <strong>[icu]</strong> Returns the version of Unicode data used.
   3934      * @return the unicode version number used
   3935      */
   3936     public static VersionInfo getUnicodeVersion()
   3937     {
   3938         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
   3939     }
   3940 
   3941     /**
   3942      * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or
   3943      * null if the character is unassigned or outside the range
   3944      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
   3945      * <br>
   3946      * Note calling any methods related to code point names, e.g. get*Name*()
   3947      * incurs a one-time initialisation cost to construct the name tables.
   3948      * @param ch the code point for which to get the name
   3949      * @return most current Unicode name
   3950      */
   3951     public static String getName(int ch)
   3952     {
   3953         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
   3954     }
   3955 
   3956     /**
   3957      * <strong>[icu]</strong> Returns the names for each of the characters in a string
   3958      * @param s string to format
   3959      * @param separator string to go between names
   3960      * @return string of names
   3961      */
   3962     public static String getName(String s, String separator) {
   3963         if (s.length() == 1) { // handle common case
   3964             return getName(s.charAt(0));
   3965         }
   3966         int cp;
   3967         StringBuilder sb = new StringBuilder();
   3968         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
   3969             cp = s.codePointAt(i);
   3970             if (i != 0) sb.append(separator);
   3971             sb.append(UCharacter.getName(cp));
   3972         }
   3973         return sb.toString();
   3974     }
   3975 
   3976     /**
   3977      * <strong>[icu]</strong> Returns null.
   3978      * Used to return the Unicode_1_Name property value which was of little practical value.
   3979      * @param ch the code point for which to get the name
   3980      * @return null
   3981      * @deprecated ICU 49
   3982      * @hide original deprecated declaration
   3983      */
   3984     @Deprecated
   3985     public static String getName1_0(int ch)
   3986     {
   3987         return null;
   3988     }
   3989 
   3990     /**
   3991      * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and
   3992      * getName1_0(int), this method will return a name even for codepoints that
   3993      * are not assigned a name in UnicodeData.txt.
   3994      *
   3995      * <p>The names are returned in the following order.
   3996      * <ul>
   3997      * <li> Most current Unicode name if there is any
   3998      * <li> Unicode 1.0 name if there is any
   3999      * <li> Extended name in the form of
   4000      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
   4001      * </ul>
   4002      * Note calling any methods related to code point names, e.g. get*Name*()
   4003      * incurs a one-time initialisation cost to construct the name tables.
   4004      * @param ch the code point for which to get the name
   4005      * @return a name for the argument codepoint
   4006      */
   4007     public static String getExtendedName(int ch) {
   4008         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
   4009     }
   4010 
   4011     /**
   4012      * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one.
   4013      * Returns null if the character is unassigned or outside the range
   4014      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
   4015      * <br>
   4016      * Note calling any methods related to code point names, e.g. get*Name*()
   4017      * incurs a one-time initialisation cost to construct the name tables.
   4018      * @param ch the code point for which to get the name alias
   4019      * @return Unicode name alias, or null
   4020      */
   4021     public static String getNameAlias(int ch)
   4022     {
   4023         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
   4024     }
   4025 
   4026     /**
   4027      * <strong>[icu]</strong> Returns null.
   4028      * Used to return the ISO 10646 comment for a character.
   4029      * The Unicode ISO_Comment property is deprecated and has no values.
   4030      *
   4031      * @param ch The code point for which to get the ISO comment.
   4032      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
   4033      * @return null
   4034      * @deprecated ICU 49
   4035      * @hide original deprecated declaration
   4036      */
   4037     @Deprecated
   4038     public static String getISOComment(int ch)
   4039     {
   4040         return null;
   4041     }
   4042 
   4043     /**
   4044      * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and
   4045      * return its code point value. All Unicode names are in uppercase.
   4046      * Note calling any methods related to code point names, e.g. get*Name*()
   4047      * incurs a one-time initialisation cost to construct the name tables.
   4048      * @param name most current Unicode character name whose code point is to
   4049      *        be returned
   4050      * @return code point or -1 if name is not found
   4051      */
   4052     public static int getCharFromName(String name){
   4053         return UCharacterName.INSTANCE.getCharFromName(
   4054                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
   4055     }
   4056 
   4057     /**
   4058      * <strong>[icu]</strong> Returns -1.
   4059      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
   4060      * its code point value.
   4061      * @param name Unicode 1.0 code point name whose code point is to be
   4062      *             returned
   4063      * @return -1
   4064      * @deprecated ICU 49
   4065      * @see #getName1_0(int)
   4066      * @hide original deprecated declaration
   4067      */
   4068     @Deprecated
   4069     public static int getCharFromName1_0(String name){
   4070         return -1;
   4071     }
   4072 
   4073     /**
   4074      * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code
   4075      * point value. All Unicode names are in uppercase.
   4076      * Extended names are all lowercase except for numbers and are contained
   4077      * within angle brackets.
   4078      * The names are searched in the following order
   4079      * <ul>
   4080      * <li> Most current Unicode name if there is any
   4081      * <li> Unicode 1.0 name if there is any
   4082      * <li> Extended name in the form of
   4083      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
   4084      * </ul>
   4085      * Note calling any methods related to code point names, e.g. get*Name*()
   4086      * incurs a one-time initialisation cost to construct the name tables.
   4087      * @param name codepoint name
   4088      * @return code point associated with the name or -1 if the name is not
   4089      *         found.
   4090      */
   4091     public static int getCharFromExtendedName(String name){
   4092         return UCharacterName.INSTANCE.getCharFromName(
   4093                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
   4094     }
   4095 
   4096     /**
   4097      * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return
   4098      * its code point value. All Unicode names are in uppercase.
   4099      * Note calling any methods related to code point names, e.g. get*Name*()
   4100      * incurs a one-time initialisation cost to construct the name tables.
   4101      * @param name Unicode name alias whose code point is to be returned
   4102      * @return code point or -1 if name is not found
   4103      */
   4104     public static int getCharFromNameAlias(String name){
   4105         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
   4106     }
   4107 
   4108     /**
   4109      * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the
   4110      * Unicode database file PropertyAliases.txt.  Most properties
   4111      * have more than one name.  The nameChoice determines which one
   4112      * is returned.
   4113      *
   4114      * In addition, this function maps the property
   4115      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
   4116      * "General_Category_Mask".  These names are not in
   4117      * PropertyAliases.txt.
   4118      *
   4119      * @param property UProperty selector.
   4120      *
   4121      * @param nameChoice UProperty.NameChoice selector for which name
   4122      * to get.  All properties have a long name.  Most have a short
   4123      * name, but some do not.  Unicode allows for additional names; if
   4124      * present these will be returned by UProperty.NameChoice.LONG + i,
   4125      * where i=1, 2,...
   4126      *
   4127      * @return a name, or null if Unicode explicitly defines no name
   4128      * ("n/a") for a given property/nameChoice.  If a given nameChoice
   4129      * throws an exception, then all larger values of nameChoice will
   4130      * throw an exception.  If null is returned for a given
   4131      * nameChoice, then other nameChoice values may return non-null
   4132      * results.
   4133      *
   4134      * @exception IllegalArgumentException thrown if property or
   4135      * nameChoice are invalid.
   4136      *
   4137      * @see UProperty
   4138      * @see UProperty.NameChoice
   4139      */
   4140     public static String getPropertyName(int property,
   4141             int nameChoice) {
   4142         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
   4143     }
   4144 
   4145     /**
   4146      * <strong>[icu]</strong> Return the UProperty selector for a given property name, as
   4147      * specified in the Unicode database file PropertyAliases.txt.
   4148      * Short, long, and any other variants are recognized.
   4149      *
   4150      * In addition, this function maps the synthetic names "gcm" /
   4151      * "General_Category_Mask" to the property
   4152      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
   4153      * PropertyAliases.txt.
   4154      *
   4155      * @param propertyAlias the property name to be matched.  The name
   4156      * is compared using "loose matching" as described in
   4157      * PropertyAliases.txt.
   4158      *
   4159      * @return a UProperty enum.
   4160      *
   4161      * @exception IllegalArgumentException thrown if propertyAlias
   4162      * is not recognized.
   4163      *
   4164      * @see UProperty
   4165      */
   4166     public static int getPropertyEnum(CharSequence propertyAlias) {
   4167         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
   4168         if (propEnum == UProperty.UNDEFINED) {
   4169             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
   4170         }
   4171         return propEnum;
   4172     }
   4173 
   4174     /**
   4175      * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in
   4176      * the Unicode database file PropertyValueAliases.txt.  Most
   4177      * values have more than one name.  The nameChoice determines
   4178      * which one is returned.
   4179      *
   4180      * Note: Some of the names in PropertyValueAliases.txt can only be
   4181      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
   4182      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
   4183      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
   4184      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
   4185      *
   4186      * @param property UProperty selector constant.
   4187      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
   4188      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
   4189      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
   4190      * If out of range, null is returned.
   4191      *
   4192      * @param value selector for a value for the given property.  In
   4193      * general, valid values range from 0 up to some maximum.  There
   4194      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
   4195      * non-zero value BASIC_LATIN.getID().  (2.)
   4196      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
   4197      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
   4198      * are mask values produced by left-shifting 1 by
   4199      * UCharacter.getType().  This allows grouped categories such as
   4200      * [:L:] to be represented.  Mask values are non-contiguous.
   4201      *
   4202      * @param nameChoice UProperty.NameChoice selector for which name
   4203      * to get.  All values have a long name.  Most have a short name,
   4204      * but some do not.  Unicode allows for additional names; if
   4205      * present these will be returned by UProperty.NameChoice.LONG + i,
   4206      * where i=1, 2,...
   4207      *
   4208      * @return a name, or null if Unicode explicitly defines no name
   4209      * ("n/a") for a given property/value/nameChoice.  If a given
   4210      * nameChoice throws an exception, then all larger values of
   4211      * nameChoice will throw an exception.  If null is returned for a
   4212      * given nameChoice, then other nameChoice values may return
   4213      * non-null results.
   4214      *
   4215      * @exception IllegalArgumentException thrown if property, value,
   4216      * or nameChoice are invalid.
   4217      *
   4218      * @see UProperty
   4219      * @see UProperty.NameChoice
   4220      */
   4221     public static String getPropertyValueName(int property,
   4222             int value,
   4223             int nameChoice)
   4224     {
   4225         if ((property == UProperty.CANONICAL_COMBINING_CLASS
   4226                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
   4227                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
   4228                 && value >= UCharacter.getIntPropertyMinValue(
   4229                         UProperty.CANONICAL_COMBINING_CLASS)
   4230                         && value <= UCharacter.getIntPropertyMaxValue(
   4231                                 UProperty.CANONICAL_COMBINING_CLASS)
   4232                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
   4233             // this is hard coded for the valid cc
   4234             // because PropertyValueAliases.txt does not contain all of them
   4235             try {
   4236                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
   4237                         nameChoice);
   4238             }
   4239             catch (IllegalArgumentException e) {
   4240                 return null;
   4241             }
   4242         }
   4243         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
   4244     }
   4245 
   4246     /**
   4247      * <strong>[icu]</strong> Return the property value integer for a given value name, as
   4248      * specified in the Unicode database file PropertyValueAliases.txt.
   4249      * Short, long, and any other variants are recognized.
   4250      *
   4251      * Note: Some of the names in PropertyValueAliases.txt will only be
   4252      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
   4253      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
   4254      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
   4255      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
   4256      *
   4257      * @param property UProperty selector constant.
   4258      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
   4259      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
   4260      * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
   4261      * Only these properties can be enumerated.
   4262      *
   4263      * @param valueAlias the value name to be matched.  The name is
   4264      * compared using "loose matching" as described in
   4265      * PropertyValueAliases.txt.
   4266      *
   4267      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
   4268      * values are mask values produced by left-shifting 1 by
   4269      * UCharacter.getType().  This allows grouped categories such as
   4270      * [:L:] to be represented.
   4271      *
   4272      * @see UProperty
   4273      * @throws IllegalArgumentException if property is not a valid UProperty
   4274      *         selector or valueAlias is not a value of this property
   4275      */
   4276     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
   4277         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
   4278         if (propEnum == UProperty.UNDEFINED) {
   4279             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
   4280         }
   4281         return propEnum;
   4282     }
   4283 
   4284     /**
   4285      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
   4286      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
   4287      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
   4288      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
   4289      * @deprecated This API is ICU internal only.
   4290      * @hide original deprecated declaration
   4291      * @hide draft / provisional / internal are hidden on Android
   4292      */
   4293     @Deprecated
   4294     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
   4295         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
   4296     }
   4297 
   4298 
   4299     /**
   4300      * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
   4301      *
   4302      * @param lead the lead char
   4303      * @param trail the trail char
   4304      * @return code point if surrogate characters are valid.
   4305      * @exception IllegalArgumentException thrown when the code units do
   4306      *            not form a valid code point
   4307      */
   4308     public static int getCodePoint(char lead, char trail)
   4309     {
   4310         if (Character.isSurrogatePair(lead, trail)) {
   4311             return Character.toCodePoint(lead, trail);
   4312         }
   4313         throw new IllegalArgumentException("Illegal surrogate characters");
   4314     }
   4315 
   4316     /**
   4317      * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point.
   4318      *
   4319      * @param char16 the BMP code point
   4320      * @return code point if argument is a valid character.
   4321      * @exception IllegalArgumentException thrown when char16 is not a valid
   4322      *            code point
   4323      */
   4324     public static int getCodePoint(char char16)
   4325     {
   4326         if (UCharacter.isLegal(char16)) {
   4327             return char16;
   4328         }
   4329         throw new IllegalArgumentException("Illegal codepoint");
   4330     }
   4331 
   4332     /**
   4333      * Returns the uppercase version of the argument string.
   4334      * Casing is dependent on the default locale and context-sensitive.
   4335      * @param str source string to be performed on
   4336      * @return uppercase version of the argument string
   4337      */
   4338     public static String toUpperCase(String str)
   4339     {
   4340         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
   4341     }
   4342 
   4343     /**
   4344      * Returns the lowercase version of the argument string.
   4345      * Casing is dependent on the default locale and context-sensitive
   4346      * @param str source string to be performed on
   4347      * @return lowercase version of the argument string
   4348      */
   4349     public static String toLowerCase(String str)
   4350     {
   4351         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
   4352     }
   4353 
   4354     /**
   4355      * <p>Returns the titlecase version of the argument string.
   4356      * <p>Position for titlecasing is determined by the argument break
   4357      * iterator, hence the user can customize his break iterator for
   4358      * a specialized titlecasing. In this case only the forward iteration
   4359      * needs to be implemented.
   4360      * If the break iterator passed in is null, the default Unicode algorithm
   4361      * will be used to determine the titlecase positions.
   4362      *
   4363      * <p>Only positions returned by the break iterator will be title cased,
   4364      * character in between the positions will all be in lower case.
   4365      * <p>Casing is dependent on the default locale and context-sensitive
   4366      * @param str source string to be performed on
   4367      * @param breakiter break iterator to determine the positions in which
   4368      *        the character should be title cased.
   4369      * @return titlecase version of the argument string
   4370      */
   4371     public static String toTitleCase(String str, BreakIterator breakiter)
   4372     {
   4373         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
   4374     }
   4375 
   4376     private static int getDefaultCaseLocale() {
   4377         return UCaseProps.getCaseLocale(Locale.getDefault());
   4378     }
   4379 
   4380     private static int getCaseLocale(Locale locale) {
   4381         if (locale == null) {
   4382             locale = Locale.getDefault();
   4383         }
   4384         return UCaseProps.getCaseLocale(locale);
   4385     }
   4386 
   4387     private static int getCaseLocale(ULocale locale) {
   4388         if (locale == null) {
   4389             locale = ULocale.getDefault();
   4390         }
   4391         return UCaseProps.getCaseLocale(locale);
   4392     }
   4393 
   4394     /**
   4395      * Returns the uppercase version of the argument string.
   4396      * Casing is dependent on the argument locale and context-sensitive.
   4397      * @param locale which string is to be converted in
   4398      * @param str source string to be performed on
   4399      * @return uppercase version of the argument string
   4400      */
   4401     public static String toUpperCase(Locale locale, String str)
   4402     {
   4403         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
   4404     }
   4405 
   4406     /**
   4407      * Returns the uppercase version of the argument string.
   4408      * Casing is dependent on the argument locale and context-sensitive.
   4409      * @param locale which string is to be converted in
   4410      * @param str source string to be performed on
   4411      * @return uppercase version of the argument string
   4412      */
   4413     public static String toUpperCase(ULocale locale, String str) {
   4414         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
   4415     }
   4416 
   4417     /**
   4418      * Returns the lowercase version of the argument string.
   4419      * Casing is dependent on the argument locale and context-sensitive
   4420      * @param locale which string is to be converted in
   4421      * @param str source string to be performed on
   4422      * @return lowercase version of the argument string
   4423      */
   4424     public static String toLowerCase(Locale locale, String str)
   4425     {
   4426         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
   4427     }
   4428 
   4429     /**
   4430      * Returns the lowercase version of the argument string.
   4431      * Casing is dependent on the argument locale and context-sensitive
   4432      * @param locale which string is to be converted in
   4433      * @param str source string to be performed on
   4434      * @return lowercase version of the argument string
   4435      */
   4436     public static String toLowerCase(ULocale locale, String str) {
   4437         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
   4438     }
   4439 
   4440     /**
   4441      * <p>Returns the titlecase version of the argument string.
   4442      * <p>Position for titlecasing is determined by the argument break
   4443      * iterator, hence the user can customize his break iterator for
   4444      * a specialized titlecasing. In this case only the forward iteration
   4445      * needs to be implemented.
   4446      * If the break iterator passed in is null, the default Unicode algorithm
   4447      * will be used to determine the titlecase positions.
   4448      *
   4449      * <p>Only positions returned by the break iterator will be title cased,
   4450      * character in between the positions will all be in lower case.
   4451      * <p>Casing is dependent on the argument locale and context-sensitive
   4452      * @param locale which string is to be converted in
   4453      * @param str source string to be performed on
   4454      * @param breakiter break iterator to determine the positions in which
   4455      *        the character should be title cased.
   4456      * @return titlecase version of the argument string
   4457      */
   4458     public static String toTitleCase(Locale locale, String str,
   4459             BreakIterator breakiter)
   4460     {
   4461         return toTitleCase(locale, str, breakiter, 0);
   4462     }
   4463 
   4464     /**
   4465      * <p>Returns the titlecase version of the argument string.
   4466      * <p>Position for titlecasing is determined by the argument break
   4467      * iterator, hence the user can customize his break iterator for
   4468      * a specialized titlecasing. In this case only the forward iteration
   4469      * needs to be implemented.
   4470      * If the break iterator passed in is null, the default Unicode algorithm
   4471      * will be used to determine the titlecase positions.
   4472      *
   4473      * <p>Only positions returned by the break iterator will be title cased,
   4474      * character in between the positions will all be in lower case.
   4475      * <p>Casing is dependent on the argument locale and context-sensitive
   4476      * @param locale which string is to be converted in
   4477      * @param str source string to be performed on
   4478      * @param titleIter break iterator to determine the positions in which
   4479      *        the character should be title cased.
   4480      * @return titlecase version of the argument string
   4481      */
   4482     public static String toTitleCase(ULocale locale, String str,
   4483             BreakIterator titleIter) {
   4484         return toTitleCase(locale, str, titleIter, 0);
   4485     }
   4486 
   4487     /**
   4488      * <p>Returns the titlecase version of the argument string.
   4489      * <p>Position for titlecasing is determined by the argument break
   4490      * iterator, hence the user can customize his break iterator for
   4491      * a specialized titlecasing. In this case only the forward iteration
   4492      * needs to be implemented.
   4493      * If the break iterator passed in is null, the default Unicode algorithm
   4494      * will be used to determine the titlecase positions.
   4495      *
   4496      * <p>Only positions returned by the break iterator will be title cased,
   4497      * character in between the positions will all be in lower case.
   4498      * <p>Casing is dependent on the argument locale and context-sensitive
   4499      * @param locale which string is to be converted in
   4500      * @param str source string to be performed on
   4501      * @param titleIter break iterator to determine the positions in which
   4502      *        the character should be title cased.
   4503      * @param options bit set to modify the titlecasing operation
   4504      * @return titlecase version of the argument string
   4505      * @see #TITLECASE_NO_LOWERCASE
   4506      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
   4507      */
   4508     public static String toTitleCase(ULocale locale, String str,
   4509             BreakIterator titleIter, int options) {
   4510         if (titleIter == null && locale == null) {
   4511             locale = ULocale.getDefault();
   4512         }
   4513         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
   4514         titleIter.setText(str);
   4515         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
   4516     }
   4517 
   4518     /**
   4519      * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
   4520      * and sometimes has no effect at all; the original string is returned whenever casing
   4521      * would not be appropriate for the first word (such as for CJK characters or initial numbers).
   4522      * Initial non-letters are skipped in order to find the character to change.
   4523      * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
   4524      * <p>Examples:
   4525      * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
   4526      * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
   4527      * <tr><td>contact us</td><td>Contact us</td></tr>
   4528      * <tr><td>49ers win!</td><td>49ers win!</td></tr>
   4529      * <tr><td>(abc)</td><td>(abc)</td></tr>
   4530      * <tr><td>ijs</td><td>Ijs</td></tr>
   4531      * <tr><td>ijs</td><td>IJs</td><td>nl-BE</td></tr>
   4532      * <tr><td>ijs</td><td>&#x130;js</td><td>tr-DE</td></tr>
   4533      * </table>
   4534      * @param locale the locale for accessing exceptional behavior (eg for tr).
   4535      * @param str the source string to change
   4536      * @return the modified string, or the original if no modifications were necessary.
   4537      * @deprecated ICU internal only
   4538      * @hide original deprecated declaration
   4539      * @hide draft / provisional / internal are hidden on Android
   4540      */
   4541     @Deprecated
   4542     public static String toTitleFirst(ULocale locale, String str) {
   4543         // TODO: Remove this function. Inline it where it is called in CLDR.
   4544         return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
   4545     }
   4546 
   4547     private static final android.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
   4548             android.icu.text.CaseMap.toTitle().wholeString().noLowercase();
   4549 
   4550     /**
   4551      * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string.
   4552      * <p>Position for titlecasing is determined by the argument break
   4553      * iterator, hence the user can customize his break iterator for
   4554      * a specialized titlecasing. In this case only the forward iteration
   4555      * needs to be implemented.
   4556      * If the break iterator passed in is null, the default Unicode algorithm
   4557      * will be used to determine the titlecase positions.
   4558      *
   4559      * <p>Only positions returned by the break iterator will be title cased,
   4560      * character in between the positions will all be in lower case.
   4561      * <p>Casing is dependent on the argument locale and context-sensitive
   4562      * @param locale which string is to be converted in
   4563      * @param str source string to be performed on
   4564      * @param titleIter break iterator to determine the positions in which
   4565      *        the character should be title cased.
   4566      * @param options bit set to modify the titlecasing operation
   4567      * @return titlecase version of the argument string
   4568      * @see #TITLECASE_NO_LOWERCASE
   4569      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
   4570      */
   4571     public static String toTitleCase(Locale locale, String str,
   4572             BreakIterator titleIter,
   4573             int options) {
   4574         if (titleIter == null && locale == null) {
   4575             locale = Locale.getDefault();
   4576         }
   4577         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
   4578         titleIter.setText(str);
   4579         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
   4580     }
   4581 
   4582     /**
   4583      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
   4584      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
   4585      * folding equivalent, the character itself is returned.
   4586      *
   4587      * <p>This function only returns the simple, single-code point case mapping.
   4588      * Full case mappings should be used whenever possible because they produce
   4589      * better results by working on whole strings.
   4590      * They can map to a result string with a different length as appropriate.
   4591      * Full case mappings are applied by the case mapping functions
   4592      * that take String parameters rather than code points (int).
   4593      * See also the User Guide chapter on C/POSIX migration:
   4594      * http://www.icu-project.org/userguide/posix.html#case_mappings
   4595      *
   4596      * @param ch             the character to be converted
   4597      * @param defaultmapping Indicates whether the default mappings defined in
   4598      *                       CaseFolding.txt are to be used, otherwise the
   4599      *                       mappings for dotted I and dotless i marked with
   4600      *                       'T' in CaseFolding.txt are included.
   4601      * @return               the case folding equivalent of the character, if
   4602      *                       any; otherwise the character itself.
   4603      * @see                  #foldCase(String, boolean)
   4604      */
   4605     public static int foldCase(int ch, boolean defaultmapping) {
   4606         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
   4607     }
   4608 
   4609     /**
   4610      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
   4611      * UnicodeData.txt and CaseFolding.txt; if any character has no case
   4612      * folding equivalent, the character itself is returned.
   4613      * "Full", multiple-code point case folding mappings are returned here.
   4614      * For "simple" single-code point mappings use the API
   4615      * foldCase(int ch, boolean defaultmapping).
   4616      * @param str            the String to be converted
   4617      * @param defaultmapping Indicates whether the default mappings defined in
   4618      *                       CaseFolding.txt are to be used, otherwise the
   4619      *                       mappings for dotted I and dotless i marked with
   4620      *                       'T' in CaseFolding.txt are included.
   4621      * @return               the case folding equivalent of the character, if
   4622      *                       any; otherwise the character itself.
   4623      * @see                  #foldCase(int, boolean)
   4624      */
   4625     public static String foldCase(String str, boolean defaultmapping) {
   4626         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
   4627     }
   4628 
   4629     /**
   4630      * <strong>[icu]</strong> Option value for case folding: use default mappings defined in
   4631      * CaseFolding.txt.
   4632      */
   4633     public static final int FOLD_CASE_DEFAULT    =      0x0000;
   4634     /**
   4635      * <strong>[icu]</strong> Option value for case folding:
   4636      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
   4637      * and dotless i appropriately for Turkic languages (tr, az).
   4638      *
   4639      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
   4640      * are to be included for default mappings and
   4641      * excluded for the Turkic-specific mappings.
   4642      *
   4643      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
   4644      * are to be excluded for default mappings and
   4645      * included for the Turkic-specific mappings.
   4646      */
   4647     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
   4648 
   4649     /**
   4650      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
   4651      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
   4652      * folding equivalent, the character itself is returned.
   4653      *
   4654      * <p>This function only returns the simple, single-code point case mapping.
   4655      * Full case mappings should be used whenever possible because they produce
   4656      * better results by working on whole strings.
   4657      * They can map to a result string with a different length as appropriate.
   4658      * Full case mappings are applied by the case mapping functions
   4659      * that take String parameters rather than code points (int).
   4660      * See also the User Guide chapter on C/POSIX migration:
   4661      * http://www.icu-project.org/userguide/posix.html#case_mappings
   4662      *
   4663      * @param ch the character to be converted
   4664      * @param options A bit set for special processing. Currently the recognised options
   4665      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
   4666      * @return the case folding equivalent of the character, if any; otherwise the
   4667      * character itself.
   4668      * @see #foldCase(String, boolean)
   4669      */
   4670     public static int foldCase(int ch, int options) {
   4671         return UCaseProps.INSTANCE.fold(ch, options);
   4672     }
   4673 
   4674     /**
   4675      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
   4676      * UnicodeData.txt and CaseFolding.txt; if any character has no case
   4677      * folding equivalent, the character itself is returned.
   4678      * "Full", multiple-code point case folding mappings are returned here.
   4679      * For "simple" single-code point mappings use the API
   4680      * foldCase(int ch, boolean defaultmapping).
   4681      * @param str the String to be converted
   4682      * @param options A bit set for special processing. Currently the recognised options
   4683      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
   4684      * @return the case folding equivalent of the character, if any; otherwise the
   4685      *         character itself.
   4686      * @see #foldCase(int, boolean)
   4687      */
   4688     public static final String foldCase(String str, int options) {
   4689         return CaseMapImpl.fold(options, str);
   4690     }
   4691 
   4692     /**
   4693      * <strong>[icu]</strong> Returns the numeric value of a Han character.
   4694      *
   4695      * <p>This returns the value of Han 'numeric' code points,
   4696      * including those for zero, ten, hundred, thousand, ten thousand,
   4697      * and hundred million.
   4698      * This includes both the standard and 'checkwriting'
   4699      * characters, the 'big circle' zero character, and the standard
   4700      * zero character.
   4701      *
   4702      * <p>Note: The Unicode Standard has numeric values for more
   4703      * Han characters than are recognized by this method
   4704      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
   4705      * and a {@link android.icu.text.NumberFormat} can be used with
   4706      * a Chinese {@link android.icu.text.NumberingSystem}.
   4707      *
   4708      * @param ch code point to query
   4709      * @return value if it is a Han 'numeric character,' otherwise return -1.
   4710      */
   4711     public static int getHanNumericValue(int ch)
   4712     {
   4713         switch(ch)
   4714         {
   4715         case IDEOGRAPHIC_NUMBER_ZERO_ :
   4716         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
   4717             return 0; // Han Zero
   4718         case CJK_IDEOGRAPH_FIRST_ :
   4719         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
   4720             return 1; // Han One
   4721         case CJK_IDEOGRAPH_SECOND_ :
   4722         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
   4723             return 2; // Han Two
   4724         case CJK_IDEOGRAPH_THIRD_ :
   4725         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
   4726             return 3; // Han Three
   4727         case CJK_IDEOGRAPH_FOURTH_ :
   4728         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
   4729             return 4; // Han Four
   4730         case CJK_IDEOGRAPH_FIFTH_ :
   4731         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
   4732             return 5; // Han Five
   4733         case CJK_IDEOGRAPH_SIXTH_ :
   4734         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
   4735             return 6; // Han Six
   4736         case CJK_IDEOGRAPH_SEVENTH_ :
   4737         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
   4738             return 7; // Han Seven
   4739         case CJK_IDEOGRAPH_EIGHTH_ :
   4740         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
   4741             return 8; // Han Eight
   4742         case CJK_IDEOGRAPH_NINETH_ :
   4743         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
   4744             return 9; // Han Nine
   4745         case CJK_IDEOGRAPH_TEN_ :
   4746         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
   4747             return 10;
   4748         case CJK_IDEOGRAPH_HUNDRED_ :
   4749         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
   4750             return 100;
   4751         case CJK_IDEOGRAPH_THOUSAND_ :
   4752         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
   4753             return 1000;
   4754         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
   4755             return 10000;
   4756         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
   4757             return 100000000;
   4758         }
   4759         return -1; // no value
   4760     }
   4761 
   4762     /**
   4763      * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints.
   4764      * <p>Example of use:<br>
   4765      * <pre>
   4766      * RangeValueIterator iterator = UCharacter.getTypeIterator();
   4767      * RangeValueIterator.Element element = new RangeValueIterator.Element();
   4768      * while (iterator.next(element)) {
   4769      *     System.out.println("Codepoint \\u" +
   4770      *                        Integer.toHexString(element.start) +
   4771      *                        " to codepoint \\u" +
   4772      *                        Integer.toHexString(element.limit - 1) +
   4773      *                        " has the character type " +
   4774      *                        element.value);
   4775      * }
   4776      * </pre>
   4777      * @return an iterator
   4778      */
   4779     public static RangeValueIterator getTypeIterator()
   4780     {
   4781         return new UCharacterTypeIterator();
   4782     }
   4783 
   4784     private static final class UCharacterTypeIterator implements RangeValueIterator {
   4785         UCharacterTypeIterator() {
   4786             reset();
   4787         }
   4788 
   4789         // implements RangeValueIterator
   4790         @Override
   4791         public boolean next(Element element) {
   4792             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
   4793                 element.start=range.startCodePoint;
   4794                 element.limit=range.endCodePoint+1;
   4795                 element.value=range.value;
   4796                 return true;
   4797             } else {
   4798                 return false;
   4799             }
   4800         }
   4801 
   4802         // implements RangeValueIterator
   4803         @Override
   4804         public void reset() {
   4805             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
   4806         }
   4807 
   4808         private Iterator<Trie2.Range> trieIterator;
   4809         private Trie2.Range range;
   4810 
   4811         private static final class MaskType implements Trie2.ValueMapper {
   4812             // Extracts the general category ("character type") from the trie value.
   4813             @Override
   4814             public int map(int value) {
   4815                 return value & UCharacterProperty.TYPE_MASK;
   4816             }
   4817         }
   4818         private static final MaskType MASK_TYPE=new MaskType();
   4819     }
   4820 
   4821     /**
   4822      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
   4823      * <p>This API only gets the iterator for the modern, most up-to-date
   4824      * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
   4825      * for extended names use getExtendedNameIterator().
   4826      * <p>Example of use:<br>
   4827      * <pre>
   4828      * ValueIterator iterator = UCharacter.getNameIterator();
   4829      * ValueIterator.Element element = new ValueIterator.Element();
   4830      * while (iterator.next(element)) {
   4831      *     System.out.println("Codepoint \\u" +
   4832      *                        Integer.toHexString(element.codepoint) +
   4833      *                        " has the name " + (String)element.value);
   4834      * }
   4835      * </pre>
   4836      * <p>The maximal range which the name iterator iterates is from
   4837      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
   4838      * @return an iterator
   4839      */
   4840     public static ValueIterator getNameIterator(){
   4841         return new UCharacterNameIterator(UCharacterName.INSTANCE,
   4842                 UCharacterNameChoice.UNICODE_CHAR_NAME);
   4843     }
   4844 
   4845     /**
   4846      * <strong>[icu]</strong> Returns an empty iterator.
   4847      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
   4848      * @return an empty iterator
   4849      * @deprecated ICU 49
   4850      * @see #getName1_0(int)
   4851      * @hide original deprecated declaration
   4852      */
   4853     @Deprecated
   4854     public static ValueIterator getName1_0Iterator(){
   4855         return new DummyValueIterator();
   4856     }
   4857 
   4858     private static final class DummyValueIterator implements ValueIterator {
   4859         @Override
   4860         public boolean next(Element element) { return false; }
   4861         @Override
   4862         public void reset() {}
   4863         @Override
   4864         public void setRange(int start, int limit) {}
   4865     }
   4866 
   4867     /**
   4868      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
   4869      * <p>This API only gets the iterator for the extended names.
   4870      * For modern, most up-to-date Unicode names use getNameIterator() or
   4871      * for older 1.0 Unicode names use get1_0NameIterator().
   4872      * <p>Example of use:<br>
   4873      * <pre>
   4874      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
   4875      * ValueIterator.Element element = new ValueIterator.Element();
   4876      * while (iterator.next(element)) {
   4877      *     System.out.println("Codepoint \\u" +
   4878      *                        Integer.toHexString(element.codepoint) +
   4879      *                        " has the name " + (String)element.value);
   4880      * }
   4881      * </pre>
   4882      * <p>The maximal range which the name iterator iterates is from
   4883      * @return an iterator
   4884      */
   4885     public static ValueIterator getExtendedNameIterator(){
   4886         return new UCharacterNameIterator(UCharacterName.INSTANCE,
   4887                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
   4888     }
   4889 
   4890     /**
   4891      * <strong>[icu]</strong> Returns the "age" of the code point.
   4892      * <p>The "age" is the Unicode version when the code point was first
   4893      * designated (as a non-character or for Private Use) or assigned a
   4894      * character.
   4895      * <p>This can be useful to avoid emitting code points to receiving
   4896      * processes that do not accept newer characters.
   4897      * <p>The data is from the UCD file DerivedAge.txt.
   4898      * @param ch The code point.
   4899      * @return the Unicode version number
   4900      */
   4901     public static VersionInfo getAge(int ch)
   4902     {
   4903         if (ch < MIN_VALUE || ch > MAX_VALUE) {
   4904             throw new IllegalArgumentException("Codepoint out of bounds");
   4905         }
   4906         return UCharacterProperty.INSTANCE.getAge(ch);
   4907     }
   4908 
   4909     /**
   4910      * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point.
   4911      * <p>Unicode, especially in version 3.2, defines many more properties
   4912      * than the original set in UnicodeData.txt.
   4913      * <p>This API is intended to reflect Unicode properties as defined in
   4914      * the Unicode Character Database (UCD) and Unicode Technical Reports
   4915      * (UTR).
   4916      * <p>For details about the properties see
   4917      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
   4918      * <p>For names of Unicode properties see the UCD file
   4919      * PropertyAliases.txt.
   4920      * <p>This API does not check the validity of the codepoint.
   4921      * <p>Important: If ICU is built with UCD files from Unicode versions
   4922      * below 3.2, then properties marked with "new" are not or
   4923      * not fully available.
   4924      * @param ch code point to test.
   4925      * @param property selector constant from android.icu.lang.UProperty,
   4926      *        identifies which binary property to check.
   4927      * @return true or false according to the binary Unicode property value
   4928      *         for ch. Also false if property is out of bounds or if the
   4929      *         Unicode version does not have data for the property at all, or
   4930      *         not for this code point.
   4931      * @see android.icu.lang.UProperty
   4932      */
   4933     public static boolean hasBinaryProperty(int ch, int property)
   4934     {
   4935         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
   4936     }
   4937 
   4938     /**
   4939      * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property.
   4940      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
   4941      * <p>Different from UCharacter.isLetter(ch)!
   4942      * @param ch codepoint to be tested
   4943      */
   4944     public static boolean isUAlphabetic(int ch)
   4945     {
   4946         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
   4947     }
   4948 
   4949     /**
   4950      * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property.
   4951      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
   4952      * <p>This is different from UCharacter.isLowerCase(ch)!
   4953      * @param ch codepoint to be tested
   4954      */
   4955     public static boolean isULowercase(int ch)
   4956     {
   4957         return hasBinaryProperty(ch, UProperty.LOWERCASE);
   4958     }
   4959 
   4960     /**
   4961      * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property.
   4962      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
   4963      * <p>This is different from UCharacter.isUpperCase(ch)!
   4964      * @param ch codepoint to be tested
   4965      */
   4966     public static boolean isUUppercase(int ch)
   4967     {
   4968         return hasBinaryProperty(ch, UProperty.UPPERCASE);
   4969     }
   4970 
   4971     /**
   4972      * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property.
   4973      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
   4974      * <p>This is different from both UCharacter.isSpace(ch) and
   4975      * UCharacter.isWhitespace(ch)!
   4976      * @param ch codepoint to be tested
   4977      */
   4978     public static boolean isUWhiteSpace(int ch)
   4979     {
   4980         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
   4981     }
   4982 
   4983     /**
   4984      * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point.
   4985      * Also returns binary and mask property values.
   4986      * <p>Unicode, especially in version 3.2, defines many more properties than
   4987      * the original set in UnicodeData.txt.
   4988      * <p>The properties APIs are intended to reflect Unicode properties as
   4989      * defined in the Unicode Character Database (UCD) and Unicode Technical
   4990      * Reports (UTR). For details about the properties see
   4991      * http://www.unicode.org/.
   4992      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
   4993      *
   4994      * <pre>
   4995      * Sample usage:
   4996      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
   4997      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
   4998      * boolean b = (ideo == 1) ? true : false;
   4999      * </pre>
   5000      * @param ch code point to test.
   5001      * @param type UProperty selector constant, identifies which binary
   5002      *        property to check. Must be
   5003      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
   5004      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
   5005      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
   5006      * @return numeric value that is directly the property value or,
   5007      *         for enumerated properties, corresponds to the numeric value of
   5008      *         the enumerated constant of the respective property value
   5009      *         enumeration type (cast to enum type if necessary).
   5010      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
   5011      *         Returns a bit-mask for mask properties.
   5012      *         Returns 0 if 'type' is out of bounds or if the Unicode version
   5013      *         does not have data for the property at all, or not for this code
   5014      *         point.
   5015      * @see UProperty
   5016      * @see #hasBinaryProperty
   5017      * @see #getIntPropertyMinValue
   5018      * @see #getIntPropertyMaxValue
   5019      * @see #getUnicodeVersion
   5020      */
   5021     public static int getIntPropertyValue(int ch, int type)
   5022     {
   5023         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
   5024     }
   5025     /**
   5026      * <strong>[icu]</strong> Returns a string version of the property value.
   5027      * @param propertyEnum The property enum value.
   5028      * @param codepoint The codepoint value.
   5029      * @param nameChoice The choice of the name.
   5030      * @return value as string
   5031      * @deprecated This API is ICU internal only.
   5032      * @hide original deprecated declaration
   5033      * @hide draft / provisional / internal are hidden on Android
   5034      */
   5035     @Deprecated
   5036     ///CLOVER:OFF
   5037     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
   5038         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
   5039                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
   5040             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
   5041                     nameChoice);
   5042         }
   5043         if (propertyEnum == UProperty.NUMERIC_VALUE) {
   5044             return String.valueOf(getUnicodeNumericValue(codepoint));
   5045         }
   5046         // otherwise must be string property
   5047         switch (propertyEnum) {
   5048         case UProperty.AGE: return getAge(codepoint).toString();
   5049         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
   5050         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
   5051         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
   5052         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
   5053         case UProperty.NAME: return getName(codepoint);
   5054         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
   5055         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
   5056         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
   5057         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
   5058         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
   5059         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
   5060         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
   5061         }
   5062         throw new IllegalArgumentException("Illegal Property Enum");
   5063     }
   5064     ///CLOVER:ON
   5065 
   5066     /**
   5067      * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type.
   5068      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
   5069      * to allocate arrays of android.icu.text.UnicodeSet or similar.
   5070      * @param type UProperty selector constant, identifies which binary
   5071      *        property to check. Must be
   5072      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
   5073      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
   5074      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
   5075      *         for a Unicode property. 0 if the property
   5076      *         selector 'type' is out of range.
   5077      * @see UProperty
   5078      * @see #hasBinaryProperty
   5079      * @see #getUnicodeVersion
   5080      * @see #getIntPropertyMaxValue
   5081      * @see #getIntPropertyValue
   5082      */
   5083     public static int getIntPropertyMinValue(int type){
   5084 
   5085         return 0; // undefined; and: all other properties have a minimum value of 0
   5086     }
   5087 
   5088 
   5089     /**
   5090      * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property.
   5091      * Can be used together with UCharacter.getIntPropertyMinValue(int)
   5092      * to allocate arrays of android.icu.text.UnicodeSet or similar.
   5093      * Examples for min/max values (for Unicode 3.2):
   5094      * <ul>
   5095      * <li> UProperty.BIDI_CLASS:    0/18
   5096      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
   5097      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
   5098      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
   5099      * </ul>
   5100      * For undefined UProperty constant values, min/max values will be 0/-1.
   5101      * @param type UProperty selector constant, identifies which binary
   5102      *        property to check. Must be
   5103      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
   5104      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
   5105      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
   5106      *         property. &lt;= 0 if the property selector 'type' is out of range.
   5107      * @see UProperty
   5108      * @see #hasBinaryProperty
   5109      * @see #getUnicodeVersion
   5110      * @see #getIntPropertyMaxValue
   5111      * @see #getIntPropertyValue
   5112      */
   5113     public static int getIntPropertyMaxValue(int type)
   5114     {
   5115         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
   5116     }
   5117 
   5118     /**
   5119      * Provide the java.lang.Character forDigit API, for convenience.
   5120      */
   5121     public static char forDigit(int digit, int radix) {
   5122         return java.lang.Character.forDigit(digit, radix);
   5123     }
   5124 
   5125     // JDK 1.5 API coverage
   5126 
   5127     /**
   5128      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
   5129      */
   5130     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
   5131 
   5132     /**
   5133      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
   5134      */
   5135     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
   5136 
   5137     /**
   5138      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
   5139      */
   5140     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
   5141 
   5142     /**
   5143      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
   5144      */
   5145     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
   5146 
   5147     /**
   5148      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
   5149      */
   5150     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
   5151 
   5152     /**
   5153      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
   5154      */
   5155     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
   5156 
   5157     /**
   5158      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
   5159      */
   5160     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
   5161 
   5162     /**
   5163      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
   5164      */
   5165     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
   5166 
   5167     /**
   5168      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
   5169      */
   5170     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
   5171 
   5172     /**
   5173      * Equivalent to {@link Character#isValidCodePoint}.
   5174      *
   5175      * @param cp the code point to check
   5176      * @return true if cp is a valid code point
   5177      */
   5178     public static final boolean isValidCodePoint(int cp) {
   5179         return cp >= 0 && cp <= MAX_CODE_POINT;
   5180     }
   5181 
   5182     /**
   5183      * Same as {@link Character#isSupplementaryCodePoint}.
   5184      *
   5185      * @param cp the code point to check
   5186      * @return true if cp is a supplementary code point
   5187      */
   5188     public static final boolean isSupplementaryCodePoint(int cp) {
   5189         return Character.isSupplementaryCodePoint(cp);
   5190     }
   5191 
   5192     /**
   5193      * Same as {@link Character#isHighSurrogate}.
   5194      *
   5195      * @param ch the char to check
   5196      * @return true if ch is a high (lead) surrogate
   5197      */
   5198     public static boolean isHighSurrogate(char ch) {
   5199         return Character.isHighSurrogate(ch);
   5200     }
   5201 
   5202     /**
   5203      * Same as {@link Character#isLowSurrogate}.
   5204      *
   5205      * @param ch the char to check
   5206      * @return true if ch is a low (trail) surrogate
   5207      */
   5208     public static boolean isLowSurrogate(char ch) {
   5209         return Character.isLowSurrogate(ch);
   5210     }
   5211 
   5212     /**
   5213      * Same as {@link Character#isSurrogatePair}.
   5214      *
   5215      * @param high the high (lead) char
   5216      * @param low the low (trail) char
   5217      * @return true if high, low form a surrogate pair
   5218      */
   5219     public static final boolean isSurrogatePair(char high, char low) {
   5220         return Character.isSurrogatePair(high, low);
   5221     }
   5222 
   5223     /**
   5224      * Same as {@link Character#charCount}.
   5225      * Returns the number of chars needed to represent the code point (1 or 2).
   5226      * This does not check the code point for validity.
   5227      *
   5228      * @param cp the code point to check
   5229      * @return the number of chars needed to represent the code point
   5230      */
   5231     public static int charCount(int cp) {
   5232         return Character.charCount(cp);
   5233     }
   5234 
   5235     /**
   5236      * Same as {@link Character#toCodePoint}.
   5237      * Returns the code point represented by the two surrogate code units.
   5238      * This does not check the surrogate pair for validity.
   5239      *
   5240      * @param high the high (lead) surrogate
   5241      * @param low the low (trail) surrogate
   5242      * @return the code point formed by the surrogate pair
   5243      */
   5244     public static final int toCodePoint(char high, char low) {
   5245         return Character.toCodePoint(high, low);
   5246     }
   5247 
   5248     /**
   5249      * Same as {@link Character#codePointAt(CharSequence, int)}.
   5250      * Returns the code point at index.
   5251      * This examines only the characters at index and index+1.
   5252      *
   5253      * @param seq the characters to check
   5254      * @param index the index of the first or only char forming the code point
   5255      * @return the code point at the index
   5256      */
   5257     public static final int codePointAt(CharSequence seq, int index) {
   5258         char c1 = seq.charAt(index++);
   5259         if (isHighSurrogate(c1)) {
   5260             if (index < seq.length()) {
   5261                 char c2 = seq.charAt(index);
   5262                 if (isLowSurrogate(c2)) {
   5263                     return toCodePoint(c1, c2);
   5264                 }
   5265             }
   5266         }
   5267         return c1;
   5268     }
   5269 
   5270     /**
   5271      * Same as {@link Character#codePointAt(char[], int)}.
   5272      * Returns the code point at index.
   5273      * This examines only the characters at index and index+1.
   5274      *
   5275      * @param text the characters to check
   5276      * @param index the index of the first or only char forming the code point
   5277      * @return the code point at the index
   5278      */
   5279     public static final int codePointAt(char[] text, int index) {
   5280         char c1 = text[index++];
   5281         if (isHighSurrogate(c1)) {
   5282             if (index < text.length) {
   5283                 char c2 = text[index];
   5284                 if (isLowSurrogate(c2)) {
   5285                     return toCodePoint(c1, c2);
   5286                 }
   5287             }
   5288         }
   5289         return c1;
   5290     }
   5291 
   5292     /**
   5293      * Same as {@link Character#codePointAt(char[], int, int)}.
   5294      * Returns the code point at index.
   5295      * This examines only the characters at index and index+1.
   5296      *
   5297      * @param text the characters to check
   5298      * @param index the index of the first or only char forming the code point
   5299      * @param limit the limit of the valid text
   5300      * @return the code point at the index
   5301      */
   5302     public static final int codePointAt(char[] text, int index, int limit) {
   5303         if (index >= limit || limit > text.length) {
   5304             throw new IndexOutOfBoundsException();
   5305         }
   5306         char c1 = text[index++];
   5307         if (isHighSurrogate(c1)) {
   5308             if (index < limit) {
   5309                 char c2 = text[index];
   5310                 if (isLowSurrogate(c2)) {
   5311                     return toCodePoint(c1, c2);
   5312                 }
   5313             }
   5314         }
   5315         return c1;
   5316     }
   5317 
   5318     /**
   5319      * Same as {@link Character#codePointBefore(CharSequence, int)}.
   5320      * Return the code point before index.
   5321      * This examines only the characters at index-1 and index-2.
   5322      *
   5323      * @param seq the characters to check
   5324      * @param index the index after the last or only char forming the code point
   5325      * @return the code point before the index
   5326      */
   5327     public static final int codePointBefore(CharSequence seq, int index) {
   5328         char c2 = seq.charAt(--index);
   5329         if (isLowSurrogate(c2)) {
   5330             if (index > 0) {
   5331                 char c1 = seq.charAt(--index);
   5332                 if (isHighSurrogate(c1)) {
   5333                     return toCodePoint(c1, c2);
   5334                 }
   5335             }
   5336         }
   5337         return c2;
   5338     }
   5339 
   5340     /**
   5341      * Same as {@link Character#codePointBefore(char[], int)}.
   5342      * Returns the code point before index.
   5343      * This examines only the characters at index-1 and index-2.
   5344      *
   5345      * @param text the characters to check
   5346      * @param index the index after the last or only char forming the code point
   5347      * @return the code point before the index
   5348      */
   5349     public static final int codePointBefore(char[] text, int index) {
   5350         char c2 = text[--index];
   5351         if (isLowSurrogate(c2)) {
   5352             if (index > 0) {
   5353                 char c1 = text[--index];
   5354                 if (isHighSurrogate(c1)) {
   5355                     return toCodePoint(c1, c2);
   5356                 }
   5357             }
   5358         }
   5359         return c2;
   5360     }
   5361 
   5362     /**
   5363      * Same as {@link Character#codePointBefore(char[], int, int)}.
   5364      * Return the code point before index.
   5365      * This examines only the characters at index-1 and index-2.
   5366      *
   5367      * @param text the characters to check
   5368      * @param index the index after the last or only char forming the code point
   5369      * @param limit the start of the valid text
   5370      * @return the code point before the index
   5371      */
   5372     public static final int codePointBefore(char[] text, int index, int limit) {
   5373         if (index <= limit || limit < 0) {
   5374             throw new IndexOutOfBoundsException();
   5375         }
   5376         char c2 = text[--index];
   5377         if (isLowSurrogate(c2)) {
   5378             if (index > limit) {
   5379                 char c1 = text[--index];
   5380                 if (isHighSurrogate(c1)) {
   5381                     return toCodePoint(c1, c2);
   5382                 }
   5383             }
   5384         }
   5385         return c2;
   5386     }
   5387 
   5388     /**
   5389      * Same as {@link Character#toChars(int, char[], int)}.
   5390      * Writes the chars representing the
   5391      * code point into the destination at the given index.
   5392      *
   5393      * @param cp the code point to convert
   5394      * @param dst the destination array into which to put the char(s) representing the code point
   5395      * @param dstIndex the index at which to put the first (or only) char
   5396      * @return the count of the number of chars written (1 or 2)
   5397      * @throws IllegalArgumentException if cp is not a valid code point
   5398      */
   5399     public static final int toChars(int cp, char[] dst, int dstIndex) {
   5400         return Character.toChars(cp, dst, dstIndex);
   5401     }
   5402 
   5403     /**
   5404      * Same as {@link Character#toChars(int)}.
   5405      * Returns a char array representing the code point.
   5406      *
   5407      * @param cp the code point to convert
   5408      * @return an array containing the char(s) representing the code point
   5409      * @throws IllegalArgumentException if cp is not a valid code point
   5410      */
   5411     public static final char[] toChars(int cp) {
   5412         return Character.toChars(cp);
   5413     }
   5414 
   5415     /**
   5416      * Equivalent to the {@link Character#getDirectionality(char)} method, for
   5417      * convenience. Returns a byte representing the directionality of the
   5418      * character.
   5419      *
   5420      * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns
   5421      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
   5422      *
   5423      * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link
   5424      * UCharacterDirection} and its interface {@link
   5425      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
   5426      * defined by <code>java.lang.Character</code>.
   5427      * @param cp the code point to check
   5428      * @return the directionality of the code point
   5429      * @see #getDirection
   5430      */
   5431     public static byte getDirectionality(int cp)
   5432     {
   5433         return (byte)getDirection(cp);
   5434     }
   5435 
   5436     /**
   5437      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
   5438      * method, for convenience.  Counts the number of code points in the range
   5439      * of text.
   5440      * @param text the characters to check
   5441      * @param start the start of the range
   5442      * @param limit the limit of the range
   5443      * @return the number of code points in the range
   5444      */
   5445     public static int codePointCount(CharSequence text, int start, int limit) {
   5446         if (start < 0 || limit < start || limit > text.length()) {
   5447             throw new IndexOutOfBoundsException("start (" + start +
   5448                     ") or limit (" + limit +
   5449                     ") invalid or out of range 0, " + text.length());
   5450         }
   5451 
   5452         int len = limit - start;
   5453         while (limit > start) {
   5454             char ch = text.charAt(--limit);
   5455             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
   5456                 ch = text.charAt(--limit);
   5457                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
   5458                     --len;
   5459                     break;
   5460                 }
   5461             }
   5462         }
   5463         return len;
   5464     }
   5465 
   5466     /**
   5467      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
   5468      * convenience. Counts the number of code points in the range of text.
   5469      * @param text the characters to check
   5470      * @param start the start of the range
   5471      * @param limit the limit of the range
   5472      * @return the number of code points in the range
   5473      */
   5474     public static int codePointCount(char[] text, int start, int limit) {
   5475         if (start < 0 || limit < start || limit > text.length) {
   5476             throw new IndexOutOfBoundsException("start (" + start +
   5477                     ") or limit (" + limit +
   5478                     ") invalid or out of range 0, " + text.length);
   5479         }
   5480 
   5481         int len = limit - start;
   5482         while (limit > start) {
   5483             char ch = text[--limit];
   5484             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
   5485                 ch = text[--limit];
   5486                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
   5487                     --len;
   5488                     break;
   5489                 }
   5490             }
   5491         }
   5492         return len;
   5493     }
   5494 
   5495     /**
   5496      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
   5497      * method, for convenience.  Adjusts the char index by a code point offset.
   5498      * @param text the characters to check
   5499      * @param index the index to adjust
   5500      * @param codePointOffset the number of code points by which to offset the index
   5501      * @return the adjusted index
   5502      */
   5503     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
   5504         if (index < 0 || index > text.length()) {
   5505             throw new IndexOutOfBoundsException("index ( " + index +
   5506                     ") out of range 0, " + text.length());
   5507         }
   5508 
   5509         if (codePointOffset < 0) {
   5510             while (++codePointOffset <= 0) {
   5511                 char ch = text.charAt(--index);
   5512                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
   5513                     ch = text.charAt(--index);
   5514                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
   5515                         if (++codePointOffset > 0) {
   5516                             return index+1;
   5517                         }
   5518                     }
   5519                 }
   5520             }
   5521         } else {
   5522             int limit = text.length();
   5523             while (--codePointOffset >= 0) {
   5524                 char ch = text.charAt(index++);
   5525                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
   5526                     ch = text.charAt(index++);
   5527                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
   5528                         if (--codePointOffset < 0) {
   5529                             return index-1;
   5530                         }
   5531                     }
   5532                 }
   5533             }
   5534         }
   5535 
   5536         return index;
   5537     }
   5538 
   5539     /**
   5540      * Equivalent to the
   5541      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
   5542      * method, for convenience.  Adjusts the char index by a code point offset.
   5543      * @param text the characters to check
   5544      * @param start the start of the range to check
   5545      * @param count the length of the range to check
   5546      * @param index the index to adjust
   5547      * @param codePointOffset the number of code points by which to offset the index
   5548      * @return the adjusted index
   5549      */
   5550     public static int offsetByCodePoints(char[] text, int start, int count, int index,
   5551             int codePointOffset) {
   5552         int limit = start + count;
   5553         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
   5554             throw new IndexOutOfBoundsException("index ( " + index +
   5555                     ") out of range " + start +
   5556                     ", " + limit +
   5557                     " in array 0, " + text.length);
   5558         }
   5559 
   5560         if (codePointOffset < 0) {
   5561             while (++codePointOffset <= 0) {
   5562                 char ch = text[--index];
   5563                 if (index < start) {
   5564                     throw new IndexOutOfBoundsException("index ( " + index +
   5565                             ") < start (" + start +
   5566                             ")");
   5567                 }
   5568                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
   5569                     ch = text[--index];
   5570                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
   5571                         if (++codePointOffset > 0) {
   5572                             return index+1;
   5573                         }
   5574                     }
   5575                 }
   5576             }
   5577         } else {
   5578             while (--codePointOffset >= 0) {
   5579                 char ch = text[index++];
   5580                 if (index > limit) {
   5581                     throw new IndexOutOfBoundsException("index ( " + index +
   5582                             ") > limit (" + limit +
   5583                             ")");
   5584                 }
   5585                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
   5586                     ch = text[index++];
   5587                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
   5588                         if (--codePointOffset < 0) {
   5589                             return index-1;
   5590                         }
   5591                     }
   5592                 }
   5593             }
   5594         }
   5595 
   5596         return index;
   5597     }
   5598 
   5599     // private variables -------------------------------------------------
   5600 
   5601     /**
   5602      * To get the last character out from a data type
   5603      */
   5604     private static final int LAST_CHAR_MASK_ = 0xFFFF;
   5605 
   5606     //    /**
   5607     //     * To get the last byte out from a data type
   5608     //     */
   5609     //    private static final int LAST_BYTE_MASK_ = 0xFF;
   5610     //
   5611     //    /**
   5612     //     * Shift 16 bits
   5613     //     */
   5614     //    private static final int SHIFT_16_ = 16;
   5615     //
   5616     //    /**
   5617     //     * Shift 24 bits
   5618     //     */
   5619     //    private static final int SHIFT_24_ = 24;
   5620     //
   5621     //    /**
   5622     //     * Decimal radix
   5623     //     */
   5624     //    private static final int DECIMAL_RADIX_ = 10;
   5625 
   5626     /**
   5627      * No break space code point
   5628      */
   5629     private static final int NO_BREAK_SPACE_ = 0xA0;
   5630 
   5631     /**
   5632      * Figure space code point
   5633      */
   5634     private static final int FIGURE_SPACE_ = 0x2007;
   5635 
   5636     /**
   5637      * Narrow no break space code point
   5638      */
   5639     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
   5640 
   5641     /**
   5642      * Ideographic number zero code point
   5643      */
   5644     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
   5645 
   5646     /**
   5647      * CJK Ideograph, First code point
   5648      */
   5649     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
   5650 
   5651     /**
   5652      * CJK Ideograph, Second code point
   5653      */
   5654     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
   5655 
   5656     /**
   5657      * CJK Ideograph, Third code point
   5658      */
   5659     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
   5660 
   5661     /**
   5662      * CJK Ideograph, Fourth code point
   5663      */
   5664     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
   5665 
   5666     /**
   5667      * CJK Ideograph, FIFTH code point
   5668      */
   5669     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
   5670 
   5671     /**
   5672      * CJK Ideograph, Sixth code point
   5673      */
   5674     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
   5675 
   5676     /**
   5677      * CJK Ideograph, Seventh code point
   5678      */
   5679     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
   5680 
   5681     /**
   5682      * CJK Ideograph, Eighth code point
   5683      */
   5684     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
   5685 
   5686     /**
   5687      * CJK Ideograph, Nineth code point
   5688      */
   5689     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
   5690 
   5691     /**
   5692      * Application Program command code point
   5693      */
   5694     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
   5695 
   5696     /**
   5697      * Unit separator code point
   5698      */
   5699     private static final int UNIT_SEPARATOR_ = 0x001F;
   5700 
   5701     /**
   5702      * Delete code point
   5703      */
   5704     private static final int DELETE_ = 0x007F;
   5705 
   5706     /**
   5707      * Han digit characters
   5708      */
   5709     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
   5710     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
   5711     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
   5712     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
   5713     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
   5714     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
   5715     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
   5716     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
   5717     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
   5718     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
   5719     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
   5720     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
   5721     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
   5722     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
   5723     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
   5724     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
   5725     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
   5726     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
   5727 
   5728     // private constructor -----------------------------------------------
   5729     ///CLOVER:OFF
   5730     /**
   5731      * Private constructor to prevent instantiation
   5732      */
   5733     private UCharacter()
   5734     {
   5735     }
   5736     ///CLOVER:ON
   5737 }
   5738