Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 package java.lang;
     28 
     29 import java.util.Arrays;
     30 import java.util.HashMap;
     31 import java.util.Locale;
     32 import java.util.Map;
     33 
     34 /**
     35  * The {@code Character} class wraps a value of the primitive
     36  * type {@code char} in an object. An object of type
     37  * {@code Character} contains a single field whose type is
     38  * {@code char}.
     39  * <p>
     40  * In addition, this class provides several methods for determining
     41  * a character's category (lowercase letter, digit, etc.) and for converting
     42  * characters from uppercase to lowercase and vice versa.
     43  * <p>
     44  * Character information is based on the Unicode Standard, version 6.0.0.
     45  * <p>
     46  * The methods and data of class {@code Character} are defined by
     47  * the information in the <i>UnicodeData</i> file that is part of the
     48  * Unicode Character Database maintained by the Unicode
     49  * Consortium. This file specifies various properties including name
     50  * and general category for every defined Unicode code point or
     51  * character range.
     52  * <p>
     53  * The file and its description are available from the Unicode Consortium at:
     54  * <ul>
     55  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
     56  * </ul>
     57  *
     58  * <h4><a name="unicode">Unicode Character Representations</a></h4>
     59  *
     60  * <p>The {@code char} data type (and therefore the value that a
     61  * {@code Character} object encapsulates) are based on the
     62  * original Unicode specification, which defined characters as
     63  * fixed-width 16-bit entities. The Unicode Standard has since been
     64  * changed to allow for characters whose representation requires more
     65  * than 16 bits.  The range of legal <em>code point</em>s is now
     66  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
     67  * (Refer to the <a
     68  * href="http://www.unicode.org/reports/tr27/#notation"><i>
     69  * definition</i></a> of the U+<i>n</i> notation in the Unicode
     70  * Standard.)
     71  *
     72  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
     73  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
     74  * <a name="supplementary">Characters</a> whose code points are greater
     75  * than U+FFFF are called <em>supplementary character</em>s.  The Java
     76  * platform uses the UTF-16 representation in {@code char} arrays and
     77  * in the {@code String} and {@code StringBuffer} classes. In
     78  * this representation, supplementary characters are represented as a pair
     79  * of {@code char} values, the first from the <em>high-surrogates</em>
     80  * range, (&#92;uD800-&#92;uDBFF), the second from the
     81  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
     82  *
     83  * <p>A {@code char} value, therefore, represents Basic
     84  * Multilingual Plane (BMP) code points, including the surrogate
     85  * code points, or code units of the UTF-16 encoding. An
     86  * {@code int} value represents all Unicode code points,
     87  * including supplementary code points. The lower (least significant)
     88  * 21 bits of {@code int} are used to represent Unicode code
     89  * points and the upper (most significant) 11 bits must be zero.
     90  * Unless otherwise specified, the behavior with respect to
     91  * supplementary characters and surrogate {@code char} values is
     92  * as follows:
     93  *
     94  * <ul>
     95  * <li>The methods that only accept a {@code char} value cannot support
     96  * supplementary characters. They treat {@code char} values from the
     97  * surrogate ranges as undefined characters. For example,
     98  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
     99  * this specific value if followed by any low-surrogate value in a string
    100  * would represent a letter.
    101  *
    102  * <li>The methods that accept an {@code int} value support all
    103  * Unicode characters, including supplementary characters. For
    104  * example, {@code Character.isLetter(0x2F81A)} returns
    105  * {@code true} because the code point value represents a letter
    106  * (a CJK ideograph).
    107  * </ul>
    108  *
    109  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
    110  * used for character values in the range between U+0000 and U+10FFFF,
    111  * and <em>Unicode code unit</em> is used for 16-bit
    112  * {@code char} values that are code units of the <em>UTF-16</em>
    113  * encoding. For more information on Unicode terminology, refer to the
    114  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
    115  *
    116  * @author  Lee Boynton
    117  * @author  Guy Steele
    118  * @author  Akira Tanaka
    119  * @author  Martin Buchholz
    120  * @author  Ulf Zibis
    121  * @since   1.0
    122  */
    123 public final
    124 class Character implements java.io.Serializable, Comparable<Character> {
    125     /**
    126      * The minimum radix available for conversion to and from strings.
    127      * The constant value of this field is the smallest value permitted
    128      * for the radix argument in radix-conversion methods such as the
    129      * {@code digit} method, the {@code forDigit} method, and the
    130      * {@code toString} method of class {@code Integer}.
    131      *
    132      * @see     Character#digit(char, int)
    133      * @see     Character#forDigit(int, int)
    134      * @see     Integer#toString(int, int)
    135      * @see     Integer#valueOf(String)
    136      */
    137     public static final int MIN_RADIX = 2;
    138 
    139     /**
    140      * The maximum radix available for conversion to and from strings.
    141      * The constant value of this field is the largest value permitted
    142      * for the radix argument in radix-conversion methods such as the
    143      * {@code digit} method, the {@code forDigit} method, and the
    144      * {@code toString} method of class {@code Integer}.
    145      *
    146      * @see     Character#digit(char, int)
    147      * @see     Character#forDigit(int, int)
    148      * @see     Integer#toString(int, int)
    149      * @see     Integer#valueOf(String)
    150      */
    151     public static final int MAX_RADIX = 36;
    152 
    153     /**
    154      * The constant value of this field is the smallest value of type
    155      * {@code char}, {@code '\u005Cu0000'}.
    156      *
    157      * @since   1.0.2
    158      */
    159     public static final char MIN_VALUE = '\u0000';
    160 
    161     /**
    162      * The constant value of this field is the largest value of type
    163      * {@code char}, {@code '\u005CuFFFF'}.
    164      *
    165      * @since   1.0.2
    166      */
    167     public static final char MAX_VALUE = '\uFFFF';
    168 
    169     /**
    170      * The {@code Class} instance representing the primitive type
    171      * {@code char}.
    172      *
    173      * @since   1.1
    174      */
    175     @SuppressWarnings("unchecked")
    176     /* ----- BEGIN android -----
    177     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
    178     */
    179     public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
    180     // ----- END android -----
    181 
    182     /*
    183      * Normative general types
    184      */
    185 
    186     /*
    187      * General character types
    188      */
    189 
    190     /**
    191      * General category "Cn" in the Unicode specification.
    192      * @since   1.1
    193      */
    194     public static final byte UNASSIGNED = 0;
    195 
    196     /**
    197      * General category "Lu" in the Unicode specification.
    198      * @since   1.1
    199      */
    200     public static final byte UPPERCASE_LETTER = 1;
    201 
    202     /**
    203      * General category "Ll" in the Unicode specification.
    204      * @since   1.1
    205      */
    206     public static final byte LOWERCASE_LETTER = 2;
    207 
    208     /**
    209      * General category "Lt" in the Unicode specification.
    210      * @since   1.1
    211      */
    212     public static final byte TITLECASE_LETTER = 3;
    213 
    214     /**
    215      * General category "Lm" in the Unicode specification.
    216      * @since   1.1
    217      */
    218     public static final byte MODIFIER_LETTER = 4;
    219 
    220     /**
    221      * General category "Lo" in the Unicode specification.
    222      * @since   1.1
    223      */
    224     public static final byte OTHER_LETTER = 5;
    225 
    226     /**
    227      * General category "Mn" in the Unicode specification.
    228      * @since   1.1
    229      */
    230     public static final byte NON_SPACING_MARK = 6;
    231 
    232     /**
    233      * General category "Me" in the Unicode specification.
    234      * @since   1.1
    235      */
    236     public static final byte ENCLOSING_MARK = 7;
    237 
    238     /**
    239      * General category "Mc" in the Unicode specification.
    240      * @since   1.1
    241      */
    242     public static final byte COMBINING_SPACING_MARK = 8;
    243 
    244     /**
    245      * General category "Nd" in the Unicode specification.
    246      * @since   1.1
    247      */
    248     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
    249 
    250     /**
    251      * General category "Nl" in the Unicode specification.
    252      * @since   1.1
    253      */
    254     public static final byte LETTER_NUMBER = 10;
    255 
    256     /**
    257      * General category "No" in the Unicode specification.
    258      * @since   1.1
    259      */
    260     public static final byte OTHER_NUMBER = 11;
    261 
    262     /**
    263      * General category "Zs" in the Unicode specification.
    264      * @since   1.1
    265      */
    266     public static final byte SPACE_SEPARATOR = 12;
    267 
    268     /**
    269      * General category "Zl" in the Unicode specification.
    270      * @since   1.1
    271      */
    272     public static final byte LINE_SEPARATOR = 13;
    273 
    274     /**
    275      * General category "Zp" in the Unicode specification.
    276      * @since   1.1
    277      */
    278     public static final byte PARAGRAPH_SEPARATOR = 14;
    279 
    280     /**
    281      * General category "Cc" in the Unicode specification.
    282      * @since   1.1
    283      */
    284     public static final byte CONTROL = 15;
    285 
    286     /**
    287      * General category "Cf" in the Unicode specification.
    288      * @since   1.1
    289      */
    290     public static final byte FORMAT = 16;
    291 
    292     /**
    293      * General category "Co" in the Unicode specification.
    294      * @since   1.1
    295      */
    296     public static final byte PRIVATE_USE = 18;
    297 
    298     /**
    299      * General category "Cs" in the Unicode specification.
    300      * @since   1.1
    301      */
    302     public static final byte SURROGATE = 19;
    303 
    304     /**
    305      * General category "Pd" in the Unicode specification.
    306      * @since   1.1
    307      */
    308     public static final byte DASH_PUNCTUATION = 20;
    309 
    310     /**
    311      * General category "Ps" in the Unicode specification.
    312      * @since   1.1
    313      */
    314     public static final byte START_PUNCTUATION = 21;
    315 
    316     /**
    317      * General category "Pe" in the Unicode specification.
    318      * @since   1.1
    319      */
    320     public static final byte END_PUNCTUATION = 22;
    321 
    322     /**
    323      * General category "Pc" in the Unicode specification.
    324      * @since   1.1
    325      */
    326     public static final byte CONNECTOR_PUNCTUATION = 23;
    327 
    328     /**
    329      * General category "Po" in the Unicode specification.
    330      * @since   1.1
    331      */
    332     public static final byte OTHER_PUNCTUATION = 24;
    333 
    334     /**
    335      * General category "Sm" in the Unicode specification.
    336      * @since   1.1
    337      */
    338     public static final byte MATH_SYMBOL = 25;
    339 
    340     /**
    341      * General category "Sc" in the Unicode specification.
    342      * @since   1.1
    343      */
    344     public static final byte CURRENCY_SYMBOL = 26;
    345 
    346     /**
    347      * General category "Sk" in the Unicode specification.
    348      * @since   1.1
    349      */
    350     public static final byte MODIFIER_SYMBOL = 27;
    351 
    352     /**
    353      * General category "So" in the Unicode specification.
    354      * @since   1.1
    355      */
    356     public static final byte OTHER_SYMBOL = 28;
    357 
    358     /**
    359      * General category "Pi" in the Unicode specification.
    360      * @since   1.4
    361      */
    362     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
    363 
    364     /**
    365      * General category "Pf" in the Unicode specification.
    366      * @since   1.4
    367      */
    368     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
    369 
    370     /**
    371      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
    372      */
    373     static final int ERROR = 0xFFFFFFFF;
    374 
    375 
    376     /**
    377      * Undefined bidirectional character type. Undefined {@code char}
    378      * values have undefined directionality in the Unicode specification.
    379      * @since 1.4
    380      */
    381     public static final byte DIRECTIONALITY_UNDEFINED = -1;
    382 
    383     /**
    384      * Strong bidirectional character type "L" in the Unicode specification.
    385      * @since 1.4
    386      */
    387     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
    388 
    389     /**
    390      * Strong bidirectional character type "R" in the Unicode specification.
    391      * @since 1.4
    392      */
    393     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
    394 
    395     /**
    396      * Strong bidirectional character type "AL" in the Unicode specification.
    397      * @since 1.4
    398      */
    399     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
    400 
    401     /**
    402      * Weak bidirectional character type "EN" in the Unicode specification.
    403      * @since 1.4
    404      */
    405     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
    406 
    407     /**
    408      * Weak bidirectional character type "ES" in the Unicode specification.
    409      * @since 1.4
    410      */
    411     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
    412 
    413     /**
    414      * Weak bidirectional character type "ET" in the Unicode specification.
    415      * @since 1.4
    416      */
    417     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
    418 
    419     /**
    420      * Weak bidirectional character type "AN" in the Unicode specification.
    421      * @since 1.4
    422      */
    423     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
    424 
    425     /**
    426      * Weak bidirectional character type "CS" in the Unicode specification.
    427      * @since 1.4
    428      */
    429     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
    430 
    431     /**
    432      * Weak bidirectional character type "NSM" in the Unicode specification.
    433      * @since 1.4
    434      */
    435     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
    436 
    437     /**
    438      * Weak bidirectional character type "BN" in the Unicode specification.
    439      * @since 1.4
    440      */
    441     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
    442 
    443     /**
    444      * Neutral bidirectional character type "B" in the Unicode specification.
    445      * @since 1.4
    446      */
    447     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
    448 
    449     /**
    450      * Neutral bidirectional character type "S" in the Unicode specification.
    451      * @since 1.4
    452      */
    453     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
    454 
    455     /**
    456      * Neutral bidirectional character type "WS" in the Unicode specification.
    457      * @since 1.4
    458      */
    459     public static final byte DIRECTIONALITY_WHITESPACE = 12;
    460 
    461     /**
    462      * Neutral bidirectional character type "ON" in the Unicode specification.
    463      * @since 1.4
    464      */
    465     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
    466 
    467     /**
    468      * Strong bidirectional character type "LRE" in the Unicode specification.
    469      * @since 1.4
    470      */
    471     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
    472 
    473     /**
    474      * Strong bidirectional character type "LRO" in the Unicode specification.
    475      * @since 1.4
    476      */
    477     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
    478 
    479     /**
    480      * Strong bidirectional character type "RLE" in the Unicode specification.
    481      * @since 1.4
    482      */
    483     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
    484 
    485     /**
    486      * Strong bidirectional character type "RLO" in the Unicode specification.
    487      * @since 1.4
    488      */
    489     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
    490 
    491     /**
    492      * Weak bidirectional character type "PDF" in the Unicode specification.
    493      * @since 1.4
    494      */
    495     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
    496 
    497     /**
    498      * The minimum value of a
    499      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
    500      * Unicode high-surrogate code unit</a>
    501      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
    502      * A high-surrogate is also known as a <i>leading-surrogate</i>.
    503      *
    504      * @since 1.5
    505      */
    506     public static final char MIN_HIGH_SURROGATE = '\uD800';
    507 
    508     /**
    509      * The maximum value of a
    510      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
    511      * Unicode high-surrogate code unit</a>
    512      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
    513      * A high-surrogate is also known as a <i>leading-surrogate</i>.
    514      *
    515      * @since 1.5
    516      */
    517     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
    518 
    519     /**
    520      * The minimum value of a
    521      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
    522      * Unicode low-surrogate code unit</a>
    523      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
    524      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
    525      *
    526      * @since 1.5
    527      */
    528     public static final char MIN_LOW_SURROGATE  = '\uDC00';
    529 
    530     /**
    531      * The maximum value of a
    532      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
    533      * Unicode low-surrogate code unit</a>
    534      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
    535      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
    536      *
    537      * @since 1.5
    538      */
    539     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
    540 
    541     /**
    542      * The minimum value of a Unicode surrogate code unit in the
    543      * UTF-16 encoding, constant {@code '\u005CuD800'}.
    544      *
    545      * @since 1.5
    546      */
    547     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
    548 
    549     /**
    550      * The maximum value of a Unicode surrogate code unit in the
    551      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
    552      *
    553      * @since 1.5
    554      */
    555     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
    556 
    557     /**
    558      * The minimum value of a
    559      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
    560      * Unicode supplementary code point</a>, constant {@code U+10000}.
    561      *
    562      * @since 1.5
    563      */
    564     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
    565 
    566     /**
    567      * The minimum value of a
    568      * <a href="http://www.unicode.org/glossary/#code_point">
    569      * Unicode code point</a>, constant {@code U+0000}.
    570      *
    571      * @since 1.5
    572      */
    573     public static final int MIN_CODE_POINT = 0x000000;
    574 
    575     /**
    576      * The maximum value of a
    577      * <a href="http://www.unicode.org/glossary/#code_point">
    578      * Unicode code point</a>, constant {@code U+10FFFF}.
    579      *
    580      * @since 1.5
    581      */
    582     public static final int MAX_CODE_POINT = 0X10FFFF;
    583 
    584     private static final byte[] DIRECTIONALITY = new byte[] {
    585             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
    586             DIRECTIONALITY_EUROPEAN_NUMBER,
    587             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
    588             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
    589             DIRECTIONALITY_ARABIC_NUMBER,
    590             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
    591             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
    592             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
    593             DIRECTIONALITY_OTHER_NEUTRALS,
    594             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
    595             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
    596             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
    597             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
    598             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
    599             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
    600             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    601 
    602     /**
    603      * Instances of this class represent particular subsets of the Unicode
    604      * character set.  The only family of subsets defined in the
    605      * {@code Character} class is {@link Character.UnicodeBlock}.
    606      * Other portions of the Java API may define other subsets for their
    607      * own purposes.
    608      *
    609      * @since 1.2
    610      */
    611     public static class Subset  {
    612 
    613         private String name;
    614 
    615         /**
    616          * Constructs a new {@code Subset} instance.
    617          *
    618          * @param  name  The name of this subset
    619          * @exception NullPointerException if name is {@code null}
    620          */
    621         protected Subset(String name) {
    622             if (name == null) {
    623                 throw new NullPointerException("name");
    624             }
    625             this.name = name;
    626         }
    627 
    628         /**
    629          * Compares two {@code Subset} objects for equality.
    630          * This method returns {@code true} if and only if
    631          * {@code this} and the argument refer to the same
    632          * object; since this method is {@code final}, this
    633          * guarantee holds for all subclasses.
    634          */
    635         public final boolean equals(Object obj) {
    636             return (this == obj);
    637         }
    638 
    639         /**
    640          * Returns the standard hash code as defined by the
    641          * {@link Object#hashCode} method.  This method
    642          * is {@code final} in order to ensure that the
    643          * {@code equals} and {@code hashCode} methods will
    644          * be consistent in all subclasses.
    645          */
    646         public final int hashCode() {
    647             return super.hashCode();
    648         }
    649 
    650         /**
    651          * Returns the name of this subset.
    652          */
    653         public final String toString() {
    654             return name;
    655         }
    656     }
    657 
    658     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
    659     // for the latest specification of Unicode Blocks.
    660 
    661     /**
    662      * A family of character subsets representing the character blocks in the
    663      * Unicode specification. Character blocks generally define characters
    664      * used for a specific script or purpose. A character is contained by
    665      * at most one Unicode block.
    666      *
    667      * @since 1.2
    668      */
    669     public static final class UnicodeBlock extends Subset {
    670 
    671         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
    672 
    673         /**
    674          * Creates a UnicodeBlock with the given identifier name.
    675          * This name must be the same as the block identifier.
    676          */
    677         private UnicodeBlock(String idName) {
    678             this(idName, true);
    679         }
    680 
    681         private UnicodeBlock(String idName, boolean isMap) {
    682             super(idName);
    683             if (isMap) {
    684                 map.put(idName, this);
    685             }
    686         }
    687 
    688         /**
    689          * Creates a UnicodeBlock with the given identifier name and
    690          * alias name.
    691          */
    692         private UnicodeBlock(String idName, String alias) {
    693             this(idName, true);
    694             map.put(alias, this);
    695         }
    696 
    697         /**
    698          * Creates a UnicodeBlock with the given identifier name and
    699          * alias names.
    700          */
    701         private UnicodeBlock(String idName, String... aliases) {
    702             this(idName, true);
    703             for (String alias : aliases)
    704                 map.put(alias, this);
    705         }
    706 
    707         /**
    708          * Constant for the "Basic Latin" Unicode character block.
    709          * @since 1.2
    710          */
    711         public static final UnicodeBlock  BASIC_LATIN =
    712             new UnicodeBlock("BASIC_LATIN",
    713                              "BASIC LATIN",
    714                              "BASICLATIN");
    715 
    716         /**
    717          * Constant for the "Latin-1 Supplement" Unicode character block.
    718          * @since 1.2
    719          */
    720         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
    721             new UnicodeBlock("LATIN_1_SUPPLEMENT",
    722                              "LATIN-1 SUPPLEMENT",
    723                              "LATIN-1SUPPLEMENT");
    724 
    725         /**
    726          * Constant for the "Latin Extended-A" Unicode character block.
    727          * @since 1.2
    728          */
    729         public static final UnicodeBlock LATIN_EXTENDED_A =
    730             new UnicodeBlock("LATIN_EXTENDED_A",
    731                              "LATIN EXTENDED-A",
    732                              "LATINEXTENDED-A");
    733 
    734         /**
    735          * Constant for the "Latin Extended-B" Unicode character block.
    736          * @since 1.2
    737          */
    738         public static final UnicodeBlock LATIN_EXTENDED_B =
    739             new UnicodeBlock("LATIN_EXTENDED_B",
    740                              "LATIN EXTENDED-B",
    741                              "LATINEXTENDED-B");
    742 
    743         /**
    744          * Constant for the "IPA Extensions" Unicode character block.
    745          * @since 1.2
    746          */
    747         public static final UnicodeBlock IPA_EXTENSIONS =
                    // The first string repeats the field name; the second and third are the
                    // upper-cased block name written with spaces and with the spaces removed.
                    // The other multi-word constants in this run follow the same pattern.
                    // NOTE(review): the constructor is outside this excerpt; the extra
                    // strings are presumably alternate lookup names -- confirm there.
    748             new UnicodeBlock("IPA_EXTENSIONS",
    749                              "IPA EXTENSIONS",
    750                              "IPAEXTENSIONS");
    751 
    752         /**
    753          * Constant for the "Spacing Modifier Letters" Unicode character block.
    754          * @since 1.2
    755          */
    756         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
    757             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
    758                              "SPACING MODIFIER LETTERS",
    759                              "SPACINGMODIFIERLETTERS");
    760 
    761         /**
    762          * Constant for the "Combining Diacritical Marks" Unicode character block.
    763          * @since 1.2
    764          */
    765         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
    766             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
    767                              "COMBINING DIACRITICAL MARKS",
    768                              "COMBININGDIACRITICALMARKS");
    769 
    770         /**
    771          * Constant for the "Greek and Coptic" Unicode character block.
    772          * <p>
    773          * This block was previously known as the "Greek" block.
    774          *
    775          * @since 1.2
    776          */
    777         public static final UnicodeBlock GREEK =
                    // The field and identifier keep the previous name "GREEK"; the two extra
                    // strings carry the current "Greek and Coptic" name instead.
    778             new UnicodeBlock("GREEK",
    779                              "GREEK AND COPTIC",
    780                              "GREEKANDCOPTIC");
    781 
    782         /**
    783          * Constant for the "Cyrillic" Unicode character block.
    784          * @since 1.2
    785          */
    786         public static final UnicodeBlock CYRILLIC =
                    // Single-word block name: only the identifier string is passed. The
                    // one-argument constants below are built the same way.
    787             new UnicodeBlock("CYRILLIC");
    788 
    789         /**
    790          * Constant for the "Armenian" Unicode character block.
    791          * @since 1.2
    792          */
    793         public static final UnicodeBlock ARMENIAN =
    794             new UnicodeBlock("ARMENIAN");
    795 
    796         /**
    797          * Constant for the "Hebrew" Unicode character block.
    798          * @since 1.2
    799          */
    800         public static final UnicodeBlock HEBREW =
    801             new UnicodeBlock("HEBREW");
    802 
    803         /**
    804          * Constant for the "Arabic" Unicode character block.
    805          * @since 1.2
    806          */
    807         public static final UnicodeBlock ARABIC =
    808             new UnicodeBlock("ARABIC");
    809 
    810         /**
    811          * Constant for the "Devanagari" Unicode character block.
    812          * @since 1.2
    813          */
    814         public static final UnicodeBlock DEVANAGARI =
    815             new UnicodeBlock("DEVANAGARI");
    816 
    817         /**
    818          * Constant for the "Bengali" Unicode character block.
    819          * @since 1.2
    820          */
    821         public static final UnicodeBlock BENGALI =
    822             new UnicodeBlock("BENGALI");
    823 
    824         /**
    825          * Constant for the "Gurmukhi" Unicode character block.
    826          * @since 1.2
    827          */
    828         public static final UnicodeBlock GURMUKHI =
    829             new UnicodeBlock("GURMUKHI");
    830 
    831         /**
    832          * Constant for the "Gujarati" Unicode character block.
    833          * @since 1.2
    834          */
    835         public static final UnicodeBlock GUJARATI =
    836             new UnicodeBlock("GUJARATI");
    837 
    838         /**
    839          * Constant for the "Oriya" Unicode character block.
    840          * @since 1.2
    841          */
    842         public static final UnicodeBlock ORIYA =
    843             new UnicodeBlock("ORIYA");
    844 
    845         /**
    846          * Constant for the "Tamil" Unicode character block.
    847          * @since 1.2
    848          */
    849         public static final UnicodeBlock TAMIL =
    850             new UnicodeBlock("TAMIL");
    851 
    852         /**
    853          * Constant for the "Telugu" Unicode character block.
    854          * @since 1.2
    855          */
    856         public static final UnicodeBlock TELUGU =
    857             new UnicodeBlock("TELUGU");
    858 
    859         /**
    860          * Constant for the "Kannada" Unicode character block.
    861          * @since 1.2
    862          */
    863         public static final UnicodeBlock KANNADA =
    864             new UnicodeBlock("KANNADA");
    865 
    866         /**
    867          * Constant for the "Malayalam" Unicode character block.
    868          * @since 1.2
    869          */
    870         public static final UnicodeBlock MALAYALAM =
    871             new UnicodeBlock("MALAYALAM");
    872 
    873         /**
    874          * Constant for the "Thai" Unicode character block.
    875          * @since 1.2
    876          */
    877         public static final UnicodeBlock THAI =
    878             new UnicodeBlock("THAI");
    879 
    880         /**
    881          * Constant for the "Lao" Unicode character block.
    882          * @since 1.2
    883          */
    884         public static final UnicodeBlock LAO =
    885             new UnicodeBlock("LAO");
    886 
    887         /**
    888          * Constant for the "Tibetan" Unicode character block.
    889          * @since 1.2
    890          */
    891         public static final UnicodeBlock TIBETAN =
    892             new UnicodeBlock("TIBETAN");
    893 
    894         /**
    895          * Constant for the "Georgian" Unicode character block.
    896          * @since 1.2
    897          */
    898         public static final UnicodeBlock GEORGIAN =
    899             new UnicodeBlock("GEORGIAN");
    900 
    901         /**
    902          * Constant for the "Hangul Jamo" Unicode character block.
    903          * @since 1.2
    904          */
    905         public static final UnicodeBlock HANGUL_JAMO =
    906             new UnicodeBlock("HANGUL_JAMO",
    907                              "HANGUL JAMO",
    908                              "HANGULJAMO");
    909 
    910         /**
    911          * Constant for the "Latin Extended Additional" Unicode character block.
    912          * @since 1.2
    913          */
    914         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
    915             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
    916                              "LATIN EXTENDED ADDITIONAL",
    917                              "LATINEXTENDEDADDITIONAL");
    918 
    919         /**
    920          * Constant for the "Greek Extended" Unicode character block.
    921          * @since 1.2
    922          */
    923         public static final UnicodeBlock GREEK_EXTENDED =
    924             new UnicodeBlock("GREEK_EXTENDED",
    925                              "GREEK EXTENDED",
    926                              "GREEKEXTENDED");
    927 
    928         /**
    929          * Constant for the "General Punctuation" Unicode character block.
    930          * @since 1.2
    931          */
    932         public static final UnicodeBlock GENERAL_PUNCTUATION =
    933             new UnicodeBlock("GENERAL_PUNCTUATION",
    934                              "GENERAL PUNCTUATION",
    935                              "GENERALPUNCTUATION");
    936 
    937         /**
    938          * Constant for the "Superscripts and Subscripts" Unicode character
    939          * block.
    940          * @since 1.2
    941          */
    942         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
    943             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
    944                              "SUPERSCRIPTS AND SUBSCRIPTS",
    945                              "SUPERSCRIPTSANDSUBSCRIPTS");
    946 
    947         /**
    948          * Constant for the "Currency Symbols" Unicode character block.
    949          * @since 1.2
    950          */
    951         public static final UnicodeBlock CURRENCY_SYMBOLS =
    952             new UnicodeBlock("CURRENCY_SYMBOLS",
    953                              "CURRENCY SYMBOLS",
    954                              "CURRENCYSYMBOLS");
    955 
    956         /**
    957          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
    958          * character block.
    959          * <p>
    960          * This block was previously known as "Combining Marks for Symbols".
    961          * @since 1.2
    962          */
    963         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
                    // Four extra names follow the identifier: the current block name with and
                    // without spaces, then the previous name ("Combining Marks for Symbols")
                    // in both forms. The field and identifier keep the previous name.
    964             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
    965                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
    966                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
    967                              "COMBINING MARKS FOR SYMBOLS",
    968                              "COMBININGMARKSFORSYMBOLS");
    969 
    970         /**
    971          * Constant for the "Letterlike Symbols" Unicode character block.
    972          * @since 1.2
    973          */
    974         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
    975             new UnicodeBlock("LETTERLIKE_SYMBOLS",
    976                              "LETTERLIKE SYMBOLS",
    977                              "LETTERLIKESYMBOLS");
    978 
    979         /**
    980          * Constant for the "Number Forms" Unicode character block.
    981          * @since 1.2
    982          */
    983         public static final UnicodeBlock NUMBER_FORMS =
    984             new UnicodeBlock("NUMBER_FORMS",
    985                              "NUMBER FORMS",
    986                              "NUMBERFORMS");
    987 
    988         /**
    989          * Constant for the "Arrows" Unicode character block.
    990          * @since 1.2
    991          */
    992         public static final UnicodeBlock ARROWS =
    993             new UnicodeBlock("ARROWS");
    994 
    995         /**
    996          * Constant for the "Mathematical Operators" Unicode character block.
    997          * @since 1.2
    998          */
    999         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
   1000             new UnicodeBlock("MATHEMATICAL_OPERATORS",
   1001                              "MATHEMATICAL OPERATORS",
   1002                              "MATHEMATICALOPERATORS");
   1003 
   1004         /**
   1005          * Constant for the "Miscellaneous Technical" Unicode character block.
   1006          * @since 1.2
   1007          */
   1008         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
   1009             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
   1010                              "MISCELLANEOUS TECHNICAL",
   1011                              "MISCELLANEOUSTECHNICAL");
   1012 
   1013         /**
   1014          * Constant for the "Control Pictures" Unicode character block.
   1015          * @since 1.2
   1016          */
   1017         public static final UnicodeBlock CONTROL_PICTURES =
   1018             new UnicodeBlock("CONTROL_PICTURES",
   1019                              "CONTROL PICTURES",
   1020                              "CONTROLPICTURES");
   1021 
   1022         /**
   1023          * Constant for the "Optical Character Recognition" Unicode character block.
   1024          * @since 1.2
   1025          */
   1026         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
   1027             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
   1028                              "OPTICAL CHARACTER RECOGNITION",
   1029                              "OPTICALCHARACTERRECOGNITION");
   1030 
   1031         /**
   1032          * Constant for the "Enclosed Alphanumerics" Unicode character block.
   1033          * @since 1.2
   1034          */
   1035         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
   1036             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
   1037                              "ENCLOSED ALPHANUMERICS",
   1038                              "ENCLOSEDALPHANUMERICS");
   1039 
   1040         /**
   1041          * Constant for the "Box Drawing" Unicode character block.
   1042          * @since 1.2
   1043          */
   1044         public static final UnicodeBlock BOX_DRAWING =
   1045             new UnicodeBlock("BOX_DRAWING",
   1046                              "BOX DRAWING",
   1047                              "BOXDRAWING");
   1048 
   1049         /**
   1050          * Constant for the "Block Elements" Unicode character block.
   1051          * @since 1.2
   1052          */
   1053         public static final UnicodeBlock BLOCK_ELEMENTS =
   1054             new UnicodeBlock("BLOCK_ELEMENTS",
   1055                              "BLOCK ELEMENTS",
   1056                              "BLOCKELEMENTS");
   1057 
   1058         /**
   1059          * Constant for the "Geometric Shapes" Unicode character block.
   1060          * @since 1.2
   1061          */
   1062         public static final UnicodeBlock GEOMETRIC_SHAPES =
   1063             new UnicodeBlock("GEOMETRIC_SHAPES",
   1064                              "GEOMETRIC SHAPES",
   1065                              "GEOMETRICSHAPES");
   1066 
   1067         /**
   1068          * Constant for the "Miscellaneous Symbols" Unicode character block.
   1069          * @since 1.2
   1070          */
   1071         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
   1072             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
   1073                              "MISCELLANEOUS SYMBOLS",
   1074                              "MISCELLANEOUSSYMBOLS");
   1075 
   1076         /**
   1077          * Constant for the "Dingbats" Unicode character block.
   1078          * @since 1.2
   1079          */
   1080         public static final UnicodeBlock DINGBATS =
   1081             new UnicodeBlock("DINGBATS");
   1082 
   1083         /**
   1084          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
   1085          * @since 1.2
   1086          */
   1087         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
   1088             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
   1089                              "CJK SYMBOLS AND PUNCTUATION",
   1090                              "CJKSYMBOLSANDPUNCTUATION");
   1091 
   1092         /**
   1093          * Constant for the "Hiragana" Unicode character block.
   1094          * @since 1.2
   1095          */
   1096         public static final UnicodeBlock HIRAGANA =
   1097             new UnicodeBlock("HIRAGANA");
   1098 
   1099         /**
   1100          * Constant for the "Katakana" Unicode character block.
   1101          * @since 1.2
   1102          */
   1103         public static final UnicodeBlock KATAKANA =
   1104             new UnicodeBlock("KATAKANA");
   1105 
   1106         /**
   1107          * Constant for the "Bopomofo" Unicode character block.
   1108          * @since 1.2
   1109          */
   1110         public static final UnicodeBlock BOPOMOFO =
   1111             new UnicodeBlock("BOPOMOFO");
   1112 
   1113         /**
   1114          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
   1115          * @since 1.2
   1116          */
   1117         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
   1118             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
   1119                              "HANGUL COMPATIBILITY JAMO",
   1120                              "HANGULCOMPATIBILITYJAMO");
   1121 
   1122         /**
   1123          * Constant for the "Kanbun" Unicode character block.
   1124          * @since 1.2
   1125          */
   1126         public static final UnicodeBlock KANBUN =
   1127             new UnicodeBlock("KANBUN");
   1128 
   1129         /**
   1130          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
   1131          * @since 1.2
   1132          */
   1133         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
   1134             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
   1135                              "ENCLOSED CJK LETTERS AND MONTHS",
   1136                              "ENCLOSEDCJKLETTERSANDMONTHS");
   1137 
   1138         /**
   1139          * Constant for the "CJK Compatibility" Unicode character block.
   1140          * @since 1.2
   1141          */
   1142         public static final UnicodeBlock CJK_COMPATIBILITY =
   1143             new UnicodeBlock("CJK_COMPATIBILITY",
   1144                              "CJK COMPATIBILITY",
   1145                              "CJKCOMPATIBILITY");
   1146 
   1147         /**
   1148          * Constant for the "CJK Unified Ideographs" Unicode character block.
   1149          * @since 1.2
   1150          */
   1151         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
   1152             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
   1153                              "CJK UNIFIED IDEOGRAPHS",
   1154                              "CJKUNIFIEDIDEOGRAPHS");
   1155 
   1156         /**
   1157          * Constant for the "Hangul Syllables" Unicode character block.
   1158          * @since 1.2
   1159          */
   1160         public static final UnicodeBlock HANGUL_SYLLABLES =
   1161             new UnicodeBlock("HANGUL_SYLLABLES",
   1162                              "HANGUL SYLLABLES",
   1163                              "HANGULSYLLABLES");
   1164 
   1165         /**
   1166          * Constant for the "Private Use Area" Unicode character block.
   1167          * @since 1.2
   1168          */
   1169         public static final UnicodeBlock PRIVATE_USE_AREA =
   1170             new UnicodeBlock("PRIVATE_USE_AREA",
   1171                              "PRIVATE USE AREA",
   1172                              "PRIVATEUSEAREA");
   1173 
   1174         /**
   1175          * Constant for the "CJK Compatibility Ideographs" Unicode character
   1176          * block.
   1177          * @since 1.2
   1178          */
   1179         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
   1180             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
   1181                              "CJK COMPATIBILITY IDEOGRAPHS",
   1182                              "CJKCOMPATIBILITYIDEOGRAPHS");
   1183 
   1184         /**
   1185          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
   1186          * @since 1.2
   1187          */
   1188         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
   1189             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
   1190                              "ALPHABETIC PRESENTATION FORMS",
   1191                              "ALPHABETICPRESENTATIONFORMS");
   1192 
   1193         /**
   1194          * Constant for the "Arabic Presentation Forms-A" Unicode character
   1195          * block.
   1196          * @since 1.2
   1197          */
   1198         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
                    // The hyphen of "Forms-A" becomes '_' in the identifier but is kept in
                    // both extra names, including the one with the spaces removed.
   1199             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
   1200                              "ARABIC PRESENTATION FORMS-A",
   1201                              "ARABICPRESENTATIONFORMS-A");
   1202 
   1203         /**
   1204          * Constant for the "Combining Half Marks" Unicode character block.
   1205          * @since 1.2
   1206          */
   1207         public static final UnicodeBlock COMBINING_HALF_MARKS =
   1208             new UnicodeBlock("COMBINING_HALF_MARKS",
   1209                              "COMBINING HALF MARKS",
   1210                              "COMBININGHALFMARKS");
   1211 
   1212         /**
   1213          * Constant for the "CJK Compatibility Forms" Unicode character block.
   1214          * @since 1.2
   1215          */
   1216         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
   1217             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
   1218                              "CJK COMPATIBILITY FORMS",
   1219                              "CJKCOMPATIBILITYFORMS");
   1220 
   1221         /**
   1222          * Constant for the "Small Form Variants" Unicode character block.
   1223          * @since 1.2
   1224          */
   1225         public static final UnicodeBlock SMALL_FORM_VARIANTS =
   1226             new UnicodeBlock("SMALL_FORM_VARIANTS",
   1227                              "SMALL FORM VARIANTS",
   1228                              "SMALLFORMVARIANTS");
   1229 
   1230         /**
   1231          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
   1232          * @since 1.2
   1233          */
   1234         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
                    // Hyphen handling as for ARABIC_PRESENTATION_FORMS_A: '_' in the
                    // identifier, '-' preserved in both extra names.
   1235             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
   1236                              "ARABIC PRESENTATION FORMS-B",
   1237                              "ARABICPRESENTATIONFORMS-B");
   1238 
   1239         /**
   1240          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
   1241          * block.
   1242          * @since 1.2
   1243          */
   1244         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
   1245             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
   1246                              "HALFWIDTH AND FULLWIDTH FORMS",
   1247                              "HALFWIDTHANDFULLWIDTHFORMS");
   1248 
   1249         /**
   1250          * Constant for the "Specials" Unicode character block.
   1251          * @since 1.2
   1252          */
   1253         public static final UnicodeBlock SPECIALS =
   1254             new UnicodeBlock("SPECIALS");
   1255 
   1256         /**
                 * Legacy surrogates constant, superseded by the three constants named
                 * in the deprecation note below.
                 *
   1257          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
   1258          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
   1259          *             {@link #LOW_SURROGATES}. These new constants match
   1260          *             the block definitions of the Unicode Standard.
   1261          *             The {@link #of(char)} and {@link #of(int)} methods
   1262          *             return the new constants, not SURROGATES_AREA.
   1263          */
   1264         @Deprecated
   1265         public static final UnicodeBlock SURROGATES_AREA =
                    // Unlike the neighbouring constants, this one passes a boolean after the
                    // identifier and no alternate names.
                    // NOTE(review): the two-argument constructor is outside this excerpt;
                    // 'false' presumably keeps this deprecated constant out of the name
                    // lookup table -- confirm against the constructor.
   1266             new UnicodeBlock("SURROGATES_AREA", false);
   1267 
   1268         /**
   1269          * Constant for the "Syriac" Unicode character block.
   1270          * @since 1.4
   1271          */
   1272         public static final UnicodeBlock SYRIAC =
   1273             new UnicodeBlock("SYRIAC");
   1274 
   1275         /**
   1276          * Constant for the "Thaana" Unicode character block.
   1277          * @since 1.4
   1278          */
   1279         public static final UnicodeBlock THAANA =
   1280             new UnicodeBlock("THAANA");
   1281 
   1282         /**
   1283          * Constant for the "Sinhala" Unicode character block.
   1284          * @since 1.4
   1285          */
   1286         public static final UnicodeBlock SINHALA =
   1287             new UnicodeBlock("SINHALA");
   1288 
   1289         /**
   1290          * Constant for the "Myanmar" Unicode character block.
   1291          * @since 1.4
   1292          */
   1293         public static final UnicodeBlock MYANMAR =
   1294             new UnicodeBlock("MYANMAR");
   1295 
   1296         /**
   1297          * Constant for the "Ethiopic" Unicode character block.
   1298          * @since 1.4
   1299          */
   1300         public static final UnicodeBlock ETHIOPIC =
   1301             new UnicodeBlock("ETHIOPIC");
   1302 
   1303         /**
   1304          * Constant for the "Cherokee" Unicode character block.
   1305          * @since 1.4
   1306          */
   1307         public static final UnicodeBlock CHEROKEE =
   1308             new UnicodeBlock("CHEROKEE");
   1309 
   1310         /**
   1311          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
   1312          * @since 1.4
   1313          */
   1314         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
                    // Multi-word block names pass three strings: the identifier (same as the
                    // field name), the upper-cased block name with spaces, and that name
                    // with the spaces removed. Single-word names pass the identifier only.
                    // NOTE(review): the constructor is outside this excerpt; the extra
                    // strings are presumably alternate lookup names -- confirm there.
   1315             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
   1316                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
   1317                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
   1318 
   1319         /**
   1320          * Constant for the "Ogham" Unicode character block.
   1321          * @since 1.4
   1322          */
   1323         public static final UnicodeBlock OGHAM =
   1324             new UnicodeBlock("OGHAM");
   1325 
   1326         /**
   1327          * Constant for the "Runic" Unicode character block.
   1328          * @since 1.4
   1329          */
   1330         public static final UnicodeBlock RUNIC =
   1331             new UnicodeBlock("RUNIC");
   1332 
   1333         /**
   1334          * Constant for the "Khmer" Unicode character block.
   1335          * @since 1.4
   1336          */
   1337         public static final UnicodeBlock KHMER =
   1338             new UnicodeBlock("KHMER");
   1339 
   1340         /**
   1341          * Constant for the "Mongolian" Unicode character block.
   1342          * @since 1.4
   1343          */
   1344         public static final UnicodeBlock MONGOLIAN =
   1345             new UnicodeBlock("MONGOLIAN");
   1346 
   1347         /**
   1348          * Constant for the "Braille Patterns" Unicode character block.
   1349          * @since 1.4
   1350          */
   1351         public static final UnicodeBlock BRAILLE_PATTERNS =
   1352             new UnicodeBlock("BRAILLE_PATTERNS",
   1353                              "BRAILLE PATTERNS",
   1354                              "BRAILLEPATTERNS");
   1355 
   1356         /**
   1357          * Constant for the "CJK Radicals Supplement" Unicode character block.
   1358          * @since 1.4
   1359          */
   1360         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
   1361             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
   1362                              "CJK RADICALS SUPPLEMENT",
   1363                              "CJKRADICALSSUPPLEMENT");
   1364 
   1365         /**
   1366          * Constant for the "Kangxi Radicals" Unicode character block.
   1367          * @since 1.4
   1368          */
   1369         public static final UnicodeBlock KANGXI_RADICALS =
   1370             new UnicodeBlock("KANGXI_RADICALS",
   1371                              "KANGXI RADICALS",
   1372                              "KANGXIRADICALS");
   1373 
   1374         /**
   1375          * Constant for the "Ideographic Description Characters" Unicode character block.
   1376          * @since 1.4
   1377          */
   1378         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
   1379             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
   1380                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
   1381                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
   1382 
   1383         /**
   1384          * Constant for the "Bopomofo Extended" Unicode character block.
   1385          * @since 1.4
   1386          */
   1387         public static final UnicodeBlock BOPOMOFO_EXTENDED =
   1388             new UnicodeBlock("BOPOMOFO_EXTENDED",
   1389                              "BOPOMOFO EXTENDED",
   1390                              "BOPOMOFOEXTENDED");
   1391 
   1392         /**
   1393          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
   1394          * @since 1.4
   1395          */
   1396         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
   1397             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
   1398                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
   1399                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
   1400 
   1401         /**
   1402          * Constant for the "Yi Syllables" Unicode character block.
   1403          * @since 1.4
   1404          */
   1405         public static final UnicodeBlock YI_SYLLABLES =
   1406             new UnicodeBlock("YI_SYLLABLES",
   1407                              "YI SYLLABLES",
   1408                              "YISYLLABLES");
   1409 
   1410         /**
   1411          * Constant for the "Yi Radicals" Unicode character block.
   1412          * @since 1.4
   1413          */
   1414         public static final UnicodeBlock YI_RADICALS =
   1415             new UnicodeBlock("YI_RADICALS",
   1416                              "YI RADICALS",
   1417                              "YIRADICALS");
   1418 
   1419         /**
   1420          * Constant for the "Cyrillic Supplementary" Unicode character block.
   1421          * @since 1.5
   1422          */
   1423         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
                    // Four extra names follow the identifier: "Cyrillic Supplementary" with
                    // and without spaces, then the shorter "Cyrillic Supplement" spelling in
                    // both forms.
                    // NOTE(review): the Javadoc above does not say why two spellings are
                    // passed; presumably the block has been published under both names --
                    // confirm against the Unicode block list.
   1424             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
   1425                              "CYRILLIC SUPPLEMENTARY",
   1426                              "CYRILLICSUPPLEMENTARY",
   1427                              "CYRILLIC SUPPLEMENT",
   1428                              "CYRILLICSUPPLEMENT");
   1429 
   1430         /**
   1431          * Constant for the "Tagalog" Unicode character block.
   1432          * @since 1.5
   1433          */
   1434         public static final UnicodeBlock TAGALOG =
   1435             new UnicodeBlock("TAGALOG");
   1436 
   1437         /**
   1438          * Constant for the "Hanunoo" Unicode character block.
   1439          * @since 1.5
   1440          */
   1441         public static final UnicodeBlock HANUNOO =
   1442             new UnicodeBlock("HANUNOO");
   1443 
   1444         /**
   1445          * Constant for the "Buhid" Unicode character block.
   1446          * @since 1.5
   1447          */
   1448         public static final UnicodeBlock BUHID =
   1449             new UnicodeBlock("BUHID");
   1450 
   1451         /**
   1452          * Constant for the "Tagbanwa" Unicode character block.
   1453          * @since 1.5
   1454          */
   1455         public static final UnicodeBlock TAGBANWA =
   1456             new UnicodeBlock("TAGBANWA");
   1457 
   1458         /**
   1459          * Constant for the "Limbu" Unicode character block.
   1460          * @since 1.5
   1461          */
   1462         public static final UnicodeBlock LIMBU =
   1463             new UnicodeBlock("LIMBU");
   1464 
   1465         /**
   1466          * Constant for the "Tai Le" Unicode character block.
   1467          * @since 1.5
   1468          */
   1469         public static final UnicodeBlock TAI_LE =
   1470             new UnicodeBlock("TAI_LE",
   1471                              "TAI LE",
   1472                              "TAILE");
   1473 
   1474         /**
   1475          * Constant for the "Khmer Symbols" Unicode character block.
   1476          * @since 1.5
   1477          */
   1478         public static final UnicodeBlock KHMER_SYMBOLS =
   1479             new UnicodeBlock("KHMER_SYMBOLS",
   1480                              "KHMER SYMBOLS",
   1481                              "KHMERSYMBOLS");
   1482 
   1483         /**
   1484          * Constant for the "Phonetic Extensions" Unicode character block.
   1485          * @since 1.5
   1486          */
   1487         public static final UnicodeBlock PHONETIC_EXTENSIONS =
   1488             new UnicodeBlock("PHONETIC_EXTENSIONS",
   1489                              "PHONETIC EXTENSIONS",
   1490                              "PHONETICEXTENSIONS");
   1491 
   1492         /**
   1493          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
   1494          * @since 1.5
   1495          */
   1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
                    // The hyphen before the trailing letter becomes '_' in the identifier but
                    // is kept in both extra names; the "-A"/"-B" constants below do the same.
   1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
   1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
   1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
   1500 
   1501         /**
   1502          * Constant for the "Supplemental Arrows-A" Unicode character block.
   1503          * @since 1.5
   1504          */
   1505         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
   1506             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
   1507                              "SUPPLEMENTAL ARROWS-A",
   1508                              "SUPPLEMENTALARROWS-A");
   1509 
   1510         /**
   1511          * Constant for the "Supplemental Arrows-B" Unicode character block.
   1512          * @since 1.5
   1513          */
   1514         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
   1515             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
   1516                              "SUPPLEMENTAL ARROWS-B",
   1517                              "SUPPLEMENTALARROWS-B");
   1518 
   1519         /**
   1520          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
   1521          * character block.
   1522          * @since 1.5
   1523          */
   1524         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
   1525             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
   1526                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
   1527                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
   1528 
   1529         /**
   1530          * Constant for the "Supplemental Mathematical Operators" Unicode
   1531          * character block.
   1532          * @since 1.5
   1533          */
   1534         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
   1535             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
   1536                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
   1537                              "SUPPLEMENTALMATHEMATICALOPERATORS");
   1538 
   1539         /**
   1540          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
   1541          * block.
   1542          * @since 1.5
   1543          */
   1544         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
   1545             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
   1546                              "MISCELLANEOUS SYMBOLS AND ARROWS",
   1547                              "MISCELLANEOUSSYMBOLSANDARROWS");
   1548 
   1549         /**
   1550          * Constant for the "Katakana Phonetic Extensions" Unicode character
   1551          * block.
   1552          * @since 1.5
   1553          */
   1554         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
   1555             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
   1556                              "KATAKANA PHONETIC EXTENSIONS",
   1557                              "KATAKANAPHONETICEXTENSIONS");
   1558 
   1559         /**
   1560          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
   1561          * @since 1.5
   1562          */
   1563         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
   1564             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
   1565                              "YIJING HEXAGRAM SYMBOLS",
   1566                              "YIJINGHEXAGRAMSYMBOLS");
   1567 
   1568         /**
   1569          * Constant for the "Variation Selectors" Unicode character block.
   1570          * @since 1.5
   1571          */
   1572         public static final UnicodeBlock VARIATION_SELECTORS =
   1573             new UnicodeBlock("VARIATION_SELECTORS",
   1574                              "VARIATION SELECTORS",
   1575                              "VARIATIONSELECTORS");
   1576 
   1577         /**
   1578          * Constant for the "Linear B Syllabary" Unicode character block.
   1579          * @since 1.5
   1580          */
   1581         public static final UnicodeBlock LINEAR_B_SYLLABARY =
   1582             new UnicodeBlock("LINEAR_B_SYLLABARY",  // identifier; matches the field name
   1583                              "LINEAR B SYLLABARY",  // upper-cased block name (presumably a lookup alias -- TODO confirm in constructor)
   1584                              "LINEARBSYLLABARY");   // block name with the spaces removed
   1585 
   1586         /**
   1587          * Constant for the "Linear B Ideograms" Unicode character block.
   1588          * @since 1.5
   1589          */
   1590         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
   1591             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
   1592                              "LINEAR B IDEOGRAMS",
   1593                              "LINEARBIDEOGRAMS");
   1594 
   1595         /**
   1596          * Constant for the "Aegean Numbers" Unicode character block.
   1597          * @since 1.5
   1598          */
   1599         public static final UnicodeBlock AEGEAN_NUMBERS =
   1600             new UnicodeBlock("AEGEAN_NUMBERS",
   1601                              "AEGEAN NUMBERS",
   1602                              "AEGEANNUMBERS");
   1603 
   1604         /**
   1605          * Constant for the "Old Italic" Unicode character block.
   1606          * @since 1.5
   1607          */
   1608         public static final UnicodeBlock OLD_ITALIC =
   1609             new UnicodeBlock("OLD_ITALIC",
   1610                              "OLD ITALIC",
   1611                              "OLDITALIC");
   1612 
   1613         /**
   1614          * Constant for the "Gothic" Unicode character block.
   1615          * @since 1.5
   1616          */
   1617         public static final UnicodeBlock GOTHIC =
   1618             new UnicodeBlock("GOTHIC");  // one-word name: only the identifier is passed, no extra spellings
   1619 
   1620         /**
   1621          * Constant for the "Ugaritic" Unicode character block.
   1622          * @since 1.5
   1623          */
   1624         public static final UnicodeBlock UGARITIC =
   1625             new UnicodeBlock("UGARITIC");
   1626 
   1627         /**
   1628          * Constant for the "Deseret" Unicode character block.
   1629          * @since 1.5
   1630          */
   1631         public static final UnicodeBlock DESERET =
   1632             new UnicodeBlock("DESERET");
   1633 
   1634         /**
   1635          * Constant for the "Shavian" Unicode character block.
   1636          * @since 1.5
   1637          */
   1638         public static final UnicodeBlock SHAVIAN =
   1639             new UnicodeBlock("SHAVIAN");
   1640 
   1641         /**
   1642          * Constant for the "Osmanya" Unicode character block.
   1643          * @since 1.5
   1644          */
   1645         public static final UnicodeBlock OSMANYA =
   1646             new UnicodeBlock("OSMANYA");
   1647 
   1648         /**
   1649          * Constant for the "Cypriot Syllabary" Unicode character block.
   1650          * @since 1.5
   1651          */
   1652         public static final UnicodeBlock CYPRIOT_SYLLABARY =
   1653             new UnicodeBlock("CYPRIOT_SYLLABARY",
   1654                              "CYPRIOT SYLLABARY",
   1655                              "CYPRIOTSYLLABARY");
   1656 
   1657         /**
   1658          * Constant for the "Byzantine Musical Symbols" Unicode character block.
   1659          * @since 1.5
   1660          */
   1661         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
   1662             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
   1663                              "BYZANTINE MUSICAL SYMBOLS",
   1664                              "BYZANTINEMUSICALSYMBOLS");
   1665 
   1666         /**
   1667          * Constant for the "Musical Symbols" Unicode character block.
   1668          * @since 1.5
   1669          */
   1670         public static final UnicodeBlock MUSICAL_SYMBOLS =
   1671             new UnicodeBlock("MUSICAL_SYMBOLS",
   1672                              "MUSICAL SYMBOLS",
   1673                              "MUSICALSYMBOLS");
   1674 
   1675         /**
   1676          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
   1677          * @since 1.5
   1678          */
   1679         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
   1680             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
   1681                              "TAI XUAN JING SYMBOLS",
   1682                              "TAIXUANJINGSYMBOLS");
   1683 
   1684         /**
   1685          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
   1686          * character block.
   1687          * @since 1.5
   1688          */
   1689         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
   1690             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
   1691                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
   1692                              "MATHEMATICALALPHANUMERICSYMBOLS");
   1693 
   1694         /**
   1695          * Constant for the "CJK Unified Ideographs Extension B" Unicode
   1696          * character block.
   1697          * @since 1.5
   1698          */
   1699         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
   1700             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",  // identifier; matches the field name
   1701                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",  // upper-cased block name (presumably a lookup alias -- TODO confirm in constructor)
   1702                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");     // block name with the spaces removed
   1703 
   1704         /**
   1705          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
   1706          * @since 1.5
   1707          */
   1708         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
   1709             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
   1710                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
   1711                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
   1712 
   1713         /**
   1714          * Constant for the "Tags" Unicode character block.
   1715          * @since 1.5
   1716          */
   1717         public static final UnicodeBlock TAGS =
   1718             new UnicodeBlock("TAGS");  // one-word name: only the identifier is passed, no extra spellings
   1719 
   1720         /**
   1721          * Constant for the "Variation Selectors Supplement" Unicode character
   1722          * block.
   1723          * @since 1.5
   1724          */
   1725         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
   1726             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
   1727                              "VARIATION SELECTORS SUPPLEMENT",
   1728                              "VARIATIONSELECTORSSUPPLEMENT");
   1729 
   1730         /**
   1731          * Constant for the "Supplementary Private Use Area-A" Unicode character
   1732          * block.
   1733          * @since 1.5
   1734          */
   1735         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
   1736             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",  // '-' of the block name becomes '_' here
   1737                              "SUPPLEMENTARY PRIVATE USE AREA-A",
   1738                              "SUPPLEMENTARYPRIVATEUSEAREA-A");    // only spaces are dropped; the hyphen stays
   1739 
   1740         /**
   1741          * Constant for the "Supplementary Private Use Area-B" Unicode character
   1742          * block.
   1743          * @since 1.5
   1744          */
   1745         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
   1746             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
   1747                              "SUPPLEMENTARY PRIVATE USE AREA-B",
   1748                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
   1749 
   1750         /**
   1751          * Constant for the "High Surrogates" Unicode character block.
   1752          * This block represents code point values in the high surrogate
   1753          * range: U+D800 through U+DB7F.
   1754          *
   1755          * @since 1.5
   1756          */
   1757         public static final UnicodeBlock HIGH_SURROGATES =
   1758             new UnicodeBlock("HIGH_SURROGATES",
   1759                              "HIGH SURROGATES",
   1760                              "HIGHSURROGATES");
   1761 
   1762         /**
   1763          * Constant for the "High Private Use Surrogates" Unicode character
   1764          * block.
   1765          * This block represents code point values in the private use high
   1766          * surrogate range: U+DB80 through U+DBFF.
   1767          *
   1768          * @since 1.5
   1769          */
   1770         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
   1771             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
   1772                              "HIGH PRIVATE USE SURROGATES",
   1773                              "HIGHPRIVATEUSESURROGATES");
   1774 
   1775         /**
   1776          * Constant for the "Low Surrogates" Unicode character block.
   1777          * This block represents code point values in the low surrogate
   1778          * range: U+DC00 through U+DFFF.
   1779          *
   1780          * @since 1.5
   1781          */
   1782         public static final UnicodeBlock LOW_SURROGATES =
   1783             new UnicodeBlock("LOW_SURROGATES",
   1784                              "LOW SURROGATES",
   1785                              "LOWSURROGATES");
   1786 
   1787         /**
   1788          * Constant for the "Arabic Supplement" Unicode character block.
   1789          * @since 1.7
   1790          */
   1791         public static final UnicodeBlock ARABIC_SUPPLEMENT =
   1792             new UnicodeBlock("ARABIC_SUPPLEMENT",  // identifier; matches the field name
   1793                              "ARABIC SUPPLEMENT",  // upper-cased block name (presumably a lookup alias -- TODO confirm in constructor)
   1794                              "ARABICSUPPLEMENT");  // block name with the spaces removed
   1795 
   1796         /**
   1797          * Constant for the "NKo" Unicode character block.
   1798          * @since 1.7
   1799          */
   1800         public static final UnicodeBlock NKO =
   1801             new UnicodeBlock("NKO");  // one-word name: only the identifier is passed, no extra spellings
   1802 
   1803         /**
   1804          * Constant for the "Samaritan" Unicode character block.
   1805          * @since 1.7
   1806          */
   1807         public static final UnicodeBlock SAMARITAN =
   1808             new UnicodeBlock("SAMARITAN");
   1809 
   1810         /**
   1811          * Constant for the "Mandaic" Unicode character block.
   1812          * @since 1.7
   1813          */
   1814         public static final UnicodeBlock MANDAIC =
   1815             new UnicodeBlock("MANDAIC");
   1816 
   1817         /**
   1818          * Constant for the "Ethiopic Supplement" Unicode character block.
   1819          * @since 1.7
   1820          */
   1821         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
   1822             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
   1823                              "ETHIOPIC SUPPLEMENT",
   1824                              "ETHIOPICSUPPLEMENT");
   1825 
   1826         /**
   1827          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
   1828          * Unicode character block.
   1829          * @since 1.7
   1830          */
   1831         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
   1832             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
   1833                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
   1834                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
   1835 
   1836         /**
   1837          * Constant for the "New Tai Lue" Unicode character block.
   1838          * @since 1.7
   1839          */
   1840         public static final UnicodeBlock NEW_TAI_LUE =
   1841             new UnicodeBlock("NEW_TAI_LUE",
   1842                              "NEW TAI LUE",
   1843                              "NEWTAILUE");
   1844 
   1845         /**
   1846          * Constant for the "Buginese" Unicode character block.
   1847          * @since 1.7
   1848          */
   1849         public static final UnicodeBlock BUGINESE =
   1850             new UnicodeBlock("BUGINESE");
   1851 
   1852         /**
   1853          * Constant for the "Tai Tham" Unicode character block.
   1854          * @since 1.7
   1855          */
   1856         public static final UnicodeBlock TAI_THAM =
   1857             new UnicodeBlock("TAI_THAM",
   1858                              "TAI THAM",
   1859                              "TAITHAM");
   1860 
   1861         /**
   1862          * Constant for the "Balinese" Unicode character block.
   1863          * @since 1.7
   1864          */
   1865         public static final UnicodeBlock BALINESE =
   1866             new UnicodeBlock("BALINESE");
   1867 
   1868         /**
   1869          * Constant for the "Sundanese" Unicode character block.
   1870          * @since 1.7
   1871          */
   1872         public static final UnicodeBlock SUNDANESE =
   1873             new UnicodeBlock("SUNDANESE");
   1874 
   1875         /**
   1876          * Constant for the "Batak" Unicode character block.
   1877          * @since 1.7
   1878          */
   1879         public static final UnicodeBlock BATAK =
   1880             new UnicodeBlock("BATAK");
   1881 
   1882         /**
   1883          * Constant for the "Lepcha" Unicode character block.
   1884          * @since 1.7
   1885          */
   1886         public static final UnicodeBlock LEPCHA =
   1887             new UnicodeBlock("LEPCHA");
   1888 
   1889         /**
   1890          * Constant for the "Ol Chiki" Unicode character block.
   1891          * @since 1.7
   1892          */
   1893         public static final UnicodeBlock OL_CHIKI =
   1894             new UnicodeBlock("OL_CHIKI",
   1895                              "OL CHIKI",
   1896                              "OLCHIKI");
   1897 
   1898         /**
   1899          * Constant for the "Vedic Extensions" Unicode character block.
   1900          * @since 1.7
   1901          */
   1902         public static final UnicodeBlock VEDIC_EXTENSIONS =
   1903             new UnicodeBlock("VEDIC_EXTENSIONS",
   1904                              "VEDIC EXTENSIONS",
   1905                              "VEDICEXTENSIONS");
   1906 
   1907         /**
   1908          * Constant for the "Phonetic Extensions Supplement" Unicode character
   1909          * block.
   1910          * @since 1.7
   1911          */
   1912         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
   1913             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
   1914                              "PHONETIC EXTENSIONS SUPPLEMENT",
   1915                              "PHONETICEXTENSIONSSUPPLEMENT");
   1916 
   1917         /**
   1918          * Constant for the "Combining Diacritical Marks Supplement" Unicode
   1919          * character block.
   1920          * @since 1.7
   1921          */
   1922         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
   1923             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
   1924                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
   1925                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
   1926 
   1927         /**
   1928          * Constant for the "Glagolitic" Unicode character block.
   1929          * @since 1.7
   1930          */
   1931         public static final UnicodeBlock GLAGOLITIC =
   1932             new UnicodeBlock("GLAGOLITIC");  // one-word name: only the identifier is passed, no extra spellings
   1933 
   1934         /**
   1935          * Constant for the "Latin Extended-C" Unicode character block.
   1936          * @since 1.7
   1937          */
   1938         public static final UnicodeBlock LATIN_EXTENDED_C =
   1939             new UnicodeBlock("LATIN_EXTENDED_C",  // identifier; matches the field name ('-' becomes '_')
   1940                              "LATIN EXTENDED-C",  // upper-cased block name (presumably a lookup alias -- TODO confirm in constructor)
   1941                              "LATINEXTENDED-C");  // block name with the spaces removed; the hyphen stays
   1942 
   1943         /**
   1944          * Constant for the "Coptic" Unicode character block.
   1945          * @since 1.7
   1946          */
   1947         public static final UnicodeBlock COPTIC =
   1948             new UnicodeBlock("COPTIC");
   1949 
   1950         /**
   1951          * Constant for the "Georgian Supplement" Unicode character block.
   1952          * @since 1.7
   1953          */
   1954         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
   1955             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
   1956                              "GEORGIAN SUPPLEMENT",
   1957                              "GEORGIANSUPPLEMENT");
   1958 
   1959         /**
   1960          * Constant for the "Tifinagh" Unicode character block.
   1961          * @since 1.7
   1962          */
   1963         public static final UnicodeBlock TIFINAGH =
   1964             new UnicodeBlock("TIFINAGH");
   1965 
   1966         /**
   1967          * Constant for the "Ethiopic Extended" Unicode character block.
   1968          * @since 1.7
   1969          */
   1970         public static final UnicodeBlock ETHIOPIC_EXTENDED =
   1971             new UnicodeBlock("ETHIOPIC_EXTENDED",
   1972                              "ETHIOPIC EXTENDED",
   1973                              "ETHIOPICEXTENDED");
   1974 
   1975         /**
   1976          * Constant for the "Cyrillic Extended-A" Unicode character block.
   1977          * @since 1.7
   1978          */
   1979         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
   1980             new UnicodeBlock("CYRILLIC_EXTENDED_A",
   1981                              "CYRILLIC EXTENDED-A",
   1982                              "CYRILLICEXTENDED-A");
   1983 
   1984         /**
   1985          * Constant for the "Supplemental Punctuation" Unicode character block.
   1986          * @since 1.7
   1987          */
   1988         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
   1989             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
   1990                              "SUPPLEMENTAL PUNCTUATION",
   1991                              "SUPPLEMENTALPUNCTUATION");
   1992 
   1993         /**
   1994          * Constant for the "CJK Strokes" Unicode character block.
   1995          * @since 1.7
   1996          */
   1997         public static final UnicodeBlock CJK_STROKES =
   1998             new UnicodeBlock("CJK_STROKES",
   1999                              "CJK STROKES",
   2000                              "CJKSTROKES");
   2001 
   2002         /**
   2003          * Constant for the "Lisu" Unicode character block.
   2004          * @since 1.7
   2005          */
   2006         public static final UnicodeBlock LISU =
   2007             new UnicodeBlock("LISU");
   2008 
   2009         /**
   2010          * Constant for the "Vai" Unicode character block.
   2011          * @since 1.7
   2012          */
   2013         public static final UnicodeBlock VAI =
   2014             new UnicodeBlock("VAI");
   2015 
   2016         /**
   2017          * Constant for the "Cyrillic Extended-B" Unicode character block.
   2018          * @since 1.7
   2019          */
   2020         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
   2021             new UnicodeBlock("CYRILLIC_EXTENDED_B",
   2022                              "CYRILLIC EXTENDED-B",
   2023                              "CYRILLICEXTENDED-B");
   2024 
   2025         /**
   2026          * Constant for the "Bamum" Unicode character block.
   2027          * @since 1.7
   2028          */
   2029         public static final UnicodeBlock BAMUM =
   2030             new UnicodeBlock("BAMUM");
   2031 
   2032         /**
   2033          * Constant for the "Modifier Tone Letters" Unicode character block.
   2034          * @since 1.7
   2035          */
   2036         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
   2037             new UnicodeBlock("MODIFIER_TONE_LETTERS",
   2038                              "MODIFIER TONE LETTERS",
   2039                              "MODIFIERTONELETTERS");
   2040 
   2041         /**
   2042          * Constant for the "Latin Extended-D" Unicode character block.
   2043          * @since 1.7
   2044          */
   2045         public static final UnicodeBlock LATIN_EXTENDED_D =
   2046             new UnicodeBlock("LATIN_EXTENDED_D",
   2047                              "LATIN EXTENDED-D",
   2048                              "LATINEXTENDED-D");
   2049 
   2050         /**
   2051          * Constant for the "Syloti Nagri" Unicode character block.
   2052          * @since 1.7
   2053          */
   2054         public static final UnicodeBlock SYLOTI_NAGRI =
   2055             new UnicodeBlock("SYLOTI_NAGRI",
   2056                              "SYLOTI NAGRI",
   2057                              "SYLOTINAGRI");
   2058 
   2059         /**
   2060          * Constant for the "Common Indic Number Forms" Unicode character block.
   2061          * @since 1.7
   2062          */
   2063         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
   2064             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
   2065                              "COMMON INDIC NUMBER FORMS",
   2066                              "COMMONINDICNUMBERFORMS");
   2067 
   2068         /**
   2069          * Constant for the "Phags-pa" Unicode character block.
   2070          * @since 1.7
   2071          */
   2072         public static final UnicodeBlock PHAGS_PA =
   2073             new UnicodeBlock("PHAGS_PA",   // identifier; '-' of the block name becomes '_'
   2074                              "PHAGS-PA");  // the name has no spaces, so a space-free third spelling would be identical
   2075 
   2076         /**
   2077          * Constant for the "Saurashtra" Unicode character block.
   2078          * @since 1.7
   2079          */
   2080         public static final UnicodeBlock SAURASHTRA =
   2081             new UnicodeBlock("SAURASHTRA");  // one-word name: only the identifier is passed, no extra spellings
   2082 
   2083         /**
   2084          * Constant for the "Devanagari Extended" Unicode character block.
   2085          * @since 1.7
   2086          */
   2087         public static final UnicodeBlock DEVANAGARI_EXTENDED =
   2088             new UnicodeBlock("DEVANAGARI_EXTENDED",  // identifier; matches the field name
   2089                              "DEVANAGARI EXTENDED",  // upper-cased block name (presumably a lookup alias -- TODO confirm in constructor)
   2090                              "DEVANAGARIEXTENDED");  // block name with the spaces removed
   2091 
   2092         /**
   2093          * Constant for the "Kayah Li" Unicode character block.
   2094          * @since 1.7
   2095          */
   2096         public static final UnicodeBlock KAYAH_LI =
   2097             new UnicodeBlock("KAYAH_LI",
   2098                              "KAYAH LI",
   2099                              "KAYAHLI");
   2100 
   2101         /**
   2102          * Constant for the "Rejang" Unicode character block.
   2103          * @since 1.7
   2104          */
   2105         public static final UnicodeBlock REJANG =
   2106             new UnicodeBlock("REJANG");
   2107 
   2108         /**
   2109          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
   2110          * @since 1.7
   2111          */
   2112         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
   2113             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",  // '-' of the block name becomes '_' here
   2114                              "HANGUL JAMO EXTENDED-A",
   2115                              "HANGULJAMOEXTENDED-A");   // only spaces are dropped; the hyphen stays
   2116 
   2117         /**
   2118          * Constant for the "Javanese" Unicode character block.
   2119          * @since 1.7
   2120          */
   2121         public static final UnicodeBlock JAVANESE =
   2122             new UnicodeBlock("JAVANESE");
   2123 
   2124         /**
   2125          * Constant for the "Cham" Unicode character block.
   2126          * @since 1.7
   2127          */
   2128         public static final UnicodeBlock CHAM =
   2129             new UnicodeBlock("CHAM");
   2130 
   2131         /**
   2132          * Constant for the "Myanmar Extended-A" Unicode character block.
   2133          * @since 1.7
   2134          */
   2135         public static final UnicodeBlock MYANMAR_EXTENDED_A =
   2136             new UnicodeBlock("MYANMAR_EXTENDED_A",
   2137                              "MYANMAR EXTENDED-A",
   2138                              "MYANMAREXTENDED-A");
   2139 
   2140         /**
   2141          * Constant for the "Tai Viet" Unicode character block.
   2142          * @since 1.7
   2143          */
   2144         public static final UnicodeBlock TAI_VIET =
   2145             new UnicodeBlock("TAI_VIET",
   2146                              "TAI VIET",
   2147                              "TAIVIET");
   2148 
   2149         /**
   2150          * Constant for the "Ethiopic Extended-A" Unicode character block.
   2151          * @since 1.7
   2152          */
   2153         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
   2154             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
   2155                              "ETHIOPIC EXTENDED-A",
   2156                              "ETHIOPICEXTENDED-A");
   2157 
   2158         /**
   2159          * Constant for the "Meetei Mayek" Unicode character block.
   2160          * @since 1.7
   2161          */
   2162         public static final UnicodeBlock MEETEI_MAYEK =
   2163             new UnicodeBlock("MEETEI_MAYEK",
   2164                              "MEETEI MAYEK",
   2165                              "MEETEIMAYEK");
   2166 
   2167         /**
   2168          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
   2169          * @since 1.7
   2170          */
   2171         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
   2172             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
   2173                              "HANGUL JAMO EXTENDED-B",
   2174                              "HANGULJAMOEXTENDED-B");
   2175 
   2176         /**
   2177          * Constant for the "Vertical Forms" Unicode character block.
   2178          * @since 1.7
   2179          */
   2180         public static final UnicodeBlock VERTICAL_FORMS =
   2181             new UnicodeBlock("VERTICAL_FORMS",
   2182                              "VERTICAL FORMS",
   2183                              "VERTICALFORMS");
   2184 
   2185         /**
   2186          * Constant for the "Ancient Greek Numbers" Unicode character block.
   2187          * @since 1.7
   2188          */
   2189         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
   2190             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
   2191                              "ANCIENT GREEK NUMBERS",
   2192                              "ANCIENTGREEKNUMBERS");
   2193 
   2194         /**
   2195          * Constant for the "Ancient Symbols" Unicode character block.
   2196          * @since 1.7
   2197          */
   2198         public static final UnicodeBlock ANCIENT_SYMBOLS =
   2199             new UnicodeBlock("ANCIENT_SYMBOLS",
   2200                              "ANCIENT SYMBOLS",
   2201                              "ANCIENTSYMBOLS");
   2202 
   2203         /**
   2204          * Constant for the "Phaistos Disc" Unicode character block.
   2205          * @since 1.7
   2206          */
   2207         public static final UnicodeBlock PHAISTOS_DISC =
   2208             new UnicodeBlock("PHAISTOS_DISC",
   2209                              "PHAISTOS DISC",
   2210                              "PHAISTOSDISC");
   2211 
   2212         /**
   2213          * Constant for the "Lycian" Unicode character block.
   2214          * @since 1.7
   2215          */
   2216         public static final UnicodeBlock LYCIAN =
   2217             new UnicodeBlock("LYCIAN");
   2218 
   2219         /**
   2220          * Constant for the "Carian" Unicode character block.
   2221          * @since 1.7
   2222          */
   2223         public static final UnicodeBlock CARIAN =
   2224             new UnicodeBlock("CARIAN");
   2225 
   2226         /**
   2227          * Constant for the "Old Persian" Unicode character block.
   2228          * @since 1.7
   2229          */
   2230         public static final UnicodeBlock OLD_PERSIAN =
   2231             new UnicodeBlock("OLD_PERSIAN",
   2232                              "OLD PERSIAN",
   2233                              "OLDPERSIAN");
   2234 
   2235         /**
   2236          * Constant for the "Imperial Aramaic" Unicode character block.
   2237          * @since 1.7
   2238          */
   2239         public static final UnicodeBlock IMPERIAL_ARAMAIC =
   2240             new UnicodeBlock("IMPERIAL_ARAMAIC",
   2241                              "IMPERIAL ARAMAIC",
   2242                              "IMPERIALARAMAIC");
   2243 
   2244         /**
   2245          * Constant for the "Phoenician" Unicode character block.
   2246          * @since 1.7
   2247          */
   2248         public static final UnicodeBlock PHOENICIAN =
   2249             new UnicodeBlock("PHOENICIAN");
   2250 
   2251         /**
   2252          * Constant for the "Lydian" Unicode character block.
   2253          * @since 1.7
   2254          */
   2255         public static final UnicodeBlock LYDIAN =
   2256             new UnicodeBlock("LYDIAN");
   2257 
   2258         /**
   2259          * Constant for the "Kharoshthi" Unicode character block (U+10A00..U+10A5F).
   2260          * @since 1.7
   2261          */
   2262         public static final UnicodeBlock KHAROSHTHI =
   2263             new UnicodeBlock("KHAROSHTHI");
   2264 
   2265         /**
   2266          * Constant for the "Old South Arabian" Unicode character block (U+10A60..U+10A7F).
   2267          * @since 1.7
   2268          */
   2269         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
   2270             new UnicodeBlock("OLD_SOUTH_ARABIAN",
   2271                              "OLD SOUTH ARABIAN",
   2272                              "OLDSOUTHARABIAN");
   2273 
   2274         /**
   2275          * Constant for the "Avestan" Unicode character block (U+10B00..U+10B3F).
   2276          * @since 1.7
   2277          */
   2278         public static final UnicodeBlock AVESTAN =
   2279             new UnicodeBlock("AVESTAN");
   2280 
   2281         /**
   2282          * Constant for the "Inscriptional Parthian" Unicode character block (U+10B40..U+10B5F).
   2283          * @since 1.7
   2284          */
   2285         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
   2286             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
   2287                              "INSCRIPTIONAL PARTHIAN",
   2288                              "INSCRIPTIONALPARTHIAN");
   2289 
   2290         /**
   2291          * Constant for the "Inscriptional Pahlavi" Unicode character block (U+10B60..U+10B7F).
   2292          * @since 1.7
   2293          */
   2294         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
   2295             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
   2296                              "INSCRIPTIONAL PAHLAVI",
   2297                              "INSCRIPTIONALPAHLAVI");
   2298 
   2299         /**
   2300          * Constant for the "Old Turkic" Unicode character block (U+10C00..U+10C4F).
   2301          * @since 1.7
   2302          */
   2303         public static final UnicodeBlock OLD_TURKIC =
   2304             new UnicodeBlock("OLD_TURKIC",
   2305                              "OLD TURKIC",
   2306                              "OLDTURKIC");
   2307 
   2308         /**
   2309          * Constant for the "Rumi Numeral Symbols" Unicode character block (U+10E60..U+10E7F).
   2310          * @since 1.7
   2311          */
   2312         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
   2313             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
   2314                              "RUMI NUMERAL SYMBOLS",
   2315                              "RUMINUMERALSYMBOLS");
   2316 
   2317         /**
   2318          * Constant for the "Brahmi" Unicode character block (U+11000..U+1107F).
   2319          * @since 1.7
   2320          */
   2321         public static final UnicodeBlock BRAHMI =
   2322             new UnicodeBlock("BRAHMI");
   2323 
   2324         /**
   2325          * Constant for the "Kaithi" Unicode character block (U+11080..U+110CF).
   2326          * @since 1.7
   2327          */
   2328         public static final UnicodeBlock KAITHI =
   2329             new UnicodeBlock("KAITHI");
   2330 
   2331         /**
   2332          * Constant for the "Cuneiform" Unicode character block (U+12000..U+123FF).
   2333          * @since 1.7
   2334          */
   2335         public static final UnicodeBlock CUNEIFORM =
   2336             new UnicodeBlock("CUNEIFORM");
   2337 
   2338         /**
   2339          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
   2340          * character block (U+12400..U+1247F).
   2341          * @since 1.7
   2342          */
   2343         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
   2344             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
   2345                              "CUNEIFORM NUMBERS AND PUNCTUATION",
   2346                              "CUNEIFORMNUMBERSANDPUNCTUATION");
   2347 
   2348         /**
   2349          * Constant for the "Egyptian Hieroglyphs" Unicode character block (U+13000..U+1342F).
   2350          * @since 1.7
   2351          */
   2352         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
   2353             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
   2354                              "EGYPTIAN HIEROGLYPHS",
   2355                              "EGYPTIANHIEROGLYPHS");
   2356 
   2357         /**
   2358          * Constant for the "Bamum Supplement" Unicode character block (U+16800..U+16A3F).
   2359          * @since 1.7
   2360          */
   2361         public static final UnicodeBlock BAMUM_SUPPLEMENT =
   2362             new UnicodeBlock("BAMUM_SUPPLEMENT",
   2363                              "BAMUM SUPPLEMENT",
   2364                              "BAMUMSUPPLEMENT");
   2365 
   2366         /**
   2367          * Constant for the "Kana Supplement" Unicode character block (U+1B000..U+1B0FF).
   2368          * @since 1.7
   2369          */
   2370         public static final UnicodeBlock KANA_SUPPLEMENT =
   2371             new UnicodeBlock("KANA_SUPPLEMENT",
   2372                              "KANA SUPPLEMENT",
   2373                              "KANASUPPLEMENT");
   2374 
   2375         /**
   2376          * Constant for the "Ancient Greek Musical Notation" Unicode character
   2377          * block (U+1D200..U+1D24F).
   2378          * @since 1.7
   2379          */
   2380         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
   2381             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
   2382                              "ANCIENT GREEK MUSICAL NOTATION",
   2383                              "ANCIENTGREEKMUSICALNOTATION");
   2384 
   2385         /**
   2386          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).
   2387          * @since 1.7
   2388          */
   2389         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
   2390             new UnicodeBlock("COUNTING_ROD_NUMERALS",
   2391                              "COUNTING ROD NUMERALS",
   2392                              "COUNTINGRODNUMERALS");
   2393 
   2394         /**
   2395          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).
   2396          * @since 1.7
   2397          */
   2398         public static final UnicodeBlock MAHJONG_TILES =
   2399             new UnicodeBlock("MAHJONG_TILES",
   2400                              "MAHJONG TILES",
   2401                              "MAHJONGTILES");
   2402 
   2403         /**
   2404          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).
   2405          * @since 1.7
   2406          */
   2407         public static final UnicodeBlock DOMINO_TILES =
   2408             new UnicodeBlock("DOMINO_TILES",
   2409                              "DOMINO TILES",
   2410                              "DOMINOTILES");
   2411 
   2412         /**
   2413          * Constant for the "Playing Cards" Unicode character block (U+1F0A0..U+1F0FF).
   2414          * @since 1.7
   2415          */
   2416         public static final UnicodeBlock PLAYING_CARDS =
   2417             new UnicodeBlock("PLAYING_CARDS",
   2418                              "PLAYING CARDS",
   2419                              "PLAYINGCARDS");
   2420 
   2421         /**
   2422          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
   2423          * block (U+1F100..U+1F1FF).
   2424          * @since 1.7
   2425          */
   2426         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
   2427             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
   2428                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
   2429                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
   2430 
   2431         /**
   2432          * Constant for the "Enclosed Ideographic Supplement" Unicode character
   2433          * block (U+1F200..U+1F2FF).
   2434          * @since 1.7
   2435          */
   2436         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
   2437             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
   2438                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
   2439                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
   2440 
   2441         /**
   2442          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
   2443          * character block (U+1F300..U+1F5FF).
   2444          * @since 1.7
   2445          */
   2446         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
   2447             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
   2448                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
   2449                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
   2450 
   2451         /**
   2452          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
   2453          * @since 1.7
   2454          */
   2455         public static final UnicodeBlock EMOTICONS =
   2456             new UnicodeBlock("EMOTICONS");
   2457 
   2458         /**
   2459          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
   2460          * @since 1.7
   2461          */
   2462         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
   2463             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
   2464                              "TRANSPORT AND MAP SYMBOLS",
   2465                              "TRANSPORTANDMAPSYMBOLS");
   2466 
   2467         /**
   2468          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
   2469          * @since 1.7
   2470          */
   2471         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
   2472             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
   2473                              "ALCHEMICAL SYMBOLS",
   2474                              "ALCHEMICALSYMBOLS");
   2475 
   2476         /**
   2477          * Constant for the "CJK Unified Ideographs Extension C" Unicode
   2478          * character block (U+2A700..U+2B73F).
   2479          * @since 1.7
   2480          */
   2481         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
   2482             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
   2483                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
   2484                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
   2485 
   2486         /**
   2487          * Constant for the "CJK Unified Ideographs Extension D" Unicode
   2488          * character block (U+2B740..U+2B81F).
   2489          * @since 1.7
   2490          */
   2491         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
   2492             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
   2493                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
   2494                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
   2495 
   2496         private static final int blockStarts[] = {  // first code point of each range, in ascending order
   2497             0x0000,   // 0000..007F; Basic Latin
   2498             0x0080,   // 0080..00FF; Latin-1 Supplement
   2499             0x0100,   // 0100..017F; Latin Extended-A
   2500             0x0180,   // 0180..024F; Latin Extended-B
   2501             0x0250,   // 0250..02AF; IPA Extensions
   2502             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
   2503             0x0300,   // 0300..036F; Combining Diacritical Marks
   2504             0x0370,   // 0370..03FF; Greek and Coptic
   2505             0x0400,   // 0400..04FF; Cyrillic
   2506             0x0500,   // 0500..052F; Cyrillic Supplement
   2507             0x0530,   // 0530..058F; Armenian
   2508             0x0590,   // 0590..05FF; Hebrew
   2509             0x0600,   // 0600..06FF; Arabic
   2510             0x0700,   // 0700..074F; Syriac
   2511             0x0750,   // 0750..077F; Arabic Supplement
   2512             0x0780,   // 0780..07BF; Thaana
   2513             0x07C0,   // 07C0..07FF; NKo
   2514             0x0800,   // 0800..083F; Samaritan
   2515             0x0840,   // 0840..085F; Mandaic
   2516             0x0860,   // 0860..08FF; unassigned
   2517             0x0900,   // 0900..097F; Devanagari
   2518             0x0980,   // 0980..09FF; Bengali
   2519             0x0A00,   // 0A00..0A7F; Gurmukhi
   2520             0x0A80,   // 0A80..0AFF; Gujarati
   2521             0x0B00,   // 0B00..0B7F; Oriya
   2522             0x0B80,   // 0B80..0BFF; Tamil
   2523             0x0C00,   // 0C00..0C7F; Telugu
   2524             0x0C80,   // 0C80..0CFF; Kannada
   2525             0x0D00,   // 0D00..0D7F; Malayalam
   2526             0x0D80,   // 0D80..0DFF; Sinhala
   2527             0x0E00,   // 0E00..0E7F; Thai
   2528             0x0E80,   // 0E80..0EFF; Lao
   2529             0x0F00,   // 0F00..0FFF; Tibetan
   2530             0x1000,   // 1000..109F; Myanmar
   2531             0x10A0,   // 10A0..10FF; Georgian
   2532             0x1100,   // 1100..11FF; Hangul Jamo
   2533             0x1200,   // 1200..137F; Ethiopic
   2534             0x1380,   // 1380..139F; Ethiopic Supplement
   2535             0x13A0,   // 13A0..13FF; Cherokee
   2536             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
   2537             0x1680,   // 1680..169F; Ogham
   2538             0x16A0,   // 16A0..16FF; Runic
   2539             0x1700,   // 1700..171F; Tagalog
   2540             0x1720,   // 1720..173F; Hanunoo
   2541             0x1740,   // 1740..175F; Buhid
   2542             0x1760,   // 1760..177F; Tagbanwa
   2543             0x1780,   // 1780..17FF; Khmer
   2544             0x1800,   // 1800..18AF; Mongolian
   2545             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
   2546             0x1900,   // 1900..194F; Limbu
   2547             0x1950,   // 1950..197F; Tai Le
   2548             0x1980,   // 1980..19DF; New Tai Lue
   2549             0x19E0,   // 19E0..19FF; Khmer Symbols
   2550             0x1A00,   // 1A00..1A1F; Buginese
   2551             0x1A20,   // 1A20..1AAF; Tai Tham
   2552             0x1AB0,   // 1AB0..1AFF; unassigned
   2553             0x1B00,   // 1B00..1B7F; Balinese
   2554             0x1B80,   // 1B80..1BBF; Sundanese
   2555             0x1BC0,   // 1BC0..1BFF; Batak
   2556             0x1C00,   // 1C00..1C4F; Lepcha
   2557             0x1C50,   // 1C50..1C7F; Ol Chiki
   2558             0x1C80,   // 1C80..1CCF; unassigned
   2559             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
   2560             0x1D00,   // 1D00..1D7F; Phonetic Extensions
   2561             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
   2562             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
   2563             0x1E00,   // 1E00..1EFF; Latin Extended Additional
   2564             0x1F00,   // 1F00..1FFF; Greek Extended
   2565             0x2000,   // 2000..206F; General Punctuation
   2566             0x2070,   // 2070..209F; Superscripts and Subscripts
   2567             0x20A0,   // 20A0..20CF; Currency Symbols
   2568             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
   2569             0x2100,   // 2100..214F; Letterlike Symbols
   2570             0x2150,   // 2150..218F; Number Forms
   2571             0x2190,   // 2190..21FF; Arrows
   2572             0x2200,   // 2200..22FF; Mathematical Operators
   2573             0x2300,   // 2300..23FF; Miscellaneous Technical
   2574             0x2400,   // 2400..243F; Control Pictures
   2575             0x2440,   // 2440..245F; Optical Character Recognition
   2576             0x2460,   // 2460..24FF; Enclosed Alphanumerics
   2577             0x2500,   // 2500..257F; Box Drawing
   2578             0x2580,   // 2580..259F; Block Elements
   2579             0x25A0,   // 25A0..25FF; Geometric Shapes
   2580             0x2600,   // 2600..26FF; Miscellaneous Symbols
   2581             0x2700,   // 2700..27BF; Dingbats
   2582             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
   2583             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
   2584             0x2800,   // 2800..28FF; Braille Patterns
   2585             0x2900,   // 2900..297F; Supplemental Arrows-B
   2586             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
   2587             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
   2588             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
   2589             0x2C00,   // 2C00..2C5F; Glagolitic
   2590             0x2C60,   // 2C60..2C7F; Latin Extended-C
   2591             0x2C80,   // 2C80..2CFF; Coptic
   2592             0x2D00,   // 2D00..2D2F; Georgian Supplement
   2593             0x2D30,   // 2D30..2D7F; Tifinagh
   2594             0x2D80,   // 2D80..2DDF; Ethiopic Extended
   2595             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
   2596             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
   2597             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
   2598             0x2F00,   // 2F00..2FDF; Kangxi Radicals
   2599             0x2FE0,   // 2FE0..2FEF; unassigned
   2600             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
   2601             0x3000,   // 3000..303F; CJK Symbols and Punctuation
   2602             0x3040,   // 3040..309F; Hiragana
   2603             0x30A0,   // 30A0..30FF; Katakana
   2604             0x3100,   // 3100..312F; Bopomofo
   2605             0x3130,   // 3130..318F; Hangul Compatibility Jamo
   2606             0x3190,   // 3190..319F; Kanbun
   2607             0x31A0,   // 31A0..31BF; Bopomofo Extended
   2608             0x31C0,   // 31C0..31EF; CJK Strokes
   2609             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
   2610             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
   2611             0x3300,   // 3300..33FF; CJK Compatibility
   2612             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
   2613             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
   2614             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
   2615             0xA000,   // A000..A48F; Yi Syllables
   2616             0xA490,   // A490..A4CF; Yi Radicals
   2617             0xA4D0,   // A4D0..A4FF; Lisu
   2618             0xA500,   // A500..A63F; Vai
   2619             0xA640,   // A640..A69F; Cyrillic Extended-B
   2620             0xA6A0,   // A6A0..A6FF; Bamum
   2621             0xA700,   // A700..A71F; Modifier Tone Letters
   2622             0xA720,   // A720..A7FF; Latin Extended-D
   2623             0xA800,   // A800..A82F; Syloti Nagri
   2624             0xA830,   // A830..A83F; Common Indic Number Forms
   2625             0xA840,   // A840..A87F; Phags-pa
   2626             0xA880,   // A880..A8DF; Saurashtra
   2627             0xA8E0,   // A8E0..A8FF; Devanagari Extended
   2628             0xA900,   // A900..A92F; Kayah Li
   2629             0xA930,   // A930..A95F; Rejang
   2630             0xA960,   // A960..A97F; Hangul Jamo Extended-A
   2631             0xA980,   // A980..A9DF; Javanese
   2632             0xA9E0,   // A9E0..A9FF; unassigned
   2633             0xAA00,   // AA00..AA5F; Cham
   2634             0xAA60,   // AA60..AA7F; Myanmar Extended-A
   2635             0xAA80,   // AA80..AADF; Tai Viet
   2636             0xAAE0,   // AAE0..AAFF; unassigned
   2637             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
   2638             0xAB30,   // AB30..ABBF; unassigned
   2639             0xABC0,   // ABC0..ABFF; Meetei Mayek
   2640             0xAC00,   // AC00..D7AF; Hangul Syllables
   2641             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
   2642             0xD800,   // D800..DB7F; High Surrogates
   2643             0xDB80,   // DB80..DBFF; High Private Use Surrogates
   2644             0xDC00,   // DC00..DFFF; Low Surrogates
   2645             0xE000,   // E000..F8FF; Private Use Area
   2646             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
   2647             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
   2648             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
   2649             0xFE00,   // FE00..FE0F; Variation Selectors
   2650             0xFE10,   // FE10..FE1F; Vertical Forms
   2651             0xFE20,   // FE20..FE2F; Combining Half Marks
   2652             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
   2653             0xFE50,   // FE50..FE6F; Small Form Variants
   2654             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
   2655             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
   2656             0xFFF0,   // FFF0..FFFF; Specials
   2657             0x10000,  // 10000..1007F; Linear B Syllabary
   2658             0x10080,  // 10080..100FF; Linear B Ideograms
   2659             0x10100,  // 10100..1013F; Aegean Numbers
   2660             0x10140,  // 10140..1018F; Ancient Greek Numbers
   2661             0x10190,  // 10190..101CF; Ancient Symbols
   2662             0x101D0,  // 101D0..101FF; Phaistos Disc
   2663             0x10200,  // 10200..1027F; unassigned
   2664             0x10280,  // 10280..1029F; Lycian
   2665             0x102A0,  // 102A0..102DF; Carian
   2666             0x102E0,  // 102E0..102FF; unassigned
   2667             0x10300,  // 10300..1032F; Old Italic
   2668             0x10330,  // 10330..1034F; Gothic
   2669             0x10350,  // 10350..1037F; unassigned
   2670             0x10380,  // 10380..1039F; Ugaritic
   2671             0x103A0,  // 103A0..103DF; Old Persian
   2672             0x103E0,  // 103E0..103FF; unassigned
   2673             0x10400,  // 10400..1044F; Deseret
   2674             0x10450,  // 10450..1047F; Shavian
   2675             0x10480,  // 10480..104AF; Osmanya
   2676             0x104B0,  // 104B0..107FF; unassigned
   2677             0x10800,  // 10800..1083F; Cypriot Syllabary
   2678             0x10840,  // 10840..1085F; Imperial Aramaic
   2679             0x10860,  // 10860..108FF; unassigned
   2680             0x10900,  // 10900..1091F; Phoenician
   2681             0x10920,  // 10920..1093F; Lydian
   2682             0x10940,  // 10940..109FF; unassigned
   2683             0x10A00,  // 10A00..10A5F; Kharoshthi
   2684             0x10A60,  // 10A60..10A7F; Old South Arabian
   2685             0x10A80,  // 10A80..10AFF; unassigned
   2686             0x10B00,  // 10B00..10B3F; Avestan
   2687             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
   2688             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
   2689             0x10B80,  // 10B80..10BFF; unassigned
   2690             0x10C00,  // 10C00..10C4F; Old Turkic
   2691             0x10C50,  // 10C50..10E5F; unassigned
   2692             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
   2693             0x10E80,  // 10E80..10FFF; unassigned
   2694             0x11000,  // 11000..1107F; Brahmi
   2695             0x11080,  // 11080..110CF; Kaithi
   2696             0x110D0,  // 110D0..11FFF; unassigned
   2697             0x12000,  // 12000..123FF; Cuneiform
   2698             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
   2699             0x12480,  // 12480..12FFF; unassigned
   2700             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
   2701             0x13430,  // 13430..167FF; unassigned
   2702             0x16800,  // 16800..16A3F; Bamum Supplement
   2703             0x16A40,  // 16A40..1AFFF; unassigned
   2704             0x1B000,  // 1B000..1B0FF; Kana Supplement
   2705             0x1B100,  // 1B100..1CFFF; unassigned
   2706             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
   2707             0x1D100,  // 1D100..1D1FF; Musical Symbols
   2708             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
   2709             0x1D250,  // 1D250..1D2FF; unassigned
   2710             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
   2711             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
   2712             0x1D380,  // 1D380..1D3FF; unassigned
   2713             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
   2714             0x1D800,  // 1D800..1EFFF; unassigned
   2715             0x1F000,  // 1F000..1F02F; Mahjong Tiles
   2716             0x1F030,  // 1F030..1F09F; Domino Tiles
   2717             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
   2718             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
   2719             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
   2720             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
   2721             0x1F600,  // 1F600..1F64F; Emoticons
   2722             0x1F650,  // 1F650..1F67F; unassigned
   2723             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
   2724             0x1F700,  // 1F700..1F77F; Alchemical Symbols
   2725             0x1F780,  // 1F780..1FFFF; unassigned
   2726             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
   2727             0x2A6E0,  // 2A6E0..2A6FF; unassigned
   2728             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
   2729             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
   2730             0x2B820,  // 2B820..2F7FF; unassigned
   2731             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
   2732             0x2FA20,  // 2FA20..DFFFF; unassigned
   2733             0xE0000,  // E0000..E007F; Tags
   2734             0xE0080,  // E0080..E00FF; unassigned
   2735             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
   2736             0xE01F0,  // E01F0..EFFFF; unassigned
   2737             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
   2738             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
   2739         };  // index-aligned with blocks[]: each "unassigned" start lines up with a null entry there
   2740 
   2741         private static final UnicodeBlock[] blocks = {
   2742             BASIC_LATIN,
   2743             LATIN_1_SUPPLEMENT,
   2744             LATIN_EXTENDED_A,
   2745             LATIN_EXTENDED_B,
   2746             IPA_EXTENSIONS,
   2747             SPACING_MODIFIER_LETTERS,
   2748             COMBINING_DIACRITICAL_MARKS,
   2749             GREEK,
   2750             CYRILLIC,
   2751             CYRILLIC_SUPPLEMENTARY,
   2752             ARMENIAN,
   2753             HEBREW,
   2754             ARABIC,
   2755             SYRIAC,
   2756             ARABIC_SUPPLEMENT,
   2757             THAANA,
   2758             NKO,
   2759             SAMARITAN,
   2760             MANDAIC,
   2761             null,
   2762             DEVANAGARI,
   2763             BENGALI,
   2764             GURMUKHI,
   2765             GUJARATI,
   2766             ORIYA,
   2767             TAMIL,
   2768             TELUGU,
   2769             KANNADA,
   2770             MALAYALAM,
   2771             SINHALA,
   2772             THAI,
   2773             LAO,
   2774             TIBETAN,
   2775             MYANMAR,
   2776             GEORGIAN,
   2777             HANGUL_JAMO,
   2778             ETHIOPIC,
   2779             ETHIOPIC_SUPPLEMENT,
   2780             CHEROKEE,
   2781             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
   2782             OGHAM,
   2783             RUNIC,
   2784             TAGALOG,
   2785             HANUNOO,
   2786             BUHID,
   2787             TAGBANWA,
   2788             KHMER,
   2789             MONGOLIAN,
   2790             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
   2791             LIMBU,
   2792             TAI_LE,
   2793             NEW_TAI_LUE,
   2794             KHMER_SYMBOLS,
   2795             BUGINESE,
   2796             TAI_THAM,
   2797             null,
   2798             BALINESE,
   2799             SUNDANESE,
   2800             BATAK,
   2801             LEPCHA,
   2802             OL_CHIKI,
   2803             null,
   2804             VEDIC_EXTENSIONS,
   2805             PHONETIC_EXTENSIONS,
   2806             PHONETIC_EXTENSIONS_SUPPLEMENT,
   2807             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
   2808             LATIN_EXTENDED_ADDITIONAL,
   2809             GREEK_EXTENDED,
   2810             GENERAL_PUNCTUATION,
   2811             SUPERSCRIPTS_AND_SUBSCRIPTS,
   2812             CURRENCY_SYMBOLS,
   2813             COMBINING_MARKS_FOR_SYMBOLS,
   2814             LETTERLIKE_SYMBOLS,
   2815             NUMBER_FORMS,
   2816             ARROWS,
   2817             MATHEMATICAL_OPERATORS,
   2818             MISCELLANEOUS_TECHNICAL,
   2819             CONTROL_PICTURES,
   2820             OPTICAL_CHARACTER_RECOGNITION,
   2821             ENCLOSED_ALPHANUMERICS,
   2822             BOX_DRAWING,
   2823             BLOCK_ELEMENTS,
   2824             GEOMETRIC_SHAPES,
   2825             MISCELLANEOUS_SYMBOLS,
   2826             DINGBATS,
   2827             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
   2828             SUPPLEMENTAL_ARROWS_A,
   2829             BRAILLE_PATTERNS,
   2830             SUPPLEMENTAL_ARROWS_B,
   2831             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
   2832             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
   2833             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
   2834             GLAGOLITIC,
   2835             LATIN_EXTENDED_C,
   2836             COPTIC,
   2837             GEORGIAN_SUPPLEMENT,
   2838             TIFINAGH,
   2839             ETHIOPIC_EXTENDED,
   2840             CYRILLIC_EXTENDED_A,
   2841             SUPPLEMENTAL_PUNCTUATION,
   2842             CJK_RADICALS_SUPPLEMENT,
   2843             KANGXI_RADICALS,
   2844             null,
   2845             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
   2846             CJK_SYMBOLS_AND_PUNCTUATION,
   2847             HIRAGANA,
   2848             KATAKANA,
   2849             BOPOMOFO,
   2850             HANGUL_COMPATIBILITY_JAMO,
   2851             KANBUN,
   2852             BOPOMOFO_EXTENDED,
   2853             CJK_STROKES,
   2854             KATAKANA_PHONETIC_EXTENSIONS,
   2855             ENCLOSED_CJK_LETTERS_AND_MONTHS,
   2856             CJK_COMPATIBILITY,
   2857             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
   2858             YIJING_HEXAGRAM_SYMBOLS,
   2859             CJK_UNIFIED_IDEOGRAPHS,
   2860             YI_SYLLABLES,
   2861             YI_RADICALS,
   2862             LISU,
   2863             VAI,
   2864             CYRILLIC_EXTENDED_B,
   2865             BAMUM,
   2866             MODIFIER_TONE_LETTERS,
   2867             LATIN_EXTENDED_D,
   2868             SYLOTI_NAGRI,
   2869             COMMON_INDIC_NUMBER_FORMS,
   2870             PHAGS_PA,
   2871             SAURASHTRA,
   2872             DEVANAGARI_EXTENDED,
   2873             KAYAH_LI,
   2874             REJANG,
   2875             HANGUL_JAMO_EXTENDED_A,
   2876             JAVANESE,
   2877             null,
   2878             CHAM,
   2879             MYANMAR_EXTENDED_A,
   2880             TAI_VIET,
   2881             null,
   2882             ETHIOPIC_EXTENDED_A,
   2883             null,
   2884             MEETEI_MAYEK,
   2885             HANGUL_SYLLABLES,
   2886             HANGUL_JAMO_EXTENDED_B,
   2887             HIGH_SURROGATES,
   2888             HIGH_PRIVATE_USE_SURROGATES,
   2889             LOW_SURROGATES,
   2890             PRIVATE_USE_AREA,
   2891             CJK_COMPATIBILITY_IDEOGRAPHS,
   2892             ALPHABETIC_PRESENTATION_FORMS,
   2893             ARABIC_PRESENTATION_FORMS_A,
   2894             VARIATION_SELECTORS,
   2895             VERTICAL_FORMS,
   2896             COMBINING_HALF_MARKS,
   2897             CJK_COMPATIBILITY_FORMS,
   2898             SMALL_FORM_VARIANTS,
   2899             ARABIC_PRESENTATION_FORMS_B,
   2900             HALFWIDTH_AND_FULLWIDTH_FORMS,
   2901             SPECIALS,
   2902             LINEAR_B_SYLLABARY,
   2903             LINEAR_B_IDEOGRAMS,
   2904             AEGEAN_NUMBERS,
   2905             ANCIENT_GREEK_NUMBERS,
   2906             ANCIENT_SYMBOLS,
   2907             PHAISTOS_DISC,
   2908             null,
   2909             LYCIAN,
   2910             CARIAN,
   2911             null,
   2912             OLD_ITALIC,
   2913             GOTHIC,
   2914             null,
   2915             UGARITIC,
   2916             OLD_PERSIAN,
   2917             null,
   2918             DESERET,
   2919             SHAVIAN,
   2920             OSMANYA,
   2921             null,
   2922             CYPRIOT_SYLLABARY,
   2923             IMPERIAL_ARAMAIC,
   2924             null,
   2925             PHOENICIAN,
   2926             LYDIAN,
   2927             null,
   2928             KHAROSHTHI,
   2929             OLD_SOUTH_ARABIAN,
   2930             null,
   2931             AVESTAN,
   2932             INSCRIPTIONAL_PARTHIAN,
   2933             INSCRIPTIONAL_PAHLAVI,
   2934             null,
   2935             OLD_TURKIC,
   2936             null,
   2937             RUMI_NUMERAL_SYMBOLS,
   2938             null,
   2939             BRAHMI,
   2940             KAITHI,
   2941             null,
   2942             CUNEIFORM,
   2943             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
   2944             null,
   2945             EGYPTIAN_HIEROGLYPHS,
   2946             null,
   2947             BAMUM_SUPPLEMENT,
   2948             null,
   2949             KANA_SUPPLEMENT,
   2950             null,
   2951             BYZANTINE_MUSICAL_SYMBOLS,
   2952             MUSICAL_SYMBOLS,
   2953             ANCIENT_GREEK_MUSICAL_NOTATION,
   2954             null,
   2955             TAI_XUAN_JING_SYMBOLS,
   2956             COUNTING_ROD_NUMERALS,
   2957             null,
   2958             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
   2959             null,
   2960             MAHJONG_TILES,
   2961             DOMINO_TILES,
   2962             PLAYING_CARDS,
   2963             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
   2964             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
   2965             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
   2966             EMOTICONS,
   2967             null,
   2968             TRANSPORT_AND_MAP_SYMBOLS,
   2969             ALCHEMICAL_SYMBOLS,
   2970             null,
   2971             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
   2972             null,
   2973             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
   2974             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
   2975             null,
   2976             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
   2977             null,
   2978             TAGS,
   2979             null,
   2980             VARIATION_SELECTORS_SUPPLEMENT,
   2981             null,
   2982             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
   2983             SUPPLEMENTARY_PRIVATE_USE_AREA_B
   2984         };
   2985 
   2986 
   2987         /**
   2988          * Returns the object representing the Unicode block containing the
   2989          * given character, or {@code null} if the character is not a
   2990          * member of a defined block.
   2991          *
   2992          * <p><b>Note:</b> This method cannot handle
   2993          * <a href="Character.html#supplementary"> supplementary
   2994          * characters</a>.  To support all Unicode characters, including
   2995          * supplementary characters, use the {@link #of(int)} method.
   2996          *
   2997          * @param   c  The character in question
   2998          * @return  The {@code UnicodeBlock} instance representing the
   2999          *          Unicode block of which this character is a member, or
   3000          *          {@code null} if the character is not a member of any
   3001          *          Unicode block
   3002          */
   3003         public static UnicodeBlock of(char c) {
   3004             return of((int)c);
   3005         }
   3006 
   3007         /**
   3008          * Returns the object representing the Unicode block
   3009          * containing the given character (Unicode code point), or
   3010          * {@code null} if the character is not a member of a
   3011          * defined block.
   3012          *
   3013          * @param   codePoint the character (Unicode code point) in question.
   3014          * @return  The {@code UnicodeBlock} instance representing the
   3015          *          Unicode block of which this character is a member, or
   3016          *          {@code null} if the character is not a member of any
   3017          *          Unicode block
   3018          * @exception IllegalArgumentException if the specified
   3019          * {@code codePoint} is an invalid Unicode code point.
   3020          * @see Character#isValidCodePoint(int)
   3021          * @since   1.5
   3022          */
   3023         public static UnicodeBlock of(int codePoint) {
   3024             if (!isValidCodePoint(codePoint)) {
   3025                 throw new IllegalArgumentException();
   3026             }
   3027 
   3028             int top, bottom, current;
   3029             bottom = 0;
   3030             top = blockStarts.length;
   3031             current = top/2;
   3032 
   3033             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
   3034             while (top - bottom > 1) {
   3035                 if (codePoint >= blockStarts[current]) {
   3036                     bottom = current;
   3037                 } else {
   3038                     top = current;
   3039                 }
   3040                 current = (top + bottom) / 2;
   3041             }
   3042             return blocks[current];
   3043         }
   3044 
   3045         /**
   3046          * Returns the UnicodeBlock with the given name. Block
   3047          * names are determined by The Unicode Standard. The file
   3048          * Blocks-&lt;version&gt;.txt defines blocks for a particular
   3049          * version of the standard. The {@link Character} class specifies
   3050          * the version of the standard that it supports.
   3051          * <p>
   3052          * This method accepts block names in the following forms:
   3053          * <ol>
   3054          * <li> Canonical block names as defined by the Unicode Standard.
   3055          * For example, the standard defines a "Basic Latin" block. Therefore, this
   3056          * method accepts "Basic Latin" as a valid block name. The documentation of
   3057          * each UnicodeBlock provides the canonical name.
   3058          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
   3059          * is a valid block name for the "Basic Latin" block.
   3060          * <li>The text representation of each constant UnicodeBlock identifier.
   3061          * For example, this method will return the {@link #BASIC_LATIN} block if
   3062          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
   3063          * hyphens in the canonical name with underscores.
   3064          * </ol>
   3065          * Finally, character case is ignored for all of the valid block name forms.
   3066          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
   3067          * The en_US locale's case mapping rules are used to provide case-insensitive
   3068          * string comparisons for block name validation.
   3069          * <p>
   3070          * If the Unicode Standard changes block names, both the previous and
   3071          * current names will be accepted.
   3072          *
   3073          * @param blockName A {@code UnicodeBlock} name.
   3074          * @return The {@code UnicodeBlock} instance identified
   3075          *         by {@code blockName}
   3076          * @throws IllegalArgumentException if {@code blockName} is an
   3077          *         invalid name
   3078          * @throws NullPointerException if {@code blockName} is null
   3079          * @since 1.5
   3080          */
   3081         public static final UnicodeBlock forName(String blockName) {
   3082             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
   3083             if (block == null) {
   3084                 throw new IllegalArgumentException();
   3085             }
   3086             return block;
   3087         }
   3088     }
   3089 
   3090 
   3091     /**
   3092      * A family of character subsets representing the character scripts
   3093      * defined in the <a href="http://www.unicode.org/reports/tr24/">
   3094      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
   3095      * character is assigned to a single Unicode script, either a specific
   3096      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
   3097      * one of the following three special values,
   3098      * {@link Character.UnicodeScript#INHERITED Inherited},
   3099      * {@link Character.UnicodeScript#COMMON Common} or
   3100      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
   3101      *
   3102      * @since 1.7
   3103      */
   3104     public static enum UnicodeScript {
   3105         /**
   3106          * Unicode script "Common".
   3107          */
   3108         COMMON,
   3109 
   3110         /**
   3111          * Unicode script "Latin".
   3112          */
   3113         LATIN,
   3114 
   3115         /**
   3116          * Unicode script "Greek".
   3117          */
   3118         GREEK,
   3119 
   3120         /**
   3121          * Unicode script "Cyrillic".
   3122          */
   3123         CYRILLIC,
   3124 
   3125         /**
   3126          * Unicode script "Armenian".
   3127          */
   3128         ARMENIAN,
   3129 
   3130         /**
   3131          * Unicode script "Hebrew".
   3132          */
   3133         HEBREW,
   3134 
   3135         /**
   3136          * Unicode script "Arabic".
   3137          */
   3138         ARABIC,
   3139 
   3140         /**
   3141          * Unicode script "Syriac".
   3142          */
   3143         SYRIAC,
   3144 
   3145         /**
   3146          * Unicode script "Thaana".
   3147          */
   3148         THAANA,
   3149 
   3150         /**
   3151          * Unicode script "Devanagari".
   3152          */
   3153         DEVANAGARI,
   3154 
   3155         /**
   3156          * Unicode script "Bengali".
   3157          */
   3158         BENGALI,
   3159 
   3160         /**
   3161          * Unicode script "Gurmukhi".
   3162          */
   3163         GURMUKHI,
   3164 
   3165         /**
   3166          * Unicode script "Gujarati".
   3167          */
   3168         GUJARATI,
   3169 
   3170         /**
   3171          * Unicode script "Oriya".
   3172          */
   3173         ORIYA,
   3174 
   3175         /**
   3176          * Unicode script "Tamil".
   3177          */
   3178         TAMIL,
   3179 
   3180         /**
   3181          * Unicode script "Telugu".
   3182          */
   3183         TELUGU,
   3184 
   3185         /**
   3186          * Unicode script "Kannada".
   3187          */
   3188         KANNADA,
   3189 
   3190         /**
   3191          * Unicode script "Malayalam".
   3192          */
   3193         MALAYALAM,
   3194 
   3195         /**
   3196          * Unicode script "Sinhala".
   3197          */
   3198         SINHALA,
   3199 
   3200         /**
   3201          * Unicode script "Thai".
   3202          */
   3203         THAI,
   3204 
   3205         /**
   3206          * Unicode script "Lao".
   3207          */
   3208         LAO,
   3209 
   3210         /**
   3211          * Unicode script "Tibetan".
   3212          */
   3213         TIBETAN,
   3214 
   3215         /**
   3216          * Unicode script "Myanmar".
   3217          */
   3218         MYANMAR,
   3219 
   3220         /**
   3221          * Unicode script "Georgian".
   3222          */
   3223         GEORGIAN,
   3224 
   3225         /**
   3226          * Unicode script "Hangul".
   3227          */
   3228         HANGUL,
   3229 
   3230         /**
   3231          * Unicode script "Ethiopic".
   3232          */
   3233         ETHIOPIC,
   3234 
   3235         /**
   3236          * Unicode script "Cherokee".
   3237          */
   3238         CHEROKEE,
   3239 
   3240         /**
   3241          * Unicode script "Canadian_Aboriginal".
   3242          */
   3243         CANADIAN_ABORIGINAL,
   3244 
   3245         /**
   3246          * Unicode script "Ogham".
   3247          */
   3248         OGHAM,
   3249 
   3250         /**
   3251          * Unicode script "Runic".
   3252          */
   3253         RUNIC,
   3254 
   3255         /**
   3256          * Unicode script "Khmer".
   3257          */
   3258         KHMER,
   3259 
   3260         /**
   3261          * Unicode script "Mongolian".
   3262          */
   3263         MONGOLIAN,
   3264 
   3265         /**
   3266          * Unicode script "Hiragana".
   3267          */
   3268         HIRAGANA,
   3269 
   3270         /**
   3271          * Unicode script "Katakana".
   3272          */
   3273         KATAKANA,
   3274 
   3275         /**
   3276          * Unicode script "Bopomofo".
   3277          */
   3278         BOPOMOFO,
   3279 
   3280         /**
   3281          * Unicode script "Han".
   3282          */
   3283         HAN,
   3284 
   3285         /**
   3286          * Unicode script "Yi".
   3287          */
   3288         YI,
   3289 
   3290         /**
   3291          * Unicode script "Old_Italic".
   3292          */
   3293         OLD_ITALIC,
   3294 
   3295         /**
   3296          * Unicode script "Gothic".
   3297          */
   3298         GOTHIC,
   3299 
   3300         /**
   3301          * Unicode script "Deseret".
   3302          */
   3303         DESERET,
   3304 
   3305         /**
   3306          * Unicode script "Inherited".
   3307          */
   3308         INHERITED,
   3309 
   3310         /**
   3311          * Unicode script "Tagalog".
   3312          */
   3313         TAGALOG,
   3314 
   3315         /**
   3316          * Unicode script "Hanunoo".
   3317          */
   3318         HANUNOO,
   3319 
   3320         /**
   3321          * Unicode script "Buhid".
   3322          */
   3323         BUHID,
   3324 
   3325         /**
   3326          * Unicode script "Tagbanwa".
   3327          */
   3328         TAGBANWA,
   3329 
   3330         /**
   3331          * Unicode script "Limbu".
   3332          */
   3333         LIMBU,
   3334 
   3335         /**
   3336          * Unicode script "Tai_Le".
   3337          */
   3338         TAI_LE,
   3339 
   3340         /**
   3341          * Unicode script "Linear_B".
   3342          */
   3343         LINEAR_B,
   3344 
   3345         /**
   3346          * Unicode script "Ugaritic".
   3347          */
   3348         UGARITIC,
   3349 
   3350         /**
   3351          * Unicode script "Shavian".
   3352          */
   3353         SHAVIAN,
   3354 
   3355         /**
   3356          * Unicode script "Osmanya".
   3357          */
   3358         OSMANYA,
   3359 
   3360         /**
   3361          * Unicode script "Cypriot".
   3362          */
   3363         CYPRIOT,
   3364 
   3365         /**
   3366          * Unicode script "Braille".
   3367          */
   3368         BRAILLE,
   3369 
   3370         /**
   3371          * Unicode script "Buginese".
   3372          */
   3373         BUGINESE,
   3374 
   3375         /**
   3376          * Unicode script "Coptic".
   3377          */
   3378         COPTIC,
   3379 
   3380         /**
   3381          * Unicode script "New_Tai_Lue".
   3382          */
   3383         NEW_TAI_LUE,
   3384 
   3385         /**
   3386          * Unicode script "Glagolitic".
   3387          */
   3388         GLAGOLITIC,
   3389 
   3390         /**
   3391          * Unicode script "Tifinagh".
   3392          */
   3393         TIFINAGH,
   3394 
   3395         /**
   3396          * Unicode script "Syloti_Nagri".
   3397          */
   3398         SYLOTI_NAGRI,
   3399 
   3400         /**
   3401          * Unicode script "Old_Persian".
   3402          */
   3403         OLD_PERSIAN,
   3404 
   3405         /**
   3406          * Unicode script "Kharoshthi".
   3407          */
   3408         KHAROSHTHI,
   3409 
   3410         /**
   3411          * Unicode script "Balinese".
   3412          */
   3413         BALINESE,
   3414 
   3415         /**
   3416          * Unicode script "Cuneiform".
   3417          */
   3418         CUNEIFORM,
   3419 
   3420         /**
   3421          * Unicode script "Phoenician".
   3422          */
   3423         PHOENICIAN,
   3424 
   3425         /**
   3426          * Unicode script "Phags_Pa".
   3427          */
   3428         PHAGS_PA,
   3429 
   3430         /**
   3431          * Unicode script "Nko".
   3432          */
   3433         NKO,
   3434 
   3435         /**
   3436          * Unicode script "Sundanese".
   3437          */
   3438         SUNDANESE,
   3439 
   3440         /**
   3441          * Unicode script "Batak".
   3442          */
   3443         BATAK,
   3444 
   3445         /**
   3446          * Unicode script "Lepcha".
   3447          */
   3448         LEPCHA,
   3449 
   3450         /**
   3451          * Unicode script "Ol_Chiki".
   3452          */
   3453         OL_CHIKI,
   3454 
   3455         /**
   3456          * Unicode script "Vai".
   3457          */
   3458         VAI,
   3459 
   3460         /**
   3461          * Unicode script "Saurashtra".
   3462          */
   3463         SAURASHTRA,
   3464 
   3465         /**
   3466          * Unicode script "Kayah_Li".
   3467          */
   3468         KAYAH_LI,
   3469 
   3470         /**
   3471          * Unicode script "Rejang".
   3472          */
   3473         REJANG,
   3474 
   3475         /**
   3476          * Unicode script "Lycian".
   3477          */
   3478         LYCIAN,
   3479 
   3480         /**
   3481          * Unicode script "Carian".
   3482          */
   3483         CARIAN,
   3484 
   3485         /**
   3486          * Unicode script "Lydian".
   3487          */
   3488         LYDIAN,
   3489 
   3490         /**
   3491          * Unicode script "Cham".
   3492          */
   3493         CHAM,
   3494 
   3495         /**
   3496          * Unicode script "Tai_Tham".
   3497          */
   3498         TAI_THAM,
   3499 
   3500         /**
   3501          * Unicode script "Tai_Viet".
   3502          */
   3503         TAI_VIET,
   3504 
   3505         /**
   3506          * Unicode script "Avestan".
   3507          */
   3508         AVESTAN,
   3509 
   3510         /**
   3511          * Unicode script "Egyptian_Hieroglyphs".
   3512          */
   3513         EGYPTIAN_HIEROGLYPHS,
   3514 
   3515         /**
   3516          * Unicode script "Samaritan".
   3517          */
   3518         SAMARITAN,
   3519 
   3520         /**
   3521          * Unicode script "Mandaic".
   3522          */
   3523         MANDAIC,
   3524 
   3525         /**
   3526          * Unicode script "Lisu".
   3527          */
   3528         LISU,
   3529 
   3530         /**
   3531          * Unicode script "Bamum".
   3532          */
   3533         BAMUM,
   3534 
   3535         /**
   3536          * Unicode script "Javanese".
   3537          */
   3538         JAVANESE,
   3539 
   3540         /**
   3541          * Unicode script "Meetei_Mayek".
   3542          */
   3543         MEETEI_MAYEK,
   3544 
   3545         /**
   3546          * Unicode script "Imperial_Aramaic".
   3547          */
   3548         IMPERIAL_ARAMAIC,
   3549 
   3550         /**
   3551          * Unicode script "Old_South_Arabian".
   3552          */
   3553         OLD_SOUTH_ARABIAN,
   3554 
   3555         /**
   3556          * Unicode script "Inscriptional_Parthian".
   3557          */
   3558         INSCRIPTIONAL_PARTHIAN,
   3559 
   3560         /**
   3561          * Unicode script "Inscriptional_Pahlavi".
   3562          */
   3563         INSCRIPTIONAL_PAHLAVI,
   3564 
   3565         /**
   3566          * Unicode script "Old_Turkic".
   3567          */
   3568         OLD_TURKIC,
   3569 
   3570         /**
   3571          * Unicode script "Brahmi".
   3572          */
   3573         BRAHMI,
   3574 
   3575         /**
   3576          * Unicode script "Kaithi".
   3577          */
   3578         KAITHI,
   3579 
   3580         /**
   3581          * Unicode script "Unknown".
   3582          */
   3583         UNKNOWN;
   3584 
   3585         private static final int[] scriptStarts = {
   3586             0x0000,   // 0000..0040; COMMON
   3587             0x0041,   // 0041..005A; LATIN
   3588             0x005B,   // 005B..0060; COMMON
   3589             0x0061,   // 0061..007A; LATIN
   3590             0x007B,   // 007B..00A9; COMMON
   3591             0x00AA,   // 00AA..00AA; LATIN
   3592             0x00AB,   // 00AB..00B9; COMMON
   3593             0x00BA,   // 00BA..00BA; LATIN
   3594             0x00BB,   // 00BB..00BF; COMMON
   3595             0x00C0,   // 00C0..00D6; LATIN
   3596             0x00D7,   // 00D7..00D7; COMMON
   3597             0x00D8,   // 00D8..00F6; LATIN
   3598             0x00F7,   // 00F7..00F7; COMMON
   3599             0x00F8,   // 00F8..02B8; LATIN
   3600             0x02B9,   // 02B9..02DF; COMMON
   3601             0x02E0,   // 02E0..02E4; LATIN
   3602             0x02E5,   // 02E5..02E9; COMMON
   3603             0x02EA,   // 02EA..02EB; BOPOMOFO
   3604             0x02EC,   // 02EC..02FF; COMMON
   3605             0x0300,   // 0300..036F; INHERITED
   3606             0x0370,   // 0370..0373; GREEK
   3607             0x0374,   // 0374..0374; COMMON
   3608             0x0375,   // 0375..037D; GREEK
   3609             0x037E,   // 037E..0383; COMMON
   3610             0x0384,   // 0384..0384; GREEK
   3611             0x0385,   // 0385..0385; COMMON
   3612             0x0386,   // 0386..0386; GREEK
   3613             0x0387,   // 0387..0387; COMMON
   3614             0x0388,   // 0388..03E1; GREEK
   3615             0x03E2,   // 03E2..03EF; COPTIC
   3616             0x03F0,   // 03F0..03FF; GREEK
   3617             0x0400,   // 0400..0484; CYRILLIC
   3618             0x0485,   // 0485..0486; INHERITED
   3619             0x0487,   // 0487..0530; CYRILLIC
   3620             0x0531,   // 0531..0588; ARMENIAN
   3621             0x0589,   // 0589..0589; COMMON
   3622             0x058A,   // 058A..0590; ARMENIAN
   3623             0x0591,   // 0591..05FF; HEBREW
   3624             0x0600,   // 0600..060B; ARABIC
   3625             0x060C,   // 060C..060C; COMMON
   3626             0x060D,   // 060D..061A; ARABIC
   3627             0x061B,   // 061B..061D; COMMON
   3628             0x061E,   // 061E..061E; ARABIC
   3629             0x061F,   // 061F..061F; COMMON
   3630             0x0620,   // 0620..063F; ARABIC
   3631             0x0640,   // 0640..0640; COMMON
   3632             0x0641,   // 0641..064A; ARABIC
   3633             0x064B,   // 064B..0655; INHERITED
   3634             0x0656,   // 0656..065E; ARABIC
   3635             0x065F,   // 065F..065F; INHERITED
   3636             0x0660,   // 0660..0669; COMMON
   3637             0x066A,   // 066A..066F; ARABIC
   3638             0x0670,   // 0670..0670; INHERITED
   3639             0x0671,   // 0671..06DC; ARABIC
   3640             0x06DD,   // 06DD..06DD; COMMON
   3641             0x06DE,   // 06DE..06FF; ARABIC
   3642             0x0700,   // 0700..074F; SYRIAC
   3643             0x0750,   // 0750..077F; ARABIC
   3644             0x0780,   // 0780..07BF; THAANA
   3645             0x07C0,   // 07C0..07FF; NKO
   3646             0x0800,   // 0800..083F; SAMARITAN
   3647             0x0840,   // 0840..08FF; MANDAIC
   3648             0x0900,   // 0900..0950; DEVANAGARI
   3649             0x0951,   // 0951..0952; INHERITED
   3650             0x0953,   // 0953..0963; DEVANAGARI
   3651             0x0964,   // 0964..0965; COMMON
   3652             0x0966,   // 0966..096F; DEVANAGARI
   3653             0x0970,   // 0970..0970; COMMON
   3654             0x0971,   // 0971..0980; DEVANAGARI
   3655             0x0981,   // 0981..0A00; BENGALI
   3656             0x0A01,   // 0A01..0A80; GURMUKHI
   3657             0x0A81,   // 0A81..0B00; GUJARATI
   3658             0x0B01,   // 0B01..0B81; ORIYA
   3659             0x0B82,   // 0B82..0C00; TAMIL
   3660             0x0C01,   // 0C01..0C81; TELUGU
   3661             0x0C82,   // 0C82..0CF0; KANNADA
   3662             0x0D02,   // 0D02..0D81; MALAYALAM
   3663             0x0D82,   // 0D82..0E00; SINHALA
   3664             0x0E01,   // 0E01..0E3E; THAI
   3665             0x0E3F,   // 0E3F..0E3F; COMMON
   3666             0x0E40,   // 0E40..0E80; THAI
   3667             0x0E81,   // 0E81..0EFF; LAO
   3668             0x0F00,   // 0F00..0FD4; TIBETAN
   3669             0x0FD5,   // 0FD5..0FD8; COMMON
   3670             0x0FD9,   // 0FD9..0FFF; TIBETAN
   3671             0x1000,   // 1000..109F; MYANMAR
   3672             0x10A0,   // 10A0..10FA; GEORGIAN
   3673             0x10FB,   // 10FB..10FB; COMMON
   3674             0x10FC,   // 10FC..10FF; GEORGIAN
   3675             0x1100,   // 1100..11FF; HANGUL
   3676             0x1200,   // 1200..139F; ETHIOPIC
   3677             0x13A0,   // 13A0..13FF; CHEROKEE
   3678             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
   3679             0x1680,   // 1680..169F; OGHAM
   3680             0x16A0,   // 16A0..16EA; RUNIC
   3681             0x16EB,   // 16EB..16ED; COMMON
   3682             0x16EE,   // 16EE..16FF; RUNIC
   3683             0x1700,   // 1700..171F; TAGALOG
   3684             0x1720,   // 1720..1734; HANUNOO
   3685             0x1735,   // 1735..173F; COMMON
   3686             0x1740,   // 1740..175F; BUHID
   3687             0x1760,   // 1760..177F; TAGBANWA
   3688             0x1780,   // 1780..17FF; KHMER
   3689             0x1800,   // 1800..1801; MONGOLIAN
   3690             0x1802,   // 1802..1803; COMMON
   3691             0x1804,   // 1804..1804; MONGOLIAN
   3692             0x1805,   // 1805..1805; COMMON
   3693             0x1806,   // 1806..18AF; MONGOLIAN
   3694             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
   3695             0x1900,   // 1900..194F; LIMBU
   3696             0x1950,   // 1950..197F; TAI_LE
   3697             0x1980,   // 1980..19DF; NEW_TAI_LUE
   3698             0x19E0,   // 19E0..19FF; KHMER
   3699             0x1A00,   // 1A00..1A1F; BUGINESE
   3700             0x1A20,   // 1A20..1AFF; TAI_THAM
   3701             0x1B00,   // 1B00..1B7F; BALINESE
   3702             0x1B80,   // 1B80..1BBF; SUNDANESE
   3703             0x1BC0,   // 1BC0..1BFF; BATAK
   3704             0x1C00,   // 1C00..1C4F; LEPCHA
   3705             0x1C50,   // 1C50..1CCF; OL_CHIKI
   3706             0x1CD0,   // 1CD0..1CD2; INHERITED
   3707             0x1CD3,   // 1CD3..1CD3; COMMON
   3708             0x1CD4,   // 1CD4..1CE0; INHERITED
   3709             0x1CE1,   // 1CE1..1CE1; COMMON
   3710             0x1CE2,   // 1CE2..1CE8; INHERITED
   3711             0x1CE9,   // 1CE9..1CEC; COMMON
   3712             0x1CED,   // 1CED..1CED; INHERITED
   3713             0x1CEE,   // 1CEE..1CFF; COMMON
   3714             0x1D00,   // 1D00..1D25; LATIN
   3715             0x1D26,   // 1D26..1D2A; GREEK
   3716             0x1D2B,   // 1D2B..1D2B; CYRILLIC
   3717             0x1D2C,   // 1D2C..1D5C; LATIN
   3718             0x1D5D,   // 1D5D..1D61; GREEK
   3719             0x1D62,   // 1D62..1D65; LATIN
   3720             0x1D66,   // 1D66..1D6A; GREEK
   3721             0x1D6B,   // 1D6B..1D77; LATIN
   3722             0x1D78,   // 1D78..1D78; CYRILLIC
   3723             0x1D79,   // 1D79..1DBE; LATIN
   3724             0x1DBF,   // 1DBF..1DBF; GREEK
   3725             0x1DC0,   // 1DC0..1DFF; INHERITED
   3726             0x1E00,   // 1E00..1EFF; LATIN
   3727             0x1F00,   // 1F00..1FFF; GREEK
   3728             0x2000,   // 2000..200B; COMMON
   3729             0x200C,   // 200C..200D; INHERITED
   3730             0x200E,   // 200E..2070; COMMON
   3731             0x2071,   // 2071..2073; LATIN
   3732             0x2074,   // 2074..207E; COMMON
   3733             0x207F,   // 207F..207F; LATIN
   3734             0x2080,   // 2080..208F; COMMON
   3735             0x2090,   // 2090..209F; LATIN
   3736             0x20A0,   // 20A0..20CF; COMMON
   3737             0x20D0,   // 20D0..20FF; INHERITED
   3738             0x2100,   // 2100..2125; COMMON
   3739             0x2126,   // 2126..2126; GREEK
   3740             0x2127,   // 2127..2129; COMMON
   3741             0x212A,   // 212A..212B; LATIN
   3742             0x212C,   // 212C..2131; COMMON
   3743             0x2132,   // 2132..2132; LATIN
   3744             0x2133,   // 2133..214D; COMMON
   3745             0x214E,   // 214E..214E; LATIN
   3746             0x214F,   // 214F..215F; COMMON
   3747             0x2160,   // 2160..2188; LATIN
   3748             0x2189,   // 2189..27FF; COMMON
   3749             0x2800,   // 2800..28FF; BRAILLE
   3750             0x2900,   // 2900..2BFF; COMMON
   3751             0x2C00,   // 2C00..2C5F; GLAGOLITIC
   3752             0x2C60,   // 2C60..2C7F; LATIN
   3753             0x2C80,   // 2C80..2CFF; COPTIC
   3754             0x2D00,   // 2D00..2D2F; GEORGIAN
   3755             0x2D30,   // 2D30..2D7F; TIFINAGH
   3756             0x2D80,   // 2D80..2DDF; ETHIOPIC
   3757             0x2DE0,   // 2DE0..2DFF; CYRILLIC
   3758             0x2E00,   // 2E00..2E7F; COMMON
   3759             0x2E80,   // 2E80..2FEF; HAN
   3760             0x2FF0,   // 2FF0..3004; COMMON
   3761             0x3005,   // 3005..3005; HAN
   3762             0x3006,   // 3006..3006; COMMON
   3763             0x3007,   // 3007..3007; HAN
   3764             0x3008,   // 3008..3020; COMMON
   3765             0x3021,   // 3021..3029; HAN
   3766             0x302A,   // 302A..302D; INHERITED
   3767             0x302E,   // 302E..302F; HANGUL
   3768             0x3030,   // 3030..3037; COMMON
   3769             0x3038,   // 3038..303B; HAN
   3770             0x303C,   // 303C..3040; COMMON
   3771             0x3041,   // 3041..3098; HIRAGANA
   3772             0x3099,   // 3099..309A; INHERITED
   3773             0x309B,   // 309B..309C; COMMON
   3774             0x309D,   // 309D..309F; HIRAGANA
   3775             0x30A0,   // 30A0..30A0; COMMON
   3776             0x30A1,   // 30A1..30FA; KATAKANA
   3777             0x30FB,   // 30FB..30FC; COMMON
   3778             0x30FD,   // 30FD..3104; KATAKANA
   3779             0x3105,   // 3105..3130; BOPOMOFO
   3780             0x3131,   // 3131..318F; HANGUL
   3781             0x3190,   // 3190..319F; COMMON
   3782             0x31A0,   // 31A0..31BF; BOPOMOFO
   3783             0x31C0,   // 31C0..31EF; COMMON
   3784             0x31F0,   // 31F0..31FF; KATAKANA
   3785             0x3200,   // 3200..321F; HANGUL
   3786             0x3220,   // 3220..325F; COMMON
   3787             0x3260,   // 3260..327E; HANGUL
   3788             0x327F,   // 327F..32CF; COMMON
   3789             0x32D0,   // 32D0..3357; KATAKANA
   3790             0x3358,   // 3358..33FF; COMMON
   3791             0x3400,   // 3400..4DBF; HAN
   3792             0x4DC0,   // 4DC0..4DFF; COMMON
   3793             0x4E00,   // 4E00..9FFF; HAN
   3794             0xA000,   // A000..A4CF; YI
   3795             0xA4D0,   // A4D0..A4FF; LISU
   3796             0xA500,   // A500..A63F; VAI
   3797             0xA640,   // A640..A69F; CYRILLIC
   3798             0xA6A0,   // A6A0..A6FF; BAMUM
   3799             0xA700,   // A700..A721; COMMON
   3800             0xA722,   // A722..A787; LATIN
   3801             0xA788,   // A788..A78A; COMMON
   3802             0xA78B,   // A78B..A7FF; LATIN
   3803             0xA800,   // A800..A82F; SYLOTI_NAGRI
   3804             0xA830,   // A830..A83F; COMMON
   3805             0xA840,   // A840..A87F; PHAGS_PA
   3806             0xA880,   // A880..A8DF; SAURASHTRA
   3807             0xA8E0,   // A8E0..A8FF; DEVANAGARI
   3808             0xA900,   // A900..A92F; KAYAH_LI
   3809             0xA930,   // A930..A95F; REJANG
   3810             0xA960,   // A960..A97F; HANGUL
   3811             0xA980,   // A980..A9FF; JAVANESE
   3812             0xAA00,   // AA00..AA5F; CHAM
   3813             0xAA60,   // AA60..AA7F; MYANMAR
   3814             0xAA80,   // AA80..AB00; TAI_VIET
   3815             0xAB01,   // AB01..ABBF; ETHIOPIC
   3816             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
   3817             0xAC00,   // AC00..D7FB; HANGUL
   3818             0xD7FC,   // D7FC..F8FF; UNKNOWN
   3819             0xF900,   // F900..FAFF; HAN
   3820             0xFB00,   // FB00..FB12; LATIN
   3821             0xFB13,   // FB13..FB1C; ARMENIAN
   3822             0xFB1D,   // FB1D..FB4F; HEBREW
   3823             0xFB50,   // FB50..FD3D; ARABIC
   3824             0xFD3E,   // FD3E..FD4F; COMMON
   3825             0xFD50,   // FD50..FDFC; ARABIC
   3826             0xFDFD,   // FDFD..FDFF; COMMON
   3827             0xFE00,   // FE00..FE0F; INHERITED
   3828             0xFE10,   // FE10..FE1F; COMMON
   3829             0xFE20,   // FE20..FE2F; INHERITED
   3830             0xFE30,   // FE30..FE6F; COMMON
   3831             0xFE70,   // FE70..FEFE; ARABIC
   3832             0xFEFF,   // FEFF..FF20; COMMON
   3833             0xFF21,   // FF21..FF3A; LATIN
   3834             0xFF3B,   // FF3B..FF40; COMMON
   3835             0xFF41,   // FF41..FF5A; LATIN
   3836             0xFF5B,   // FF5B..FF65; COMMON
   3837             0xFF66,   // FF66..FF6F; KATAKANA
   3838             0xFF70,   // FF70..FF70; COMMON
   3839             0xFF71,   // FF71..FF9D; KATAKANA
   3840             0xFF9E,   // FF9E..FF9F; COMMON
   3841             0xFFA0,   // FFA0..FFDF; HANGUL
   3842             0xFFE0,   // FFE0..FFFF; COMMON
   3843             0x10000,  // 10000..100FF; LINEAR_B
   3844             0x10100,  // 10100..1013F; COMMON
   3845             0x10140,  // 10140..1018F; GREEK
   3846             0x10190,  // 10190..101FC; COMMON
   3847             0x101FD,  // 101FD..1027F; INHERITED
   3848             0x10280,  // 10280..1029F; LYCIAN
   3849             0x102A0,  // 102A0..102FF; CARIAN
   3850             0x10300,  // 10300..1032F; OLD_ITALIC
   3851             0x10330,  // 10330..1037F; GOTHIC
   3852             0x10380,  // 10380..1039F; UGARITIC
   3853             0x103A0,  // 103A0..103FF; OLD_PERSIAN
   3854             0x10400,  // 10400..1044F; DESERET
   3855             0x10450,  // 10450..1047F; SHAVIAN
   3856             0x10480,  // 10480..107FF; OSMANYA
   3857             0x10800,  // 10800..1083F; CYPRIOT
   3858             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
   3859             0x10900,  // 10900..1091F; PHOENICIAN
   3860             0x10920,  // 10920..109FF; LYDIAN
   3861             0x10A00,  // 10A00..10A5F; KHAROSHTHI
   3862             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
   3863             0x10B00,  // 10B00..10B3F; AVESTAN
   3864             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
   3865             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
   3866             0x10C00,  // 10C00..10E5F; OLD_TURKIC
   3867             0x10E60,  // 10E60..10FFF; ARABIC
   3868             0x11000,  // 11000..1107F; BRAHMI
   3869             0x11080,  // 11080..11FFF; KAITHI
   3870             0x12000,  // 12000..12FFF; CUNEIFORM
   3871             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
   3872             0x16800,  // 16800..16A38; BAMUM
   3873             0x1B000,  // 1B000..1B000; KATAKANA
   3874             0x1B001,  // 1B001..1CFFF; HIRAGANA
   3875             0x1D000,  // 1D000..1D166; COMMON
   3876             0x1D167,  // 1D167..1D169; INHERITED
   3877             0x1D16A,  // 1D16A..1D17A; COMMON
   3878             0x1D17B,  // 1D17B..1D182; INHERITED
   3879             0x1D183,  // 1D183..1D184; COMMON
   3880             0x1D185,  // 1D185..1D18B; INHERITED
   3881             0x1D18C,  // 1D18C..1D1A9; COMMON
   3882             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
   3883             0x1D1AE,  // 1D1AE..1D1FF; COMMON
   3884             0x1D200,  // 1D200..1D2FF; GREEK
   3885             0x1D300,  // 1D300..1F1FF; COMMON
   3886             0x1F200,  // 1F200..1F200; HIRAGANA
   3887             0x1F201,  // 1F201..1FFFF; COMMON
   3888             0x20000,  // 20000..E0000; HAN
   3889             0xE0001,  // E0001..E00FF; COMMON
   3890             0xE0100,  // E0100..E01EF; INHERITED
   3891             0xE01F0   // E01F0..10FFFF; UNKNOWN
   3892 
   3893         };
   3894 
   3895         private static final UnicodeScript[] scripts = {  // Parallel to scriptStarts: entry i is the script of the range that begins at scriptStarts[i].
   3896             COMMON,
   3897             LATIN,
   3898             COMMON,
   3899             LATIN,
   3900             COMMON,
   3901             LATIN,
   3902             COMMON,
   3903             LATIN,
   3904             COMMON,
   3905             LATIN,
   3906             COMMON,
   3907             LATIN,
   3908             COMMON,
   3909             LATIN,
   3910             COMMON,
   3911             LATIN,
   3912             COMMON,
   3913             BOPOMOFO,
   3914             COMMON,
   3915             INHERITED,
   3916             GREEK,
   3917             COMMON,
   3918             GREEK,
   3919             COMMON,
   3920             GREEK,
   3921             COMMON,
   3922             GREEK,
   3923             COMMON,
   3924             GREEK,
   3925             COPTIC,
   3926             GREEK,
   3927             CYRILLIC,
   3928             INHERITED,
   3929             CYRILLIC,
   3930             ARMENIAN,
   3931             COMMON,
   3932             ARMENIAN,
   3933             HEBREW,
   3934             ARABIC,
   3935             COMMON,
   3936             ARABIC,
   3937             COMMON,
   3938             ARABIC,
   3939             COMMON,
   3940             ARABIC,
   3941             COMMON,
   3942             ARABIC,
   3943             INHERITED,
   3944             ARABIC,
   3945             INHERITED,
   3946             COMMON,
   3947             ARABIC,
   3948             INHERITED,
   3949             ARABIC,
   3950             COMMON,
   3951             ARABIC,
   3952             SYRIAC,
   3953             ARABIC,
   3954             THAANA,
   3955             NKO,
   3956             SAMARITAN,
   3957             MANDAIC,
   3958             DEVANAGARI,
   3959             INHERITED,
   3960             DEVANAGARI,
   3961             COMMON,
   3962             DEVANAGARI,
   3963             COMMON,
   3964             DEVANAGARI,
   3965             BENGALI,
   3966             GURMUKHI,
   3967             GUJARATI,
   3968             ORIYA,
   3969             TAMIL,
   3970             TELUGU,
   3971             KANNADA,
   3972             MALAYALAM,
   3973             SINHALA,
   3974             THAI,
   3975             COMMON,
   3976             THAI,
   3977             LAO,
   3978             TIBETAN,
   3979             COMMON,
   3980             TIBETAN,
   3981             MYANMAR,
   3982             GEORGIAN,
   3983             COMMON,
   3984             GEORGIAN,
   3985             HANGUL,
   3986             ETHIOPIC,
   3987             CHEROKEE,
   3988             CANADIAN_ABORIGINAL,
   3989             OGHAM,
   3990             RUNIC,
   3991             COMMON,
   3992             RUNIC,
   3993             TAGALOG,
   3994             HANUNOO,
   3995             COMMON,
   3996             BUHID,
   3997             TAGBANWA,
   3998             KHMER,
   3999             MONGOLIAN,
   4000             COMMON,
   4001             MONGOLIAN,
   4002             COMMON,
   4003             MONGOLIAN,
   4004             CANADIAN_ABORIGINAL,
   4005             LIMBU,
   4006             TAI_LE,
   4007             NEW_TAI_LUE,
   4008             KHMER,
   4009             BUGINESE,
   4010             TAI_THAM,
   4011             BALINESE,
   4012             SUNDANESE,
   4013             BATAK,
   4014             LEPCHA,
   4015             OL_CHIKI,
   4016             INHERITED,
   4017             COMMON,
   4018             INHERITED,
   4019             COMMON,
   4020             INHERITED,
   4021             COMMON,
   4022             INHERITED,
   4023             COMMON,
   4024             LATIN,
   4025             GREEK,
   4026             CYRILLIC,
   4027             LATIN,
   4028             GREEK,
   4029             LATIN,
   4030             GREEK,
   4031             LATIN,
   4032             CYRILLIC,
   4033             LATIN,
   4034             GREEK,
   4035             INHERITED,
   4036             LATIN,
   4037             GREEK,
   4038             COMMON,
   4039             INHERITED,
   4040             COMMON,
   4041             LATIN,
   4042             COMMON,
   4043             LATIN,
   4044             COMMON,
   4045             LATIN,
   4046             COMMON,
   4047             INHERITED,
   4048             COMMON,
   4049             GREEK,
   4050             COMMON,
   4051             LATIN,
   4052             COMMON,
   4053             LATIN,
   4054             COMMON,
   4055             LATIN,
   4056             COMMON,
   4057             LATIN,
   4058             COMMON,
   4059             BRAILLE,
   4060             COMMON,
   4061             GLAGOLITIC,
   4062             LATIN,
   4063             COPTIC,
   4064             GEORGIAN,
   4065             TIFINAGH,
   4066             ETHIOPIC,
   4067             CYRILLIC,
   4068             COMMON,
   4069             HAN,
   4070             COMMON,
   4071             HAN,
   4072             COMMON,
   4073             HAN,
   4074             COMMON,
   4075             HAN,
   4076             INHERITED,
   4077             HANGUL,
   4078             COMMON,
   4079             HAN,
   4080             COMMON,
   4081             HIRAGANA,
   4082             INHERITED,
   4083             COMMON,
   4084             HIRAGANA,
   4085             COMMON,
   4086             KATAKANA,
   4087             COMMON,
   4088             KATAKANA,
   4089             BOPOMOFO,
   4090             HANGUL,
   4091             COMMON,
   4092             BOPOMOFO,
   4093             COMMON,
   4094             KATAKANA,
   4095             HANGUL,
   4096             COMMON,
   4097             HANGUL,
   4098             COMMON,
   4099             KATAKANA,
   4100             COMMON,
   4101             HAN,
   4102             COMMON,
   4103             HAN,
   4104             YI,
   4105             LISU,
   4106             VAI,
   4107             CYRILLIC,
   4108             BAMUM,
   4109             COMMON,
   4110             LATIN,
   4111             COMMON,
   4112             LATIN,
   4113             SYLOTI_NAGRI,
   4114             COMMON,
   4115             PHAGS_PA,
   4116             SAURASHTRA,
   4117             DEVANAGARI,
   4118             KAYAH_LI,
   4119             REJANG,
   4120             HANGUL,
   4121             JAVANESE,
   4122             CHAM,
   4123             MYANMAR,
   4124             TAI_VIET,
   4125             ETHIOPIC,
   4126             MEETEI_MAYEK,
   4127             HANGUL,
   4128             UNKNOWN,
   4129             HAN,
   4130             LATIN,
   4131             ARMENIAN,
   4132             HEBREW,
   4133             ARABIC,
   4134             COMMON,
   4135             ARABIC,
   4136             COMMON,
   4137             INHERITED,
   4138             COMMON,
   4139             INHERITED,
   4140             COMMON,
   4141             ARABIC,
   4142             COMMON,
   4143             LATIN,
   4144             COMMON,
   4145             LATIN,
   4146             COMMON,
   4147             KATAKANA,
   4148             COMMON,
   4149             KATAKANA,
   4150             COMMON,
   4151             HANGUL,
   4152             COMMON,
   4153             LINEAR_B,
   4154             COMMON,
   4155             GREEK,
   4156             COMMON,
   4157             INHERITED,
   4158             LYCIAN,
   4159             CARIAN,
   4160             OLD_ITALIC,
   4161             GOTHIC,
   4162             UGARITIC,
   4163             OLD_PERSIAN,
   4164             DESERET,
   4165             SHAVIAN,
   4166             OSMANYA,
   4167             CYPRIOT,
   4168             IMPERIAL_ARAMAIC,
   4169             PHOENICIAN,
   4170             LYDIAN,
   4171             KHAROSHTHI,
   4172             OLD_SOUTH_ARABIAN,
   4173             AVESTAN,
   4174             INSCRIPTIONAL_PARTHIAN,
   4175             INSCRIPTIONAL_PAHLAVI,
   4176             OLD_TURKIC,
   4177             ARABIC,
   4178             BRAHMI,
   4179             KAITHI,
   4180             CUNEIFORM,
   4181             EGYPTIAN_HIEROGLYPHS,
   4182             BAMUM,
   4183             KATAKANA,
   4184             HIRAGANA,
   4185             COMMON,
   4186             INHERITED,
   4187             COMMON,
   4188             INHERITED,
   4189             COMMON,
   4190             INHERITED,
   4191             COMMON,
   4192             INHERITED,
   4193             COMMON,
   4194             GREEK,
   4195             COMMON,
   4196             HIRAGANA,
   4197             COMMON,
   4198             HAN,
   4199             COMMON,
   4200             INHERITED,
   4201             UNKNOWN
   4202         };  // Must stay index-aligned with scriptStarts: of(int) binary-searches scriptStarts and uses the resulting index here.
   4203 
   4204         private static HashMap<String, Character.UnicodeScript> aliases;
   4205         static {
   4206             aliases = new HashMap<>(128);
   4207             aliases.put("ARAB", ARABIC);
   4208             aliases.put("ARMI", IMPERIAL_ARAMAIC);
   4209             aliases.put("ARMN", ARMENIAN);
   4210             aliases.put("AVST", AVESTAN);
   4211             aliases.put("BALI", BALINESE);
   4212             aliases.put("BAMU", BAMUM);
   4213             aliases.put("BATK", BATAK);
   4214             aliases.put("BENG", BENGALI);
   4215             aliases.put("BOPO", BOPOMOFO);
   4216             aliases.put("BRAI", BRAILLE);
   4217             aliases.put("BRAH", BRAHMI);
   4218             aliases.put("BUGI", BUGINESE);
   4219             aliases.put("BUHD", BUHID);
   4220             aliases.put("CANS", CANADIAN_ABORIGINAL);
   4221             aliases.put("CARI", CARIAN);
   4222             aliases.put("CHAM", CHAM);
   4223             aliases.put("CHER", CHEROKEE);
   4224             aliases.put("COPT", COPTIC);
   4225             aliases.put("CPRT", CYPRIOT);
   4226             aliases.put("CYRL", CYRILLIC);
   4227             aliases.put("DEVA", DEVANAGARI);
   4228             aliases.put("DSRT", DESERET);
   4229             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
   4230             aliases.put("ETHI", ETHIOPIC);
   4231             aliases.put("GEOR", GEORGIAN);
   4232             aliases.put("GLAG", GLAGOLITIC);
   4233             aliases.put("GOTH", GOTHIC);
   4234             aliases.put("GREK", GREEK);
   4235             aliases.put("GUJR", GUJARATI);
   4236             aliases.put("GURU", GURMUKHI);
   4237             aliases.put("HANG", HANGUL);
   4238             aliases.put("HANI", HAN);
   4239             aliases.put("HANO", HANUNOO);
   4240             aliases.put("HEBR", HEBREW);
   4241             aliases.put("HIRA", HIRAGANA);
   4242             // it appears we don't have the KATAKANA_OR_HIRAGANA
   4243             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
   4244             aliases.put("ITAL", OLD_ITALIC);
   4245             aliases.put("JAVA", JAVANESE);
   4246             aliases.put("KALI", KAYAH_LI);
   4247             aliases.put("KANA", KATAKANA);
   4248             aliases.put("KHAR", KHAROSHTHI);
   4249             aliases.put("KHMR", KHMER);
   4250             aliases.put("KNDA", KANNADA);
   4251             aliases.put("KTHI", KAITHI);
   4252             aliases.put("LANA", TAI_THAM);
   4253             aliases.put("LAOO", LAO);
   4254             aliases.put("LATN", LATIN);
   4255             aliases.put("LEPC", LEPCHA);
   4256             aliases.put("LIMB", LIMBU);
   4257             aliases.put("LINB", LINEAR_B);
   4258             aliases.put("LISU", LISU);
   4259             aliases.put("LYCI", LYCIAN);
   4260             aliases.put("LYDI", LYDIAN);
   4261             aliases.put("MAND", MANDAIC);
   4262             aliases.put("MLYM", MALAYALAM);
   4263             aliases.put("MONG", MONGOLIAN);
   4264             aliases.put("MTEI", MEETEI_MAYEK);
   4265             aliases.put("MYMR", MYANMAR);
   4266             aliases.put("NKOO", NKO);
   4267             aliases.put("OGAM", OGHAM);
   4268             aliases.put("OLCK", OL_CHIKI);
   4269             aliases.put("ORKH", OLD_TURKIC);
   4270             aliases.put("ORYA", ORIYA);
   4271             aliases.put("OSMA", OSMANYA);
   4272             aliases.put("PHAG", PHAGS_PA);
   4273             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
   4274             aliases.put("PHNX", PHOENICIAN);
   4275             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
   4276             aliases.put("RJNG", REJANG);
   4277             aliases.put("RUNR", RUNIC);
   4278             aliases.put("SAMR", SAMARITAN);
   4279             aliases.put("SARB", OLD_SOUTH_ARABIAN);
   4280             aliases.put("SAUR", SAURASHTRA);
   4281             aliases.put("SHAW", SHAVIAN);
   4282             aliases.put("SINH", SINHALA);
   4283             aliases.put("SUND", SUNDANESE);
   4284             aliases.put("SYLO", SYLOTI_NAGRI);
   4285             aliases.put("SYRC", SYRIAC);
   4286             aliases.put("TAGB", TAGBANWA);
   4287             aliases.put("TALE", TAI_LE);
   4288             aliases.put("TALU", NEW_TAI_LUE);
   4289             aliases.put("TAML", TAMIL);
   4290             aliases.put("TAVT", TAI_VIET);
   4291             aliases.put("TELU", TELUGU);
   4292             aliases.put("TFNG", TIFINAGH);
   4293             aliases.put("TGLG", TAGALOG);
   4294             aliases.put("THAA", THAANA);
   4295             aliases.put("THAI", THAI);
   4296             aliases.put("TIBT", TIBETAN);
   4297             aliases.put("UGAR", UGARITIC);
   4298             aliases.put("VAII", VAI);
   4299             aliases.put("XPEO", OLD_PERSIAN);
   4300             aliases.put("XSUX", CUNEIFORM);
   4301             aliases.put("YIII", YI);
   4302             aliases.put("ZINH", INHERITED);
   4303             aliases.put("ZYYY", COMMON);
   4304             aliases.put("ZZZZ", UNKNOWN);
   4305         }
   4306 
   4307         /**
   4308          * Returns the enum constant representing the Unicode script of which
   4309          * the given character (Unicode code point) is assigned to.
   4310          *
   4311          * @param   codePoint the character (Unicode code point) in question.
   4312          * @return  The {@code UnicodeScript} constant representing the
   4313          *          Unicode script of which this character is assigned to.
   4314          *
   4315          * @exception IllegalArgumentException if the specified
   4316          * {@code codePoint} is an invalid Unicode code point.
   4317          * @see Character#isValidCodePoint(int)
   4318          *
   4319          */
   4320         public static UnicodeScript of(int codePoint) {
   4321             if (!isValidCodePoint(codePoint))
   4322                 throw new IllegalArgumentException();
   4323             int type = getType(codePoint);
   4324             // leave SURROGATE and PRIVATE_USE for table lookup
   4325             if (type == UNASSIGNED)
   4326                 return UNKNOWN;
   4327             int index = Arrays.binarySearch(scriptStarts, codePoint);
   4328             if (index < 0)
   4329                 index = -index - 2;
   4330             return scripts[index];
   4331         }
   4332 
   4333         /**
   4334          * Returns the UnicodeScript constant with the given Unicode script
   4335          * name or the script name alias. Script names and their aliases are
   4336          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
   4337          * and PropertyValueAliases&lt;version&gt;.txt define script names
   4338          * and the script name aliases for a particular version of the
   4339          * standard. The {@link Character} class specifies the version of
   4340          * the standard that it supports.
   4341          * <p>
   4342          * Character case is ignored for all of the valid script names.
   4343          * The en_US locale's case mapping rules are used to provide
   4344          * case-insensitive string comparisons for script name validation.
   4345          * <p>
   4346          *
   4347          * @param scriptName A {@code UnicodeScript} name.
   4348          * @return The {@code UnicodeScript} constant identified
   4349          *         by {@code scriptName}
   4350          * @throws IllegalArgumentException if {@code scriptName} is an
   4351          *         invalid name
   4352          * @throws NullPointerException if {@code scriptName} is null
   4353          */
   4354         public static final UnicodeScript forName(String scriptName) {
   4355             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
   4356                                  //.replace(' ', '_'));
   4357             UnicodeScript sc = aliases.get(scriptName);
   4358             if (sc != null)
   4359                 return sc;
   4360             return valueOf(scriptName);
   4361         }
   4362     }
   4363 
   4364     /**
   4365      * The value of the {@code Character}.
   4366      *
   4367      * @serial
   4368      */
   4369     private final char value;
   4370 
   4371     /** use serialVersionUID from JDK 1.0.2 for interoperability */
   4372     private static final long serialVersionUID = 3786198910865385080L;
   4373 
   4374     /**
   4375      * Constructs a newly allocated {@code Character} object that
   4376      * represents the specified {@code char} value.
   4377      *
   4378      * @param  value   the value to be represented by the
   4379      *                  {@code Character} object.
   4380      */
   4381     public Character(char value) {
   4382         this.value = value;
   4383     }
   4384 
   4385     private static class CharacterCache {
   4386         private CharacterCache(){}
   4387 
   4388         static final Character cache[] = new Character[127 + 1];
   4389 
   4390         static {
   4391             for (int i = 0; i < cache.length; i++)
   4392                 cache[i] = new Character((char)i);
   4393         }
   4394     }
   4395 
   4396     /**
   4397      * Returns a <tt>Character</tt> instance representing the specified
   4398      * <tt>char</tt> value.
   4399      * If a new <tt>Character</tt> instance is not required, this method
   4400      * should generally be used in preference to the constructor
   4401      * {@link #Character(char)}, as this method is likely to yield
   4402      * significantly better space and time performance by caching
   4403      * frequently requested values.
   4404      *
   4405      * This method will always cache values in the range {@code
   4406      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   4407      * cache other values outside of this range.
   4408      *
   4409      * @param  c a char value.
   4410      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   4411      * @since  1.5
   4412      */
   4413     public static Character valueOf(char c) {
   4414         if (c <= 127) { // must cache
   4415             return CharacterCache.cache[(int)c];
   4416         }
   4417         return new Character(c);
   4418     }
   4419 
   4420     /**
   4421      * Returns the value of this {@code Character} object.
   4422      * @return  the primitive {@code char} value represented by
   4423      *          this object.
   4424      */
   4425     public char charValue() {
   4426         return value;
   4427     }
   4428 
   4429     /**
   4430      * Returns a hash code for this {@code Character}; equal to the result
   4431      * of invoking {@code charValue()}.
   4432      *
   4433      * @return a hash code value for this {@code Character}
   4434      */
   4435     public int hashCode() {
   4436         return Character.hashCode(value);
   4437     }
   4438 
   4439     /**
   4440      * Returns a hash code for a {@code char} value; compatible with
   4441      * {@code Character.hashCode()}.
   4442      *
   4443      * @since 1.8
   4444      *
   4445      * @param value The {@code char} for which to return a hash code.
   4446      * @return a hash code value for a {@code char} value.
   4447      */
   4448     public static int hashCode(char value) {
   4449         return (int)value;
   4450     }
   4451 
   4452     /**
   4453      * Compares this object against the specified object.
   4454      * The result is {@code true} if and only if the argument is not
   4455      * {@code null} and is a {@code Character} object that
   4456      * represents the same {@code char} value as this object.
   4457      *
   4458      * @param   obj   the object to compare with.
   4459      * @return  {@code true} if the objects are the same;
   4460      *          {@code false} otherwise.
   4461      */
   4462     public boolean equals(Object obj) {
   4463         if (obj instanceof Character) {
   4464             return value == ((Character)obj).charValue();
   4465         }
   4466         return false;
   4467     }
   4468 
   4469     /**
   4470      * Returns a {@code String} object representing this
   4471      * {@code Character}'s value.  The result is a string of
   4472      * length 1 whose sole component is the primitive
   4473      * {@code char} value represented by this
   4474      * {@code Character} object.
   4475      *
   4476      * @return  a string representation of this object.
   4477      */
   4478     public String toString() {
   4479         char buf[] = {value};
   4480         return String.valueOf(buf);
   4481     }
   4482 
   4483     /**
   4484      * Returns a {@code String} object representing the
   4485      * specified {@code char}.  The result is a string of length
   4486      * 1 consisting solely of the specified {@code char}.
   4487      *
   4488      * @param c the {@code char} to be converted
   4489      * @return the string representation of the specified {@code char}
   4490      * @since 1.4
   4491      */
   4492     public static String toString(char c) {
   4493         return String.valueOf(c);
   4494     }
   4495 
   4496     /**
   4497      * Determines whether the specified code point is a valid
   4498      * <a href="http://www.unicode.org/glossary/#code_point">
   4499      * Unicode code point value</a>.
   4500      *
   4501      * @param  codePoint the Unicode code point to be tested
   4502      * @return {@code true} if the specified code point value is between
   4503      *         {@link #MIN_CODE_POINT} and
   4504      *         {@link #MAX_CODE_POINT} inclusive;
   4505      *         {@code false} otherwise.
   4506      * @since  1.5
   4507      */
   4508     public static boolean isValidCodePoint(int codePoint) {
   4509         // Optimized form of:
   4510         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   4511         int plane = codePoint >>> 16;
   4512         return plane < ((MAX_CODE_POINT + 1) >>> 16);
   4513     }
   4514 
   4515     /**
   4516      * Determines whether the specified character (Unicode code point)
   4517      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
   4518      * Such code points can be represented using a single {@code char}.
   4519      *
   4520      * @param  codePoint the character (Unicode code point) to be tested
   4521      * @return {@code true} if the specified code point is between
   4522      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   4523      *         {@code false} otherwise.
   4524      * @since  1.7
   4525      */
   4526     public static boolean isBmpCodePoint(int codePoint) {
   4527         return codePoint >>> 16 == 0;
   4528         // Optimized form of:
   4529         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   4530         // We consistently use logical shift (>>>) to facilitate
   4531         // additional runtime optimizations.
   4532     }
   4533 
   4534     /**
   4535      * Determines whether the specified character (Unicode code point)
   4536      * is in the <a href="#supplementary">supplementary character</a> range.
   4537      *
   4538      * @param  codePoint the character (Unicode code point) to be tested
   4539      * @return {@code true} if the specified code point is between
   4540      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   4541      *         {@link #MAX_CODE_POINT} inclusive;
   4542      *         {@code false} otherwise.
   4543      * @since  1.5
   4544      */
   4545     public static boolean isSupplementaryCodePoint(int codePoint) {
   4546         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
   4547             && codePoint <  MAX_CODE_POINT + 1;
   4548     }
   4549 
   4550     /**
   4551      * Determines if the given {@code char} value is a
   4552      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   4553      * Unicode high-surrogate code unit</a>
   4554      * (also known as <i>leading-surrogate code unit</i>).
   4555      *
   4556      * <p>Such values do not represent characters by themselves,
   4557      * but are used in the representation of
   4558      * <a href="#supplementary">supplementary characters</a>
   4559      * in the UTF-16 encoding.
   4560      *
   4561      * @param  ch the {@code char} value to be tested.
   4562      * @return {@code true} if the {@code char} value is between
   4563      *         {@link #MIN_HIGH_SURROGATE} and
   4564      *         {@link #MAX_HIGH_SURROGATE} inclusive;
   4565      *         {@code false} otherwise.
   4566      * @see    Character#isLowSurrogate(char)
   4567      * @see    Character.UnicodeBlock#of(int)
   4568      * @since  1.5
   4569      */
   4570     public static boolean isHighSurrogate(char ch) {
   4571         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   4572         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
   4573     }
   4574 
   4575     /**
   4576      * Determines if the given {@code char} value is a
   4577      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   4578      * Unicode low-surrogate code unit</a>
   4579      * (also known as <i>trailing-surrogate code unit</i>).
   4580      *
   4581      * <p>Such values do not represent characters by themselves,
   4582      * but are used in the representation of
   4583      * <a href="#supplementary">supplementary characters</a>
   4584      * in the UTF-16 encoding.
   4585      *
   4586      * @param  ch the {@code char} value to be tested.
   4587      * @return {@code true} if the {@code char} value is between
   4588      *         {@link #MIN_LOW_SURROGATE} and
   4589      *         {@link #MAX_LOW_SURROGATE} inclusive;
   4590      *         {@code false} otherwise.
   4591      * @see    Character#isHighSurrogate(char)
   4592      * @since  1.5
   4593      */
   4594     public static boolean isLowSurrogate(char ch) {
   4595         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
   4596     }
   4597 
   4598     /**
   4599      * Determines if the given {@code char} value is a Unicode
   4600      * <i>surrogate code unit</i>.
   4601      *
   4602      * <p>Such values do not represent characters by themselves,
   4603      * but are used in the representation of
   4604      * <a href="#supplementary">supplementary characters</a>
   4605      * in the UTF-16 encoding.
   4606      *
   4607      * <p>A char value is a surrogate code unit if and only if it is either
   4608      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   4609      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   4610      *
   4611      * @param  ch the {@code char} value to be tested.
   4612      * @return {@code true} if the {@code char} value is between
   4613      *         {@link #MIN_SURROGATE} and
   4614      *         {@link #MAX_SURROGATE} inclusive;
   4615      *         {@code false} otherwise.
   4616      * @since  1.7
   4617      */
   4618     public static boolean isSurrogate(char ch) {
   4619         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
   4620     }
   4621 
   4622     /**
   4623      * Determines whether the specified pair of {@code char}
   4624      * values is a valid
   4625      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   4626      * Unicode surrogate pair</a>.
   4627      *
   4628      * <p>This method is equivalent to the expression:
   4629      * <blockquote><pre>
   4630      * {@code isHighSurrogate(high) && isLowSurrogate(low)}
   4631      * </pre></blockquote>
   4632      *
   4633      * @param  high the high-surrogate code value to be tested
   4634      * @param  low the low-surrogate code value to be tested
   4635      * @return {@code true} if the specified high and
   4636      * low-surrogate code values represent a valid surrogate pair;
   4637      * {@code false} otherwise.
   4638      * @since  1.5
   4639      */
   4640     public static boolean isSurrogatePair(char high, char low) {
   4641         return isHighSurrogate(high) && isLowSurrogate(low);
   4642     }
   4643 
   4644     /**
   4645      * Determines the number of {@code char} values needed to
   4646      * represent the specified character (Unicode code point). If the
   4647      * specified character is equal to or greater than 0x10000, then
   4648      * the method returns 2. Otherwise, the method returns 1.
   4649      *
   4650      * <p>This method doesn't validate the specified character to be a
   4651      * valid Unicode code point. The caller must validate the
   4652      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
   4653      * if necessary.
   4654      *
   4655      * @param   codePoint the character (Unicode code point) to be tested.
   4656      * @return  2 if the character is a valid supplementary character; 1 otherwise.
   4657      * @see     Character#isSupplementaryCodePoint(int)
   4658      * @since   1.5
   4659      */
   4660     public static int charCount(int codePoint) {
   4661         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
   4662     }
   4663 
   4664     /**
   4665      * Converts the specified surrogate pair to its supplementary code
   4666      * point value. This method does not validate the specified
   4667      * surrogate pair. The caller must validate it using {@link
   4668      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
   4669      *
   4670      * @param  high the high-surrogate code unit
   4671      * @param  low the low-surrogate code unit
   4672      * @return the supplementary code point composed from the
   4673      *         specified surrogate pair.
   4674      * @since  1.5
   4675      */
   4676     public static int toCodePoint(char high, char low) {
   4677         // Optimized form of:
   4678         // return ((high - MIN_HIGH_SURROGATE) << 10)
   4679         //         + (low - MIN_LOW_SURROGATE)
   4680         //         + MIN_SUPPLEMENTARY_CODE_POINT;
   4681         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
   4682                                        - (MIN_HIGH_SURROGATE << 10)
   4683                                        - MIN_LOW_SURROGATE);
   4684     }
   4685 
   4686     /**
   4687      * Returns the code point at the given index of the
   4688      * {@code CharSequence}. If the {@code char} value at
   4689      * the given index in the {@code CharSequence} is in the
   4690      * high-surrogate range, the following index is less than the
   4691      * length of the {@code CharSequence}, and the
   4692      * {@code char} value at the following index is in the
   4693      * low-surrogate range, then the supplementary code point
   4694      * corresponding to this surrogate pair is returned. Otherwise,
   4695      * the {@code char} value at the given index is returned.
   4696      *
   4697      * @param seq a sequence of {@code char} values (Unicode code
   4698      * units)
   4699      * @param index the index to the {@code char} values (Unicode
   4700      * code units) in {@code seq} to be converted
   4701      * @return the Unicode code point at the given index
   4702      * @exception NullPointerException if {@code seq} is null.
   4703      * @exception IndexOutOfBoundsException if the value
   4704      * {@code index} is negative or not less than
   4705      * {@link CharSequence#length() seq.length()}.
   4706      * @since  1.5
   4707      */
   4708     public static int codePointAt(CharSequence seq, int index) {
   4709         char c1 = seq.charAt(index++);
   4710         if (isHighSurrogate(c1)) {
   4711             if (index < seq.length()) {
   4712                 char c2 = seq.charAt(index);
   4713                 if (isLowSurrogate(c2)) {
   4714                     return toCodePoint(c1, c2);
   4715                 }
   4716             }
   4717         }
   4718         return c1;
   4719     }
   4720 
   4721     /**
   4722      * Returns the code point at the given index of the
   4723      * {@code char} array. If the {@code char} value at
   4724      * the given index in the {@code char} array is in the
   4725      * high-surrogate range, the following index is less than the
   4726      * length of the {@code char} array, and the
   4727      * {@code char} value at the following index is in the
   4728      * low-surrogate range, then the supplementary code point
   4729      * corresponding to this surrogate pair is returned. Otherwise,
   4730      * the {@code char} value at the given index is returned.
   4731      *
   4732      * @param a the {@code char} array
   4733      * @param index the index to the {@code char} values (Unicode
   4734      * code units) in the {@code char} array to be converted
   4735      * @return the Unicode code point at the given index
   4736      * @exception NullPointerException if {@code a} is null.
   4737      * @exception IndexOutOfBoundsException if the value
   4738      * {@code index} is negative or not less than
   4739      * the length of the {@code char} array.
   4740      * @since  1.5
   4741      */
   4742     public static int codePointAt(char[] a, int index) {
   4743         return codePointAtImpl(a, index, a.length);
   4744     }
   4745 
   4746     /**
   4747      * Returns the code point at the given index of the
   4748      * {@code char} array, where only array elements with
   4749      * {@code index} less than {@code limit} can be used. If
   4750      * the {@code char} value at the given index in the
   4751      * {@code char} array is in the high-surrogate range, the
   4752      * following index is less than the {@code limit}, and the
   4753      * {@code char} value at the following index is in the
   4754      * low-surrogate range, then the supplementary code point
   4755      * corresponding to this surrogate pair is returned. Otherwise,
   4756      * the {@code char} value at the given index is returned.
   4757      *
   4758      * @param a the {@code char} array
   4759      * @param index the index to the {@code char} values (Unicode
   4760      * code units) in the {@code char} array to be converted
   4761      * @param limit the index after the last array element that
   4762      * can be used in the {@code char} array
   4763      * @return the Unicode code point at the given index
   4764      * @exception NullPointerException if {@code a} is null.
   4765      * @exception IndexOutOfBoundsException if the {@code index}
   4766      * argument is negative or not less than the {@code limit}
   4767      * argument, or if the {@code limit} argument is negative or
   4768      * greater than the length of the {@code char} array.
   4769      * @since  1.5
   4770      */
   4771     public static int codePointAt(char[] a, int index, int limit) {
   4772         if (index >= limit || limit < 0 || limit > a.length) {
   4773             throw new IndexOutOfBoundsException();
   4774         }
   4775         return codePointAtImpl(a, index, limit);
   4776     }
   4777 
   4778     // throws ArrayIndexOutOfBoundsException if index out of bounds
   4779     static int codePointAtImpl(char[] a, int index, int limit) {
   4780         char c1 = a[index++];
   4781         if (isHighSurrogate(c1)) {
   4782             if (index < limit) {
   4783                 char c2 = a[index];
   4784                 if (isLowSurrogate(c2)) {
   4785                     return toCodePoint(c1, c2);
   4786                 }
   4787             }
   4788         }
   4789         return c1;
   4790     }
   4791 
   4792     /**
   4793      * Returns the code point preceding the given index of the
   4794      * {@code CharSequence}. If the {@code char} value at
   4795      * {@code (index - 1)} in the {@code CharSequence} is in
   4796      * the low-surrogate range, {@code (index - 2)} is not
   4797      * negative, and the {@code char} value at {@code (index - 2)}
   4798      * in the {@code CharSequence} is in the
   4799      * high-surrogate range, then the supplementary code point
   4800      * corresponding to this surrogate pair is returned. Otherwise,
   4801      * the {@code char} value at {@code (index - 1)} is
   4802      * returned.
   4803      *
   4804      * @param seq the {@code CharSequence} instance
   4805      * @param index the index following the code point that should be returned
   4806      * @return the Unicode code point value before the given index.
   4807      * @exception NullPointerException if {@code seq} is null.
   4808      * @exception IndexOutOfBoundsException if the {@code index}
   4809      * argument is less than 1 or greater than {@link
   4810      * CharSequence#length() seq.length()}.
   4811      * @since  1.5
   4812      */
   4813     public static int codePointBefore(CharSequence seq, int index) {
   4814         char c2 = seq.charAt(--index);
   4815         if (isLowSurrogate(c2)) {
   4816             if (index > 0) {
   4817                 char c1 = seq.charAt(--index);
   4818                 if (isHighSurrogate(c1)) {
   4819                     return toCodePoint(c1, c2);
   4820                 }
   4821             }
   4822         }
   4823         return c2;
   4824     }
   4825 
   4826     /**
   4827      * Returns the code point preceding the given index of the
   4828      * {@code char} array. If the {@code char} value at
   4829      * {@code (index - 1)} in the {@code char} array is in
   4830      * the low-surrogate range, {@code (index - 2)} is not
   4831      * negative, and the {@code char} value at {@code (index - 2)}
   4832      * in the {@code char} array is in the
   4833      * high-surrogate range, then the supplementary code point
   4834      * corresponding to this surrogate pair is returned. Otherwise,
   4835      * the {@code char} value at {@code (index - 1)} is
   4836      * returned.
   4837      *
   4838      * @param a the {@code char} array
   4839      * @param index the index following the code point that should be returned
   4840      * @return the Unicode code point value before the given index.
   4841      * @exception NullPointerException if {@code a} is null.
   4842      * @exception IndexOutOfBoundsException if the {@code index}
   4843      * argument is less than 1 or greater than the length of the
   4844      * {@code char} array
   4845      * @since  1.5
   4846      */
   4847     public static int codePointBefore(char[] a, int index) {
   4848         return codePointBeforeImpl(a, index, 0);
   4849     }
   4850 
   4851     /**
   4852      * Returns the code point preceding the given index of the
   4853      * {@code char} array, where only array elements with
   4854      * {@code index} greater than or equal to {@code start}
   4855      * can be used. If the {@code char} value at {@code (index - 1)}
   4856      * in the {@code char} array is in the
   4857      * low-surrogate range, {@code (index - 2)} is not less than
   4858      * {@code start}, and the {@code char} value at
   4859      * {@code (index - 2)} in the {@code char} array is in
   4860      * the high-surrogate range, then the supplementary code point
   4861      * corresponding to this surrogate pair is returned. Otherwise,
   4862      * the {@code char} value at {@code (index - 1)} is
   4863      * returned.
   4864      *
   4865      * @param a the {@code char} array
   4866      * @param index the index following the code point that should be returned
   4867      * @param start the index of the first array element in the
   4868      * {@code char} array
   4869      * @return the Unicode code point value before the given index.
   4870      * @exception NullPointerException if {@code a} is null.
   4871      * @exception IndexOutOfBoundsException if the {@code index}
   4872      * argument is not greater than the {@code start} argument or
   4873      * is greater than the length of the {@code char} array, or
   4874      * if the {@code start} argument is negative or not less than
   4875      * the length of the {@code char} array.
   4876      * @since  1.5
   4877      */
   4878     public static int codePointBefore(char[] a, int index, int start) {
   4879         if (index <= start || start < 0 || start >= a.length) {
   4880             throw new IndexOutOfBoundsException();
   4881         }
   4882         return codePointBeforeImpl(a, index, start);
   4883     }
   4884 
   4885     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
   4886     static int codePointBeforeImpl(char[] a, int index, int start) {
   4887         char c2 = a[--index];
   4888         if (isLowSurrogate(c2)) {
   4889             if (index > start) {
   4890                 char c1 = a[--index];
   4891                 if (isHighSurrogate(c1)) {
   4892                     return toCodePoint(c1, c2);
   4893                 }
   4894             }
   4895         }
   4896         return c2;
   4897     }
   4898 
   4899     /**
   4900      * Returns the leading surrogate (a
   4901      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   4902      * high surrogate code unit</a>) of the
   4903      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   4904      * surrogate pair</a>
   4905      * representing the specified supplementary character (Unicode
   4906      * code point) in the UTF-16 encoding.  If the specified character
   4907      * is not a
   4908      * <a href="Character.html#supplementary">supplementary character</a>,
   4909      * an unspecified {@code char} is returned.
   4910      *
   4911      * <p>If
   4912      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
   4913      * is {@code true}, then
   4914      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
   4915      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
   4916      * are also always {@code true}.
   4917      *
   4918      * @param   codePoint a supplementary character (Unicode code point)
   4919      * @return  the leading surrogate code unit used to represent the
   4920      *          character in the UTF-16 encoding
   4921      * @since   1.7
   4922      */
   4923     public static char highSurrogate(int codePoint) {
   4924         return (char) ((codePoint >>> 10)
   4925             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
   4926     }
   4927 
   4928     /**
   4929      * Returns the trailing surrogate (a
   4930      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   4931      * low surrogate code unit</a>) of the
   4932      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   4933      * surrogate pair</a>
   4934      * representing the specified supplementary character (Unicode
   4935      * code point) in the UTF-16 encoding.  If the specified character
   4936      * is not a
   4937      * <a href="Character.html#supplementary">supplementary character</a>,
   4938      * an unspecified {@code char} is returned.
   4939      *
   4940      * <p>If
   4941      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
   4942      * is {@code true}, then
   4943      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
   4944      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
   4945      * are also always {@code true}.
   4946      *
   4947      * @param   codePoint a supplementary character (Unicode code point)
   4948      * @return  the trailing surrogate code unit used to represent the
   4949      *          character in the UTF-16 encoding
   4950      * @since   1.7
   4951      */
   4952     public static char lowSurrogate(int codePoint) {
   4953         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
   4954     }
   4955 
   4956     /**
   4957      * Converts the specified character (Unicode code point) to its
   4958      * UTF-16 representation. If the specified code point is a BMP
   4959      * (Basic Multilingual Plane or Plane 0) value, the same value is
   4960      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
   4961      * specified code point is a supplementary character, its
   4962      * surrogate values are stored in {@code dst[dstIndex]}
   4963      * (high-surrogate) and {@code dst[dstIndex+1]}
   4964      * (low-surrogate), and 2 is returned.
   4965      *
   4966      * @param  codePoint the character (Unicode code point) to be converted.
   4967      * @param  dst an array of {@code char} in which the
   4968      * {@code codePoint}'s UTF-16 value is stored.
   4969      * @param dstIndex the start index into the {@code dst}
   4970      * array where the converted value is stored.
   4971      * @return 1 if the code point is a BMP code point, 2 if the
   4972      * code point is a supplementary code point.
   4973      * @exception IllegalArgumentException if the specified
   4974      * {@code codePoint} is not a valid Unicode code point.
   4975      * @exception NullPointerException if the specified {@code dst} is null.
   4976      * @exception IndexOutOfBoundsException if {@code dstIndex}
   4977      * is negative or not less than {@code dst.length}, or if
   4978      * {@code dst} at {@code dstIndex} doesn't have enough
   4979      * array element(s) to store the resulting {@code char}
   4980      * value(s). (If {@code dstIndex} is equal to
   4981      * {@code dst.length-1} and the specified
   4982      * {@code codePoint} is a supplementary character, the
   4983      * high-surrogate value is not stored in
   4984      * {@code dst[dstIndex]}.)
   4985      * @since  1.5
   4986      */
   4987     public static int toChars(int codePoint, char[] dst, int dstIndex) {
   4988         if (isBmpCodePoint(codePoint)) {
   4989             dst[dstIndex] = (char) codePoint;
   4990             return 1;
   4991         } else if (isValidCodePoint(codePoint)) {
   4992             toSurrogates(codePoint, dst, dstIndex);
   4993             return 2;
   4994         } else {
   4995             throw new IllegalArgumentException();
   4996         }
   4997     }
   4998 
   4999     /**
   5000      * Converts the specified character (Unicode code point) to its
   5001      * UTF-16 representation stored in a {@code char} array. If
   5002      * the specified code point is a BMP (Basic Multilingual Plane or
   5003      * Plane 0) value, the resulting {@code char} array has
   5004      * the same value as {@code codePoint}. If the specified code
   5005      * point is a supplementary code point, the resulting
   5006      * {@code char} array has the corresponding surrogate pair.
   5007      *
   5008      * @param  codePoint a Unicode code point
   5009      * @return a {@code char} array having
   5010      *         {@code codePoint}'s UTF-16 representation.
   5011      * @exception IllegalArgumentException if the specified
   5012      * {@code codePoint} is not a valid Unicode code point.
   5013      * @since  1.5
   5014      */
   5015     public static char[] toChars(int codePoint) {
   5016         if (isBmpCodePoint(codePoint)) {
   5017             return new char[] { (char) codePoint };
   5018         } else if (isValidCodePoint(codePoint)) {
   5019             char[] result = new char[2];
   5020             toSurrogates(codePoint, result, 0);
   5021             return result;
   5022         } else {
   5023             throw new IllegalArgumentException();
   5024         }
   5025     }
   5026 
   5027     static void toSurrogates(int codePoint, char[] dst, int index) {
   5028         // We write elements "backwards" to guarantee all-or-nothing
   5029         dst[index+1] = lowSurrogate(codePoint);
   5030         dst[index] = highSurrogate(codePoint);
   5031     }
   5032 
   5033     /**
   5034      * Returns the number of Unicode code points in the text range of
   5035      * the specified char sequence. The text range begins at the
   5036      * specified {@code beginIndex} and extends to the
   5037      * {@code char} at index {@code endIndex - 1}. Thus the
   5038      * length (in {@code char}s) of the text range is
   5039      * {@code endIndex-beginIndex}. Unpaired surrogates within
   5040      * the text range count as one code point each.
   5041      *
   5042      * @param seq the char sequence
   5043      * @param beginIndex the index to the first {@code char} of
   5044      * the text range.
   5045      * @param endIndex the index after the last {@code char} of
   5046      * the text range.
   5047      * @return the number of Unicode code points in the specified text
   5048      * range
   5049      * @exception NullPointerException if {@code seq} is null.
   5050      * @exception IndexOutOfBoundsException if the
   5051      * {@code beginIndex} is negative, or {@code endIndex}
   5052      * is larger than the length of the given sequence, or
   5053      * {@code beginIndex} is larger than {@code endIndex}.
   5054      * @since  1.5
   5055      */
   5056     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
   5057         int length = seq.length();
   5058         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
   5059             throw new IndexOutOfBoundsException();
   5060         }
   5061         int n = endIndex - beginIndex;
   5062         for (int i = beginIndex; i < endIndex; ) {
   5063             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
   5064                 isLowSurrogate(seq.charAt(i))) {
   5065                 n--;
   5066                 i++;
   5067             }
   5068         }
   5069         return n;
   5070     }
   5071 
   5072     /**
   5073      * Returns the number of Unicode code points in a subarray of the
   5074      * {@code char} array argument. The {@code offset}
   5075      * argument is the index of the first {@code char} of the
   5076      * subarray and the {@code count} argument specifies the
   5077      * length of the subarray in {@code char}s. Unpaired
   5078      * surrogates within the subarray count as one code point each.
   5079      *
   5080      * @param a the {@code char} array
   5081      * @param offset the index of the first {@code char} in the
   5082      * given {@code char} array
   5083      * @param count the length of the subarray in {@code char}s
   5084      * @return the number of Unicode code points in the specified subarray
   5085      * @exception NullPointerException if {@code a} is null.
   5086      * @exception IndexOutOfBoundsException if {@code offset} or
   5087      * {@code count} is negative, or if {@code offset +
   5088      * count} is larger than the length of the given array.
   5089      * @since  1.5
   5090      */
   5091     public static int codePointCount(char[] a, int offset, int count) {
   5092         if (count > a.length - offset || offset < 0 || count < 0) {
   5093             throw new IndexOutOfBoundsException();
   5094         }
   5095         return codePointCountImpl(a, offset, count);
   5096     }
   5097 
   5098     static int codePointCountImpl(char[] a, int offset, int count) {
   5099         int endIndex = offset + count;
   5100         int n = count;
   5101         for (int i = offset; i < endIndex; ) {
   5102             if (isHighSurrogate(a[i++]) && i < endIndex &&
   5103                 isLowSurrogate(a[i])) {
   5104                 n--;
   5105                 i++;
   5106             }
   5107         }
   5108         return n;
   5109     }
   5110 
   5111     /**
   5112      * Returns the index within the given char sequence that is offset
   5113      * from the given {@code index} by {@code codePointOffset}
   5114      * code points. Unpaired surrogates within the text range given by
   5115      * {@code index} and {@code codePointOffset} count as
   5116      * one code point each.
   5117      *
   5118      * @param seq the char sequence
   5119      * @param index the index to be offset
   5120      * @param codePointOffset the offset in code points
   5121      * @return the index within the char sequence
   5122      * @exception NullPointerException if {@code seq} is null.
   5123      * @exception IndexOutOfBoundsException if {@code index}
   5124      *   is negative or larger than the length of the char sequence,
   5125      *   or if {@code codePointOffset} is positive and the
   5126      *   subsequence starting with {@code index} has fewer than
   5127      *   {@code codePointOffset} code points, or if
   5128      *   {@code codePointOffset} is negative and the subsequence
   5129      *   before {@code index} has fewer than the absolute value
   5130      *   of {@code codePointOffset} code points.
   5131      * @since 1.5
   5132      */
   5133     public static int offsetByCodePoints(CharSequence seq, int index,
   5134                                          int codePointOffset) {
   5135         int length = seq.length();
   5136         if (index < 0 || index > length) {
   5137             throw new IndexOutOfBoundsException();
   5138         }
   5139 
   5140         int x = index;
   5141         if (codePointOffset >= 0) {
   5142             int i;
   5143             for (i = 0; x < length && i < codePointOffset; i++) {
   5144                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
   5145                     isLowSurrogate(seq.charAt(x))) {
   5146                     x++;
   5147                 }
   5148             }
   5149             if (i < codePointOffset) {
   5150                 throw new IndexOutOfBoundsException();
   5151             }
   5152         } else {
   5153             int i;
   5154             for (i = codePointOffset; x > 0 && i < 0; i++) {
   5155                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
   5156                     isHighSurrogate(seq.charAt(x-1))) {
   5157                     x--;
   5158                 }
   5159             }
   5160             if (i < 0) {
   5161                 throw new IndexOutOfBoundsException();
   5162             }
   5163         }
   5164         return x;
   5165     }
   5166 
   5167     /**
   5168      * Returns the index within the given {@code char} subarray
   5169      * that is offset from the given {@code index} by
   5170      * {@code codePointOffset} code points. The
   5171      * {@code start} and {@code count} arguments specify a
   5172      * subarray of the {@code char} array. Unpaired surrogates
   5173      * within the text range given by {@code index} and
   5174      * {@code codePointOffset} count as one code point each.
   5175      *
   5176      * @param a the {@code char} array
   5177      * @param start the index of the first {@code char} of the
   5178      * subarray
   5179      * @param count the length of the subarray in {@code char}s
   5180      * @param index the index to be offset
   5181      * @param codePointOffset the offset in code points
   5182      * @return the index within the subarray
   5183      * @exception NullPointerException if {@code a} is null.
   5184      * @exception IndexOutOfBoundsException
   5185      *   if {@code start} or {@code count} is negative,
   5186      *   or if {@code start + count} is larger than the length of
   5187      *   the given array,
   5188      *   or if {@code index} is less than {@code start} or
   5189      *   larger than {@code start + count},
   5190      *   or if {@code codePointOffset} is positive and the text range
   5191      *   starting with {@code index} and ending with {@code start + count - 1}
   5192      *   has fewer than {@code codePointOffset} code
   5193      *   points,
   5194      *   or if {@code codePointOffset} is negative and the text range
   5195      *   starting with {@code start} and ending with {@code index - 1}
   5196      *   has fewer than the absolute value of
   5197      *   {@code codePointOffset} code points.
   5198      * @since 1.5
   5199      */
   5200     public static int offsetByCodePoints(char[] a, int start, int count,
   5201                                          int index, int codePointOffset) {
   5202         if (count > a.length-start || start < 0 || count < 0
   5203             || index < start || index > start+count) {
   5204             throw new IndexOutOfBoundsException();
   5205         }
   5206         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
   5207     }
   5208 
   5209     static int offsetByCodePointsImpl(char[]a, int start, int count,
   5210                                       int index, int codePointOffset) {
   5211         int x = index;
   5212         if (codePointOffset >= 0) {
   5213             int limit = start + count;
   5214             int i;
   5215             for (i = 0; x < limit && i < codePointOffset; i++) {
   5216                 if (isHighSurrogate(a[x++]) && x < limit &&
   5217                     isLowSurrogate(a[x])) {
   5218                     x++;
   5219                 }
   5220             }
   5221             if (i < codePointOffset) {
   5222                 throw new IndexOutOfBoundsException();
   5223             }
   5224         } else {
   5225             int i;
   5226             for (i = codePointOffset; x > start && i < 0; i++) {
   5227                 if (isLowSurrogate(a[--x]) && x > start &&
   5228                     isHighSurrogate(a[x-1])) {
   5229                     x--;
   5230                 }
   5231             }
   5232             if (i < 0) {
   5233                 throw new IndexOutOfBoundsException();
   5234             }
   5235         }
   5236         return x;
   5237     }
   5238 
   5239     /**
   5240      * Determines if the specified character is a lowercase character.
   5241      * <p>
   5242      * A character is lowercase if its general category type, provided
   5243      * by {@code Character.getType(ch)}, is
   5244      * {@code LOWERCASE_LETTER}, or it has contributory property
   5245      * Other_Lowercase as defined by the Unicode Standard.
   5246      * <p>
   5247      * The following are examples of lowercase characters:
   5248      * <p><blockquote><pre>
   5249      * a b c d e f g h i j k l m n o p q r s t u v w x y z
   5250      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   5251      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   5252      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   5253      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   5254      * </pre></blockquote>
   5255      * <p> Many other Unicode characters are lowercase too.
   5256      *
   5257      * <p><b>Note:</b> This method cannot handle <a
   5258      * href="#supplementary"> supplementary characters</a>. To support
   5259      * all Unicode characters, including supplementary characters, use
   5260      * the {@link #isLowerCase(int)} method.
   5261      *
   5262      * @param   ch   the character to be tested.
   5263      * @return  {@code true} if the character is lowercase;
   5264      *          {@code false} otherwise.
   5265      * @see     Character#isLowerCase(char)
   5266      * @see     Character#isTitleCase(char)
   5267      * @see     Character#toLowerCase(char)
   5268      * @see     Character#getType(char)
   5269      */
   5270     public static boolean isLowerCase(char ch) {
   5271         return isLowerCase((int)ch);
   5272     }
   5273 
   5274     /**
   5275      * Determines if the specified character (Unicode code point) is a
   5276      * lowercase character.
   5277      * <p>
   5278      * A character is lowercase if its general category type, provided
   5279      * by {@link Character#getType getType(codePoint)}, is
   5280      * {@code LOWERCASE_LETTER}, or it has contributory property
   5281      * Other_Lowercase as defined by the Unicode Standard.
   5282      * <p>
   5283      * The following are examples of lowercase characters:
   5284      * <p><blockquote><pre>
   5285      * a b c d e f g h i j k l m n o p q r s t u v w x y z
   5286      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   5287      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   5288      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   5289      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   5290      * </pre></blockquote>
   5291      * <p> Many other Unicode characters are lowercase too.
   5292      *
   5293      * @param   codePoint the character (Unicode code point) to be tested.
   5294      * @return  {@code true} if the character is lowercase;
   5295      *          {@code false} otherwise.
   5296      * @see     Character#isLowerCase(int)
   5297      * @see     Character#isTitleCase(int)
   5298      * @see     Character#toLowerCase(int)
   5299      * @see     Character#getType(int)
   5300      * @since   1.5
   5301      */
   5302     public static boolean isLowerCase(int codePoint) {
   5303         return isLowerCaseImpl(codePoint);
   5304     }
   5305 
   5306     static native boolean isLowerCaseImpl(int codePoint);
   5307 
   5308     /**
   5309      * Determines if the specified character is an uppercase character.
   5310      * <p>
   5311      * A character is uppercase if its general category type, provided by
   5312      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
   5313      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
   5314      * <p>
   5315      * The following are examples of uppercase characters:
   5316      * <p><blockquote><pre>
   5317      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   5318      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   5319      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   5320      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   5321      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   5322      * </pre></blockquote>
   5323      * <p> Many other Unicode characters are uppercase too.<p>
   5324      *
   5325      * <p><b>Note:</b> This method cannot handle <a
   5326      * href="#supplementary"> supplementary characters</a>. To support
   5327      * all Unicode characters, including supplementary characters, use
   5328      * the {@link #isUpperCase(int)} method.
   5329      *
   5330      * @param   ch   the character to be tested.
   5331      * @return  {@code true} if the character is uppercase;
   5332      *          {@code false} otherwise.
   5333      * @see     Character#isLowerCase(char)
   5334      * @see     Character#isTitleCase(char)
   5335      * @see     Character#toUpperCase(char)
   5336      * @see     Character#getType(char)
   5337      * @since   1.0
   5338      */
   5339     public static boolean isUpperCase(char ch) {
   5340         return isUpperCase((int)ch);
   5341     }
   5342 
   5343     /**
   5344      * Determines if the specified character (Unicode code point) is an uppercase character.
   5345      * <p>
   5346      * A character is uppercase if its general category type, provided by
   5347      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
   5348      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
   5349      * <p>
   5350      * The following are examples of uppercase characters:
   5351      * <p><blockquote><pre>
   5352      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   5353      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   5354      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   5355      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   5356      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   5357      * </pre></blockquote>
   5358      * <p> Many other Unicode characters are uppercase too.<p>
   5359      *
   5360      * @param   codePoint the character (Unicode code point) to be tested.
   5361      * @return  {@code true} if the character is uppercase;
   5362      *          {@code false} otherwise.
   5363      * @see     Character#isLowerCase(int)
   5364      * @see     Character#isTitleCase(int)
   5365      * @see     Character#toUpperCase(int)
   5366      * @see     Character#getType(int)
   5367      * @since   1.5
   5368      */
   5369     public static boolean isUpperCase(int codePoint) {
   5370         return isUpperCaseImpl(codePoint);
   5371     }
   5372 
   5373     static native boolean isUpperCaseImpl(int codePoint);
   5374 
   5375 
   5376     /**
   5377      * Determines if the specified character is a titlecase character.
   5378      * <p>
   5379      * A character is a titlecase character if its general
   5380      * category type, provided by {@code Character.getType(ch)},
   5381      * is {@code TITLECASE_LETTER}.
   5382      * <p>
   5383      * Some characters look like pairs of Latin letters. For example, there
   5384      * is an uppercase letter that looks like "LJ" and has a corresponding
   5385      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   5386      * is the appropriate form to use when rendering a word in lowercase
   5387      * with initial capitals, as for a book title.
   5388      * <p>
   5389      * These are some of the Unicode characters for which this method returns
   5390      * {@code true}:
   5391      * <ul>
   5392      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
   5393      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
   5394      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
   5395      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
   5396      * </ul>
   5397      * <p> Many other Unicode characters are titlecase too.<p>
   5398      *
   5399      * <p><b>Note:</b> This method cannot handle <a
   5400      * href="#supplementary"> supplementary characters</a>. To support
   5401      * all Unicode characters, including supplementary characters, use
   5402      * the {@link #isTitleCase(int)} method.
   5403      *
   5404      * @param   ch   the character to be tested.
   5405      * @return  {@code true} if the character is titlecase;
   5406      *          {@code false} otherwise.
   5407      * @see     Character#isLowerCase(char)
   5408      * @see     Character#isUpperCase(char)
   5409      * @see     Character#toTitleCase(char)
   5410      * @see     Character#getType(char)
   5411      * @since   1.0.2
   5412      */
   5413     public static boolean isTitleCase(char ch) {
   5414         return isTitleCase((int)ch);
   5415     }
   5416 
   5417     /**
   5418      * Determines if the specified character (Unicode code point) is a titlecase character.
   5419      * <p>
   5420      * A character is a titlecase character if its general
   5421      * category type, provided by {@link Character#getType(int) getType(codePoint)},
   5422      * is {@code TITLECASE_LETTER}.
   5423      * <p>
   5424      * Some characters look like pairs of Latin letters. For example, there
   5425      * is an uppercase letter that looks like "LJ" and has a corresponding
   5426      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   5427      * is the appropriate form to use when rendering a word in lowercase
   5428      * with initial capitals, as for a book title.
   5429      * <p>
   5430      * These are some of the Unicode characters for which this method returns
   5431      * {@code true}:
   5432      * <ul>
   5433      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
   5434      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
   5435      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
   5436      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
   5437      * </ul>
   5438      * <p> Many other Unicode characters are titlecase too.<p>
   5439      *
   5440      * @param   codePoint the character (Unicode code point) to be tested.
   5441      * @return  {@code true} if the character is titlecase;
   5442      *          {@code false} otherwise.
   5443      * @see     Character#isLowerCase(int)
   5444      * @see     Character#isUpperCase(int)
   5445      * @see     Character#toTitleCase(int)
   5446      * @see     Character#getType(int)
   5447      * @since   1.5
   5448      */
   5449     public static boolean isTitleCase(int codePoint) {
   5450         return isTitleCaseImpl(codePoint);
   5451     }
   5452 
   5453     static native boolean isTitleCaseImpl(int codePoint);
   5454 
   5455     /**
   5456      * Determines if the specified character is a digit.
   5457      * <p>
   5458      * A character is a digit if its general category type, provided
   5459      * by {@code Character.getType(ch)}, is
   5460      * {@code DECIMAL_DIGIT_NUMBER}.
   5461      * <p>
   5462      * Some Unicode character ranges that contain digits:
   5463      * <ul>
   5464      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
   5465      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
   5466      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
   5467      *     Arabic-Indic digits
   5468      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
   5469      *     Extended Arabic-Indic digits
   5470      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
   5471      *     Devanagari digits
   5472      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
   5473      *     Fullwidth digits
   5474      * </ul>
   5475      *
   5476      * Many other character ranges contain digits as well.
   5477      *
   5478      * <p><b>Note:</b> This method cannot handle <a
   5479      * href="#supplementary"> supplementary characters</a>. To support
   5480      * all Unicode characters, including supplementary characters, use
   5481      * the {@link #isDigit(int)} method.
   5482      *
   5483      * @param   ch   the character to be tested.
   5484      * @return  {@code true} if the character is a digit;
   5485      *          {@code false} otherwise.
   5486      * @see     Character#digit(char, int)
   5487      * @see     Character#forDigit(int, int)
   5488      * @see     Character#getType(char)
   5489      */
   5490     public static boolean isDigit(char ch) {
   5491         return isDigit((int)ch);
   5492     }
   5493 
   5494     /**
   5495      * Determines if the specified character (Unicode code point) is a digit.
   5496      * <p>
   5497      * A character is a digit if its general category type, provided
   5498      * by {@link Character#getType(int) getType(codePoint)}, is
   5499      * {@code DECIMAL_DIGIT_NUMBER}.
   5500      * <p>
   5501      * Some Unicode character ranges that contain digits:
   5502      * <ul>
   5503      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
   5504      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
   5505      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
   5506      *     Arabic-Indic digits
   5507      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
   5508      *     Extended Arabic-Indic digits
   5509      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
   5510      *     Devanagari digits
   5511      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
   5512      *     Fullwidth digits
   5513      * </ul>
   5514      *
   5515      * Many other character ranges contain digits as well.
   5516      *
   5517      * @param   codePoint the character (Unicode code point) to be tested.
   5518      * @return  {@code true} if the character is a digit;
   5519      *          {@code false} otherwise.
   5520      * @see     Character#forDigit(int, int)
   5521      * @see     Character#getType(int)
   5522      * @since   1.5
   5523      */
   5524     public static boolean isDigit(int codePoint) {
   5525         return isDigitImpl(codePoint);
   5526     }
   5527 
   5528     static native boolean isDigitImpl(int codePoint);
   5529 
   5530     /**
   5531      * Determines if a character is defined in Unicode.
   5532      * <p>
   5533      * A character is defined if at least one of the following is true:
   5534      * <ul>
   5535      * <li>It has an entry in the UnicodeData file.
   5536      * <li>It has a value in a range defined by the UnicodeData file.
   5537      * </ul>
   5538      *
   5539      * <p><b>Note:</b> This method cannot handle <a
   5540      * href="#supplementary"> supplementary characters</a>. To support
   5541      * all Unicode characters, including supplementary characters, use
   5542      * the {@link #isDefined(int)} method.
   5543      *
   5544      * @param   ch   the character to be tested
   5545      * @return  {@code true} if the character has a defined meaning
   5546      *          in Unicode; {@code false} otherwise.
   5547      * @see     Character#isDigit(char)
   5548      * @see     Character#isLetter(char)
   5549      * @see     Character#isLetterOrDigit(char)
   5550      * @see     Character#isLowerCase(char)
   5551      * @see     Character#isTitleCase(char)
   5552      * @see     Character#isUpperCase(char)
   5553      * @since   1.0.2
   5554      */
   5555     public static boolean isDefined(char ch) {
   5556         return isDefined((int)ch);
   5557     }
   5558 
   5559     /**
   5560      * Determines if a character (Unicode code point) is defined in Unicode.
   5561      * <p>
   5562      * A character is defined if at least one of the following is true:
   5563      * <ul>
   5564      * <li>It has an entry in the UnicodeData file.
   5565      * <li>It has a value in a range defined by the UnicodeData file.
   5566      * </ul>
   5567      *
   5568      * @param   codePoint the character (Unicode code point) to be tested.
   5569      * @return  {@code true} if the character has a defined meaning
   5570      *          in Unicode; {@code false} otherwise.
   5571      * @see     Character#isDigit(int)
   5572      * @see     Character#isLetter(int)
   5573      * @see     Character#isLetterOrDigit(int)
   5574      * @see     Character#isLowerCase(int)
   5575      * @see     Character#isTitleCase(int)
   5576      * @see     Character#isUpperCase(int)
   5577      * @since   1.5
   5578      */
   5579     public static boolean isDefined(int codePoint) {
   5580         return isDefinedImpl(codePoint);
   5581     }
   5582 
   5583     static native boolean isDefinedImpl(int codePoint);
   5584 
   5585     /**
   5586      * Determines if the specified character is a letter.
   5587      * <p>
   5588      * A character is considered to be a letter if its general
   5589      * category type, provided by {@code Character.getType(ch)},
   5590      * is any of the following:
   5591      * <ul>
   5592      * <li> {@code UPPERCASE_LETTER}
   5593      * <li> {@code LOWERCASE_LETTER}
   5594      * <li> {@code TITLECASE_LETTER}
   5595      * <li> {@code MODIFIER_LETTER}
   5596      * <li> {@code OTHER_LETTER}
   5597      * </ul>
   5598      *
   5599      * Not all letters have case. Many characters are
   5600      * letters but are neither uppercase nor lowercase nor titlecase.
   5601      *
   5602      * <p><b>Note:</b> This method cannot handle <a
   5603      * href="#supplementary"> supplementary characters</a>. To support
   5604      * all Unicode characters, including supplementary characters, use
   5605      * the {@link #isLetter(int)} method.
   5606      *
   5607      * @param   ch   the character to be tested.
   5608      * @return  {@code true} if the character is a letter;
   5609      *          {@code false} otherwise.
   5610      * @see     Character#isDigit(char)
   5611      * @see     Character#isJavaIdentifierStart(char)
   5612      * @see     Character#isJavaLetter(char)
   5613      * @see     Character#isJavaLetterOrDigit(char)
   5614      * @see     Character#isLetterOrDigit(char)
   5615      * @see     Character#isLowerCase(char)
   5616      * @see     Character#isTitleCase(char)
   5617      * @see     Character#isUnicodeIdentifierStart(char)
   5618      * @see     Character#isUpperCase(char)
   5619      */
   5620     public static boolean isLetter(char ch) {
   5621         return isLetter((int)ch);
   5622     }
   5623 
   5624     /**
   5625      * Determines if the specified character (Unicode code point) is a letter.
   5626      * <p>
   5627      * A character is considered to be a letter if its general
   5628      * category type, provided by {@link Character#getType(int) getType(codePoint)},
   5629      * is any of the following:
   5630      * <ul>
   5631      * <li> {@code UPPERCASE_LETTER}
   5632      * <li> {@code LOWERCASE_LETTER}
   5633      * <li> {@code TITLECASE_LETTER}
   5634      * <li> {@code MODIFIER_LETTER}
   5635      * <li> {@code OTHER_LETTER}
   5636      * </ul>
   5637      *
   5638      * Not all letters have case. Many characters are
   5639      * letters but are neither uppercase nor lowercase nor titlecase.
   5640      *
   5641      * @param   codePoint the character (Unicode code point) to be tested.
   5642      * @return  {@code true} if the character is a letter;
   5643      *          {@code false} otherwise.
   5644      * @see     Character#isDigit(int)
   5645      * @see     Character#isJavaIdentifierStart(int)
   5646      * @see     Character#isLetterOrDigit(int)
   5647      * @see     Character#isLowerCase(int)
   5648      * @see     Character#isTitleCase(int)
   5649      * @see     Character#isUnicodeIdentifierStart(int)
   5650      * @see     Character#isUpperCase(int)
   5651      * @since   1.5
   5652      */
   5653     public static boolean isLetter(int codePoint) {
   5654         return isLetterImpl(codePoint);
   5655     }
   5656 
   5657     static native boolean isLetterImpl(int codePoint);
   5658 
   5659     /**
   5660      * Determines if the specified character is a letter or digit.
   5661      * <p>
   5662      * A character is considered to be a letter or digit if either
   5663      * {@code Character.isLetter(char ch)} or
   5664      * {@code Character.isDigit(char ch)} returns
   5665      * {@code true} for the character.
   5666      *
   5667      * <p><b>Note:</b> This method cannot handle <a
   5668      * href="#supplementary"> supplementary characters</a>. To support
   5669      * all Unicode characters, including supplementary characters, use
   5670      * the {@link #isLetterOrDigit(int)} method.
   5671      *
   5672      * @param   ch   the character to be tested.
   5673      * @return  {@code true} if the character is a letter or digit;
   5674      *          {@code false} otherwise.
   5675      * @see     Character#isDigit(char)
   5676      * @see     Character#isJavaIdentifierPart(char)
   5677      * @see     Character#isJavaLetter(char)
   5678      * @see     Character#isJavaLetterOrDigit(char)
   5679      * @see     Character#isLetter(char)
   5680      * @see     Character#isUnicodeIdentifierPart(char)
   5681      * @since   1.0.2
   5682      */
   5683     public static boolean isLetterOrDigit(char ch) {
   5684         return isLetterOrDigit((int)ch);
   5685     }
   5686 
   5687     /**
   5688      * Determines if the specified character (Unicode code point) is a letter or digit.
   5689      * <p>
   5690      * A character is considered to be a letter or digit if either
   5691      * {@link #isLetter(int) isLetter(codePoint)} or
   5692      * {@link #isDigit(int) isDigit(codePoint)} returns
   5693      * {@code true} for the character.
   5694      *
   5695      * @param   codePoint the character (Unicode code point) to be tested.
   5696      * @return  {@code true} if the character is a letter or digit;
   5697      *          {@code false} otherwise.
   5698      * @see     Character#isDigit(int)
   5699      * @see     Character#isJavaIdentifierPart(int)
   5700      * @see     Character#isLetter(int)
   5701      * @see     Character#isUnicodeIdentifierPart(int)
   5702      * @since   1.5
   5703      */
   5704     public static boolean isLetterOrDigit(int codePoint) {
   5705         return isLetterOrDigitImpl(codePoint);
   5706     }
   5707 
   5708     static native boolean isLetterOrDigitImpl(int codePoint);
   5709 
   5710     /**
   5711      * Determines if the specified character is permissible as the first
   5712      * character in a Java identifier.
   5713      * <p>
   5714      * A character may start a Java identifier if and only if
   5715      * one of the following is true:
   5716      * <ul>
   5717      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   5718      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
   5719      * <li> {@code ch} is a currency symbol (such as {@code '$'})
   5720      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
   5721      * </ul>
   5722      *
   5723      * @param   ch the character to be tested.
   5724      * @return  {@code true} if the character may start a Java
   5725      *          identifier; {@code false} otherwise.
   5726      * @see     Character#isJavaLetterOrDigit(char)
   5727      * @see     Character#isJavaIdentifierStart(char)
   5728      * @see     Character#isJavaIdentifierPart(char)
   5729      * @see     Character#isLetter(char)
   5730      * @see     Character#isLetterOrDigit(char)
   5731      * @see     Character#isUnicodeIdentifierStart(char)
   5732      * @since   1.02
   5733      * @deprecated Replaced by isJavaIdentifierStart(char).
   5734      */
   5735     @Deprecated
   5736     public static boolean isJavaLetter(char ch) {
   5737         return isJavaIdentifierStart(ch);
   5738     }
   5739 
   5740     /**
   5741      * Determines if the specified character may be part of a Java
   5742      * identifier as other than the first character.
   5743      * <p>
   5744      * A character may be part of a Java identifier if and only if any
   5745      * of the following are true:
   5746      * <ul>
   5747      * <li>  it is a letter
   5748      * <li>  it is a currency symbol (such as {@code '$'})
   5749      * <li>  it is a connecting punctuation character (such as {@code '_'})
   5750      * <li>  it is a digit
   5751      * <li>  it is a numeric letter (such as a Roman numeral character)
   5752      * <li>  it is a combining mark
   5753      * <li>  it is a non-spacing mark
   5754      * <li> {@code isIdentifierIgnorable} returns
   5755      * {@code true} for the character.
   5756      * </ul>
   5757      *
   5758      * @param   ch the character to be tested.
   5759      * @return  {@code true} if the character may be part of a
   5760      *          Java identifier; {@code false} otherwise.
   5761      * @see     Character#isJavaLetter(char)
   5762      * @see     Character#isJavaIdentifierStart(char)
   5763      * @see     Character#isJavaIdentifierPart(char)
   5764      * @see     Character#isLetter(char)
   5765      * @see     Character#isLetterOrDigit(char)
   5766      * @see     Character#isUnicodeIdentifierPart(char)
   5767      * @see     Character#isIdentifierIgnorable(char)
   5768      * @since   1.02
   5769      * @deprecated Replaced by isJavaIdentifierPart(char).
   5770      */
   5771     @Deprecated
   5772     public static boolean isJavaLetterOrDigit(char ch) {
   5773         return isJavaIdentifierPart(ch);
   5774     }
   5775 
   5776     /**
   5777      * Determines if the specified character (Unicode code point) is an alphabet.
   5778      * <p>
   5779      * A character is considered to be alphabetic if its general category type,
   5780      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
   5781      * the following:
   5782      * <ul>
   5783      * <li> <code>UPPERCASE_LETTER</code>
   5784      * <li> <code>LOWERCASE_LETTER</code>
   5785      * <li> <code>TITLECASE_LETTER</code>
   5786      * <li> <code>MODIFIER_LETTER</code>
   5787      * <li> <code>OTHER_LETTER</code>
   5788      * <li> <code>LETTER_NUMBER</code>
   5789      * </ul>
   5790      * or it has contributory property Other_Alphabetic as defined by the
   5791      * Unicode Standard.
   5792      *
   5793      * @param   codePoint the character (Unicode code point) to be tested.
   5794      * @return  <code>true</code> if the character is a Unicode alphabet
   5795      *          character, <code>false</code> otherwise.
   5796      * @since   1.7
   5797      */
   5798     public static boolean isAlphabetic(int codePoint) {
   5799         return isAlphabeticImpl(codePoint);
   5800     }
   5801 
   5802     static native boolean isAlphabeticImpl(int codePoint);
   5803 
   5804 
   5805     /**
   5806      * Determines if the specified character (Unicode code point) is a CJKV
   5807      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
   5808      * the Unicode Standard.
   5809      *
   5810      * @param   codePoint the character (Unicode code point) to be tested.
   5811      * @return  <code>true</code> if the character is a Unicode ideograph
   5812      *          character, <code>false</code> otherwise.
   5813      * @since   1.7
   5814      */
   5815     public static boolean isIdeographic(int codePoint) {
   5816         return isIdeographicImpl(codePoint);
   5817     }
   5818 
   5819     static native boolean isIdeographicImpl(int codePoint);
   5820 
   5821     /**
   5822      * Determines if the specified character is
   5823      * permissible as the first character in a Java identifier.
   5824      * <p>
   5825      * A character may start a Java identifier if and only if
   5826      * one of the following conditions is true:
   5827      * <ul>
   5828      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   5829      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
   5830      * <li> {@code ch} is a currency symbol (such as {@code '$'})
   5831      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
   5832      * </ul>
   5833      *
   5834      * <p><b>Note:</b> This method cannot handle <a
   5835      * href="#supplementary"> supplementary characters</a>. To support
   5836      * all Unicode characters, including supplementary characters, use
   5837      * the {@link #isJavaIdentifierStart(int)} method.
   5838      *
   5839      * @param   ch the character to be tested.
   5840      * @return  {@code true} if the character may start a Java identifier;
   5841      *          {@code false} otherwise.
   5842      * @see     Character#isJavaIdentifierPart(char)
   5843      * @see     Character#isLetter(char)
   5844      * @see     Character#isUnicodeIdentifierStart(char)
   5845      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   5846      * @since   1.1
   5847      */
   5848     public static boolean isJavaIdentifierStart(char ch) {
   5849         return isJavaIdentifierStart((int)ch);
   5850     }
   5851 
   5852     /**
   5853      * Determines if the character (Unicode code point) is
   5854      * permissible as the first character in a Java identifier.
   5855      * <p>
   5856      * A character may start a Java identifier if and only if
   5857      * one of the following conditions is true:
   5858      * <ul>
   5859      * <li> {@link #isLetter(int) isLetter(codePoint)}
   5860      *      returns {@code true}
   5861      * <li> {@link #getType(int) getType(codePoint)}
   5862      *      returns {@code LETTER_NUMBER}
   5863      * <li> the referenced character is a currency symbol (such as {@code '$'})
   5864      * <li> the referenced character is a connecting punctuation character
   5865      *      (such as {@code '_'}).
   5866      * </ul>
   5867      *
   5868      * @param   codePoint the character (Unicode code point) to be tested.
   5869      * @return  {@code true} if the character may start a Java identifier;
   5870      *          {@code false} otherwise.
   5871      * @see     Character#isJavaIdentifierPart(int)
   5872      * @see     Character#isLetter(int)
   5873      * @see     Character#isUnicodeIdentifierStart(int)
   5874      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   5875      * @since   1.5
   5876      */
   5877     public static boolean isJavaIdentifierStart(int codePoint) {
   5878         // Use precomputed bitmasks to optimize the ASCII range.
   5879         if (codePoint < 64) {
   5880             return (codePoint == '$'); // There's only one character in this range.
   5881         } else if (codePoint < 128) {
   5882             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   5883         }
   5884         return ((1 << getType(codePoint))
   5885                 & ((1 << UPPERCASE_LETTER)
   5886                    | (1 << LOWERCASE_LETTER)
   5887                    | (1  << TITLECASE_LETTER)
   5888                    | (1  << MODIFIER_LETTER)
   5889                    | (1  << OTHER_LETTER)
   5890                    | (1  << CURRENCY_SYMBOL)
   5891                    | (1  << CONNECTOR_PUNCTUATION)
   5892                    | (1  << LETTER_NUMBER))) != 0;
   5893     }
   5894 
   5895     /**
   5896      * Determines if the specified character may be part of a Java
   5897      * identifier as other than the first character.
   5898      * <p>
   5899      * A character may be part of a Java identifier if any of the following
   5900      * are true:
   5901      * <ul>
   5902      * <li>  it is a letter
   5903      * <li>  it is a currency symbol (such as {@code '$'})
   5904      * <li>  it is a connecting punctuation character (such as {@code '_'})
   5905      * <li>  it is a digit
   5906      * <li>  it is a numeric letter (such as a Roman numeral character)
   5907      * <li>  it is a combining mark
   5908      * <li>  it is a non-spacing mark
   5909      * <li> {@code isIdentifierIgnorable} returns
   5910      * {@code true} for the character
   5911      * </ul>
   5912      *
   5913      * <p><b>Note:</b> This method cannot handle <a
   5914      * href="#supplementary"> supplementary characters</a>. To support
   5915      * all Unicode characters, including supplementary characters, use
   5916      * the {@link #isJavaIdentifierPart(int)} method.
   5917      *
   5918      * @param   ch      the character to be tested.
   5919      * @return {@code true} if the character may be part of a
   5920      *          Java identifier; {@code false} otherwise.
   5921      * @see     Character#isIdentifierIgnorable(char)
   5922      * @see     Character#isJavaIdentifierStart(char)
   5923      * @see     Character#isLetterOrDigit(char)
   5924      * @see     Character#isUnicodeIdentifierPart(char)
   5925      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   5926      * @since   1.1
   5927      */
   5928     public static boolean isJavaIdentifierPart(char ch) {
   5929         return isJavaIdentifierPart((int)ch);
   5930     }
   5931 
   5932     /**
   5933      * Determines if the character (Unicode code point) may be part of a Java
   5934      * identifier as other than the first character.
   5935      * <p>
   5936      * A character may be part of a Java identifier if any of the following
   5937      * are true:
   5938      * <ul>
   5939      * <li>  it is a letter
   5940      * <li>  it is a currency symbol (such as {@code '$'})
   5941      * <li>  it is a connecting punctuation character (such as {@code '_'})
   5942      * <li>  it is a digit
   5943      * <li>  it is a numeric letter (such as a Roman numeral character)
   5944      * <li>  it is a combining mark
   5945      * <li>  it is a non-spacing mark
   5946      * <li> {@link #isIdentifierIgnorable(int)
   5947      * isIdentifierIgnorable(codePoint)} returns {@code true} for
   5948      * the character
   5949      * </ul>
   5950      *
   5951      * @param   codePoint the character (Unicode code point) to be tested.
   5952      * @return {@code true} if the character may be part of a
   5953      *          Java identifier; {@code false} otherwise.
   5954      * @see     Character#isIdentifierIgnorable(int)
   5955      * @see     Character#isJavaIdentifierStart(int)
   5956      * @see     Character#isLetterOrDigit(int)
   5957      * @see     Character#isUnicodeIdentifierPart(int)
   5958      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   5959      * @since   1.5
   5960      */
   5961     public static boolean isJavaIdentifierPart(int codePoint) {
   5962         // Use precomputed bitmasks to optimize the ASCII range.
   5963         if (codePoint < 64) {
   5964             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
   5965         } else if (codePoint < 128) {
   5966             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   5967         }
   5968         return ((1 << getType(codePoint))
   5969                 & ((1 << UPPERCASE_LETTER)
   5970                    | (1 << LOWERCASE_LETTER)
   5971                    | (1 << TITLECASE_LETTER)
   5972                    | (1 << MODIFIER_LETTER)
   5973                    | (1 << OTHER_LETTER)
   5974                    | (1 << CURRENCY_SYMBOL)
   5975                    | (1 << CONNECTOR_PUNCTUATION)
   5976                    | (1 << DECIMAL_DIGIT_NUMBER)
   5977                    | (1 << LETTER_NUMBER)
   5978                    | (1 << FORMAT)
   5979                    | (1 << COMBINING_SPACING_MARK)
   5980                    | (1 << NON_SPACING_MARK))) != 0
   5981                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
   5982                 || (codePoint >= 0x7f && codePoint <= 0x9f);
   5983     }
   5984 
   5985     /**
   5986      * Determines if the specified character is permissible as the
   5987      * first character in a Unicode identifier.
   5988      * <p>
   5989      * A character may start a Unicode identifier if and only if
   5990      * one of the following conditions is true:
   5991      * <ul>
   5992      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   5993      * <li> {@link #getType(char) getType(ch)} returns
   5994      *      {@code LETTER_NUMBER}.
   5995      * </ul>
   5996      *
   5997      * <p><b>Note:</b> This method cannot handle <a
   5998      * href="#supplementary"> supplementary characters</a>. To support
   5999      * all Unicode characters, including supplementary characters, use
   6000      * the {@link #isUnicodeIdentifierStart(int)} method.
   6001      *
   6002      * @param   ch      the character to be tested.
   6003      * @return  {@code true} if the character may start a Unicode
   6004      *          identifier; {@code false} otherwise.
   6005      * @see     Character#isJavaIdentifierStart(char)
   6006      * @see     Character#isLetter(char)
   6007      * @see     Character#isUnicodeIdentifierPart(char)
   6008      * @since   1.1
   6009      */
   6010     public static boolean isUnicodeIdentifierStart(char ch) {
   6011         return isUnicodeIdentifierStart((int)ch);
   6012     }
   6013 
   6014     /**
   6015      * Determines if the specified character (Unicode code point) is permissible as the
   6016      * first character in a Unicode identifier.
   6017      * <p>
   6018      * A character may start a Unicode identifier if and only if
   6019      * one of the following conditions is true:
   6020      * <ul>
   6021      * <li> {@link #isLetter(int) isLetter(codePoint)}
   6022      *      returns {@code true}
   6023      * <li> {@link #getType(int) getType(codePoint)}
   6024      *      returns {@code LETTER_NUMBER}.
   6025      * </ul>
   6026      * @param   codePoint the character (Unicode code point) to be tested.
   6027      * @return  {@code true} if the character may start a Unicode
   6028      *          identifier; {@code false} otherwise.
   6029      * @see     Character#isJavaIdentifierStart(int)
   6030      * @see     Character#isLetter(int)
   6031      * @see     Character#isUnicodeIdentifierPart(int)
   6032      * @since   1.5
   6033      */
   6034     public static boolean isUnicodeIdentifierStart(int codePoint) {
   6035         return isUnicodeIdentifierStartImpl(codePoint);
   6036     }
   6037 
   6038     static native boolean isUnicodeIdentifierStartImpl(int codePoint);
   6039 
   6040     /**
   6041      * Determines if the specified character may be part of a Unicode
   6042      * identifier as other than the first character.
   6043      * <p>
   6044      * A character may be part of a Unicode identifier if and only if
   6045      * one of the following statements is true:
   6046      * <ul>
   6047      * <li>  it is a letter
   6048      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6049      * <li>  it is a digit
   6050      * <li>  it is a numeric letter (such as a Roman numeral character)
   6051      * <li>  it is a combining mark
   6052      * <li>  it is a non-spacing mark
   6053      * <li> {@code isIdentifierIgnorable} returns
   6054      * {@code true} for this character.
   6055      * </ul>
   6056      *
   6057      * <p><b>Note:</b> This method cannot handle <a
   6058      * href="#supplementary"> supplementary characters</a>. To support
   6059      * all Unicode characters, including supplementary characters, use
   6060      * the {@link #isUnicodeIdentifierPart(int)} method.
   6061      *
   6062      * @param   ch      the character to be tested.
   6063      * @return  {@code true} if the character may be part of a
   6064      *          Unicode identifier; {@code false} otherwise.
   6065      * @see     Character#isIdentifierIgnorable(char)
   6066      * @see     Character#isJavaIdentifierPart(char)
   6067      * @see     Character#isLetterOrDigit(char)
   6068      * @see     Character#isUnicodeIdentifierStart(char)
   6069      * @since   1.1
   6070      */
   6071     public static boolean isUnicodeIdentifierPart(char ch) {
   6072         return isUnicodeIdentifierPart((int)ch);
   6073     }
   6074 
   6075     /**
   6076      * Determines if the specified character (Unicode code point) may be part of a Unicode
   6077      * identifier as other than the first character.
   6078      * <p>
   6079      * A character may be part of a Unicode identifier if and only if
   6080      * one of the following statements is true:
   6081      * <ul>
   6082      * <li>  it is a letter
   6083      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6084      * <li>  it is a digit
   6085      * <li>  it is a numeric letter (such as a Roman numeral character)
   6086      * <li>  it is a combining mark
   6087      * <li>  it is a non-spacing mark
   6088      * <li> {@code isIdentifierIgnorable} returns
   6089      * {@code true} for this character.
   6090      * </ul>
   6091      * @param   codePoint the character (Unicode code point) to be tested.
   6092      * @return  {@code true} if the character may be part of a
   6093      *          Unicode identifier; {@code false} otherwise.
   6094      * @see     Character#isIdentifierIgnorable(int)
   6095      * @see     Character#isJavaIdentifierPart(int)
   6096      * @see     Character#isLetterOrDigit(int)
   6097      * @see     Character#isUnicodeIdentifierStart(int)
   6098      * @since   1.5
   6099      */
   6100     public static boolean isUnicodeIdentifierPart(int codePoint) {
   6101         return isUnicodeIdentifierPartImpl(codePoint);
   6102     }
   6103 
   6104     static native boolean isUnicodeIdentifierPartImpl(int codePoint);
   6105 
   6106     /**
   6107      * Determines if the specified character should be regarded as
   6108      * an ignorable character in a Java identifier or a Unicode identifier.
   6109      * <p>
   6110      * The following Unicode characters are ignorable in a Java identifier
   6111      * or a Unicode identifier:
   6112      * <ul>
   6113      * <li>ISO control characters that are not whitespace
   6114      * <ul>
   6115      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
   6116      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
   6117      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
   6118      * </ul>
   6119      *
   6120      * <li>all characters that have the {@code FORMAT} general
   6121      * category value
   6122      * </ul>
   6123      *
   6124      * <p><b>Note:</b> This method cannot handle <a
   6125      * href="#supplementary"> supplementary characters</a>. To support
   6126      * all Unicode characters, including supplementary characters, use
   6127      * the {@link #isIdentifierIgnorable(int)} method.
   6128      *
   6129      * @param   ch      the character to be tested.
   6130      * @return  {@code true} if the character is an ignorable control
   6131      *          character that may be part of a Java or Unicode identifier;
   6132      *           {@code false} otherwise.
   6133      * @see     Character#isJavaIdentifierPart(char)
   6134      * @see     Character#isUnicodeIdentifierPart(char)
   6135      * @since   1.1
   6136      */
   6137     public static boolean isIdentifierIgnorable(char ch) {
   6138         return isIdentifierIgnorable((int)ch);
   6139     }
   6140 
   6141     /**
   6142      * Determines if the specified character (Unicode code point) should be regarded as
   6143      * an ignorable character in a Java identifier or a Unicode identifier.
   6144      * <p>
   6145      * The following Unicode characters are ignorable in a Java identifier
   6146      * or a Unicode identifier:
   6147      * <ul>
   6148      * <li>ISO control characters that are not whitespace
   6149      * <ul>
   6150      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
   6151      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
   6152      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
   6153      * </ul>
   6154      *
   6155      * <li>all characters that have the {@code FORMAT} general
   6156      * category value
   6157      * </ul>
   6158      *
   6159      * @param   codePoint the character (Unicode code point) to be tested.
   6160      * @return  {@code true} if the character is an ignorable control
   6161      *          character that may be part of a Java or Unicode identifier;
   6162      *          {@code false} otherwise.
   6163      * @see     Character#isJavaIdentifierPart(int)
   6164      * @see     Character#isUnicodeIdentifierPart(int)
   6165      * @since   1.5
   6166      */
   6167     public static boolean isIdentifierIgnorable(int codePoint) {
   6168         return isIdentifierIgnorableImpl(codePoint);
   6169     }
   6170 
   6171     static native boolean isIdentifierIgnorableImpl(int codePoint);
   6172 
   6173     /**
   6174      * Converts the character argument to lowercase using case
   6175      * mapping information from the UnicodeData file.
   6176      * <p>
   6177      * Note that
   6178      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
   6179      * does not always return {@code true} for some ranges of
   6180      * characters, particularly those that are symbols or ideographs.
   6181      *
   6182      * <p>In general, {@link String#toLowerCase()} should be used to map
   6183      * characters to lowercase. {@code String} case mapping methods
   6184      * have several benefits over {@code Character} case mapping methods.
   6185      * {@code String} case mapping methods can perform locale-sensitive
   6186      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6187      * the {@code Character} case mapping methods cannot.
   6188      *
   6189      * <p><b>Note:</b> This method cannot handle <a
   6190      * href="#supplementary"> supplementary characters</a>. To support
   6191      * all Unicode characters, including supplementary characters, use
   6192      * the {@link #toLowerCase(int)} method.
   6193      *
   6194      * @param   ch   the character to be converted.
   6195      * @return  the lowercase equivalent of the character, if any;
   6196      *          otherwise, the character itself.
   6197      * @see     Character#isLowerCase(char)
   6198      * @see     String#toLowerCase()
   6199      */
   6200     public static char toLowerCase(char ch) {
   6201         return (char)toLowerCase((int)ch);
   6202     }
   6203 
   6204     /**
   6205      * Converts the character (Unicode code point) argument to
   6206      * lowercase using case mapping information from the UnicodeData
   6207      * file.
   6208      *
   6209      * <p> Note that
   6210      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
   6211      * does not always return {@code true} for some ranges of
   6212      * characters, particularly those that are symbols or ideographs.
   6213      *
   6214      * <p>In general, {@link String#toLowerCase()} should be used to map
   6215      * characters to lowercase. {@code String} case mapping methods
   6216      * have several benefits over {@code Character} case mapping methods.
   6217      * {@code String} case mapping methods can perform locale-sensitive
   6218      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6219      * the {@code Character} case mapping methods cannot.
   6220      *
   6221      * @param   codePoint   the character (Unicode code point) to be converted.
   6222      * @return  the lowercase equivalent of the character (Unicode code
   6223      *          point), if any; otherwise, the character itself.
   6224      * @see     Character#isLowerCase(int)
   6225      * @see     String#toLowerCase()
   6226      *
   6227      * @since   1.5
   6228      */
   6229     public static int toLowerCase(int codePoint) {
   6230         if (codePoint >= 'A' && codePoint <= 'Z') {
   6231             return codePoint + ('a' - 'A');
   6232         }
   6233 
   6234         // All ASCII codepoints except the ones above remain unchanged.
   6235         if (codePoint < 0x80) {
   6236             return codePoint;
   6237         }
   6238 
   6239         return toLowerCaseImpl(codePoint);
   6240     }
   6241 
   6242     static native int toLowerCaseImpl(int codePoint);
   6243 
   6244     /**
   6245      * Converts the character argument to uppercase using case mapping
   6246      * information from the UnicodeData file.
   6247      * <p>
   6248      * Note that
   6249      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
   6250      * does not always return {@code true} for some ranges of
   6251      * characters, particularly those that are symbols or ideographs.
   6252      *
   6253      * <p>In general, {@link String#toUpperCase()} should be used to map
   6254      * characters to uppercase. {@code String} case mapping methods
   6255      * have several benefits over {@code Character} case mapping methods.
   6256      * {@code String} case mapping methods can perform locale-sensitive
   6257      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6258      * the {@code Character} case mapping methods cannot.
   6259      *
   6260      * <p><b>Note:</b> This method cannot handle <a
   6261      * href="#supplementary"> supplementary characters</a>. To support
   6262      * all Unicode characters, including supplementary characters, use
   6263      * the {@link #toUpperCase(int)} method.
   6264      *
   6265      * @param   ch   the character to be converted.
   6266      * @return  the uppercase equivalent of the character, if any;
   6267      *          otherwise, the character itself.
   6268      * @see     Character#isUpperCase(char)
   6269      * @see     String#toUpperCase()
   6270      */
   6271     public static char toUpperCase(char ch) {
   6272         return (char)toUpperCase((int)ch);
   6273     }
   6274 
   6275     /**
   6276      * Converts the character (Unicode code point) argument to
   6277      * uppercase using case mapping information from the UnicodeData
   6278      * file.
   6279      *
   6280      * <p>Note that
   6281      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
   6282      * does not always return {@code true} for some ranges of
   6283      * characters, particularly those that are symbols or ideographs.
   6284      *
   6285      * <p>In general, {@link String#toUpperCase()} should be used to map
   6286      * characters to uppercase. {@code String} case mapping methods
   6287      * have several benefits over {@code Character} case mapping methods.
   6288      * {@code String} case mapping methods can perform locale-sensitive
   6289      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6290      * the {@code Character} case mapping methods cannot.
   6291      *
   6292      * @param   codePoint   the character (Unicode code point) to be converted.
   6293      * @return  the uppercase equivalent of the character, if any;
   6294      *          otherwise, the character itself.
   6295      * @see     Character#isUpperCase(int)
   6296      * @see     String#toUpperCase()
   6297      *
   6298      * @since   1.5
   6299      */
   6300     public static int toUpperCase(int codePoint) {
   6301         if (codePoint >= 'a' && codePoint <= 'z') {
   6302             return codePoint - ('a' - 'A');
   6303         }
   6304 
   6305         // All ASCII codepoints except the ones above remain unchanged.
   6306         if (codePoint < 0x80) {
   6307             return codePoint;
   6308         }
   6309 
   6310         return toUpperCaseImpl(codePoint);
   6311     }
   6312 
   6313     static native int toUpperCaseImpl(int codePoint);
   6314 
   6315     /**
   6316      * Converts the character argument to titlecase using case mapping
   6317      * information from the UnicodeData file. If a character has no
   6318      * explicit titlecase mapping and is not itself a titlecase char
   6319      * according to UnicodeData, then the uppercase mapping is
   6320      * returned as an equivalent titlecase mapping. If the
   6321      * {@code char} argument is already a titlecase
   6322      * {@code char}, the same {@code char} value will be
   6323      * returned.
   6324      * <p>
   6325      * Note that
   6326      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
   6327      * does not always return {@code true} for some ranges of
   6328      * characters.
   6329      *
   6330      * <p><b>Note:</b> This method cannot handle <a
   6331      * href="#supplementary"> supplementary characters</a>. To support
   6332      * all Unicode characters, including supplementary characters, use
   6333      * the {@link #toTitleCase(int)} method.
   6334      *
   6335      * @param   ch   the character to be converted.
   6336      * @return  the titlecase equivalent of the character, if any;
   6337      *          otherwise, the character itself.
   6338      * @see     Character#isTitleCase(char)
   6339      * @see     Character#toLowerCase(char)
   6340      * @see     Character#toUpperCase(char)
   6341      * @since   1.0.2
   6342      */
   6343     public static char toTitleCase(char ch) {
   6344         return (char)toTitleCase((int)ch);
   6345     }
   6346 
   6347     /**
   6348      * Converts the character (Unicode code point) argument to titlecase using case mapping
   6349      * information from the UnicodeData file. If a character has no
   6350      * explicit titlecase mapping and is not itself a titlecase char
   6351      * according to UnicodeData, then the uppercase mapping is
   6352      * returned as an equivalent titlecase mapping. If the
   6353      * character argument is already a titlecase
   6354      * character, the same character value will be
   6355      * returned.
   6356      *
   6357      * <p>Note that
   6358      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
   6359      * does not always return {@code true} for some ranges of
   6360      * characters.
   6361      *
   6362      * @param   codePoint   the character (Unicode code point) to be converted.
   6363      * @return  the titlecase equivalent of the character, if any;
   6364      *          otherwise, the character itself.
   6365      * @see     Character#isTitleCase(int)
   6366      * @see     Character#toLowerCase(int)
   6367      * @see     Character#toUpperCase(int)
   6368      * @since   1.5
   6369      */
   6370     public static int toTitleCase(int codePoint) {
   6371         return toTitleCaseImpl(codePoint);
   6372     }
   6373 
   6374     static native int toTitleCaseImpl(int codePoint);
   6375 
   6376     /**
   6377      * Returns the numeric value of the character {@code ch} in the
   6378      * specified radix.
   6379      * <p>
   6380      * If the radix is not in the range {@code MIN_RADIX} &le;
   6381      * {@code radix} &le; {@code MAX_RADIX} or if the
   6382      * value of {@code ch} is not a valid digit in the specified
   6383      * radix, {@code -1} is returned. A character is a valid digit
   6384      * if at least one of the following is true:
   6385      * <ul>
   6386      * <li>The method {@code isDigit} is {@code true} of the character
   6387      *     and the Unicode decimal digit value of the character (or its
   6388      *     single-character decomposition) is less than the specified radix.
   6389      *     In this case the decimal digit value is returned.
   6390      * <li>The character is one of the uppercase Latin letters
   6391      *     {@code 'A'} through {@code 'Z'} and its code is less than
   6392      *     {@code radix + 'A' - 10}.
   6393      *     In this case, {@code ch - 'A' + 10}
   6394      *     is returned.
   6395      * <li>The character is one of the lowercase Latin letters
   6396      *     {@code 'a'} through {@code 'z'} and its code is less than
   6397      *     {@code radix + 'a' - 10}.
   6398      *     In this case, {@code ch - 'a' + 10}
   6399      *     is returned.
   6400      * <li>The character is one of the fullwidth uppercase Latin letters A
   6401      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
   6402      *     and its code is less than
   6403      *     {@code radix + '\u005CuFF21' - 10}.
   6404      *     In this case, {@code ch - '\u005CuFF21' + 10}
   6405      *     is returned.
   6406      * <li>The character is one of the fullwidth lowercase Latin letters a
   6407      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
   6408      *     and its code is less than
   6409      *     {@code radix + '\u005CuFF41' - 10}.
   6410      *     In this case, {@code ch - '\u005CuFF41' + 10}
   6411      *     is returned.
   6412      * </ul>
   6413      *
   6414      * <p><b>Note:</b> This method cannot handle <a
   6415      * href="#supplementary"> supplementary characters</a>. To support
   6416      * all Unicode characters, including supplementary characters, use
   6417      * the {@link #digit(int, int)} method.
   6418      *
   6419      * @param   ch      the character to be converted.
   6420      * @param   radix   the radix.
   6421      * @return  the numeric value represented by the character in the
   6422      *          specified radix.
   6423      * @see     Character#forDigit(int, int)
   6424      * @see     Character#isDigit(char)
   6425      */
   6426     public static int digit(char ch, int radix) {
   6427         return digit((int)ch, radix);
   6428     }
   6429 
   6430     /**
   6431      * Returns the numeric value of the specified character (Unicode
   6432      * code point) in the specified radix.
   6433      *
   6434      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
   6435      * {@code radix} &le; {@code MAX_RADIX} or if the
   6436      * character is not a valid digit in the specified
   6437      * radix, {@code -1} is returned. A character is a valid digit
   6438      * if at least one of the following is true:
   6439      * <ul>
   6440      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
   6441      *     and the Unicode decimal digit value of the character (or its
   6442      *     single-character decomposition) is less than the specified radix.
   6443      *     In this case the decimal digit value is returned.
   6444      * <li>The character is one of the uppercase Latin letters
   6445      *     {@code 'A'} through {@code 'Z'} and its code is less than
   6446      *     {@code radix + 'A' - 10}.
   6447      *     In this case, {@code codePoint - 'A' + 10}
   6448      *     is returned.
   6449      * <li>The character is one of the lowercase Latin letters
   6450      *     {@code 'a'} through {@code 'z'} and its code is less than
   6451      *     {@code radix + 'a' - 10}.
   6452      *     In this case, {@code codePoint - 'a' + 10}
   6453      *     is returned.
   6454      * <li>The character is one of the fullwidth uppercase Latin letters A
   6455      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
   6456      *     and its code is less than
   6457      *     {@code radix + '\u005CuFF21' - 10}.
   6458      *     In this case,
   6459      *     {@code codePoint - '\u005CuFF21' + 10}
   6460      *     is returned.
   6461      * <li>The character is one of the fullwidth lowercase Latin letters a
   6462      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
   6463      *     and its code is less than
   6464      *     {@code radix + '\u005CuFF41'- 10}.
   6465      *     In this case,
   6466      *     {@code codePoint - '\u005CuFF41' + 10}
   6467      *     is returned.
   6468      * </ul>
   6469      *
   6470      * @param   codePoint the character (Unicode code point) to be converted.
   6471      * @param   radix   the radix.
   6472      * @return  the numeric value represented by the character in the
   6473      *          specified radix.
   6474      * @see     Character#forDigit(int, int)
   6475      * @see     Character#isDigit(int)
   6476      * @since   1.5
   6477      */
   6478     public static int digit(int codePoint, int radix) {
   6479         if (radix < MIN_RADIX || radix > MAX_RADIX) {
   6480             return -1;
   6481         }
   6482         if (codePoint < 128) {
   6483             // Optimized for ASCII
   6484             int result = -1;
   6485             if ('0' <= codePoint && codePoint <= '9') {
   6486                 result = codePoint - '0';
   6487             } else if ('a' <= codePoint && codePoint <= 'z') {
   6488                 result = 10 + (codePoint - 'a');
   6489             } else if ('A' <= codePoint && codePoint <= 'Z') {
   6490                 result = 10 + (codePoint - 'A');
   6491             }
   6492             return result < radix ? result : -1;
   6493         }
   6494         return digitImpl(codePoint, radix);
   6495     }
   6496 
   6497     native static int digitImpl(int codePoint, int radix);
   6498 
   6499     /**
   6500      * Returns the {@code int} value that the specified Unicode
   6501      * character represents. For example, the character
   6502      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
   6503      * an int with a value of 50.
   6504      * <p>
   6505      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
   6506      * {@code '\u005Cu005A'}), lowercase
   6507      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
   6508      * full width variant ({@code '\u005CuFF21'} through
   6509      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
   6510      * {@code '\u005CuFF5A'}) forms have numeric values from 10
   6511      * through 35. This is independent of the Unicode specification,
   6512      * which does not assign numeric values to these {@code char}
   6513      * values.
   6514      * <p>
   6515      * If the character does not have a numeric value, then -1 is returned.
   6516      * If the character has a numeric value that cannot be represented as a
   6517      * nonnegative integer (for example, a fractional value), then -2
   6518      * is returned.
   6519      *
   6520      * <p><b>Note:</b> This method cannot handle <a
   6521      * href="#supplementary"> supplementary characters</a>. To support
   6522      * all Unicode characters, including supplementary characters, use
   6523      * the {@link #getNumericValue(int)} method.
   6524      *
   6525      * @param   ch      the character to be converted.
   6526      * @return  the numeric value of the character, as a nonnegative {@code int}
   6527      *           value; -2 if the character has a numeric value that is not a
   6528      *          nonnegative integer; -1 if the character has no numeric value.
   6529      * @see     Character#forDigit(int, int)
   6530      * @see     Character#isDigit(char)
   6531      * @since   1.1
   6532      */
   6533     public static int getNumericValue(char ch) {
   6534         return getNumericValue((int)ch);
   6535     }
   6536 
   6537     /**
   6538      * Returns the {@code int} value that the specified
   6539      * character (Unicode code point) represents. For example, the character
   6540      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
   6541      * an {@code int} with a value of 50.
   6542      * <p>
   6543      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
   6544      * {@code '\u005Cu005A'}), lowercase
   6545      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
   6546      * full width variant ({@code '\u005CuFF21'} through
   6547      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
   6548      * {@code '\u005CuFF5A'}) forms have numeric values from 10
   6549      * through 35. This is independent of the Unicode specification,
   6550      * which does not assign numeric values to these {@code char}
   6551      * values.
   6552      * <p>
   6553      * If the character does not have a numeric value, then -1 is returned.
   6554      * If the character has a numeric value that cannot be represented as a
   6555      * nonnegative integer (for example, a fractional value), then -2
   6556      * is returned.
   6557      *
   6558      * @param   codePoint the character (Unicode code point) to be converted.
   6559      * @return  the numeric value of the character, as a nonnegative {@code int}
   6560      *          value; -2 if the character has a numeric value that is not a
   6561      *          nonnegative integer; -1 if the character has no numeric value.
   6562      * @see     Character#forDigit(int, int)
   6563      * @see     Character#isDigit(int)
   6564      * @since   1.5
   6565      */
   6566     public static int getNumericValue(int codePoint) {
   6567         // This is both an optimization and papers over differences between Java and ICU.
   6568         if (codePoint < 128) {
   6569             if (codePoint >= '0' && codePoint <= '9') {
   6570                 return codePoint - '0';
   6571             }
   6572             if (codePoint >= 'a' && codePoint <= 'z') {
   6573                 return codePoint - ('a' - 10);
   6574             }
   6575             if (codePoint >= 'A' && codePoint <= 'Z') {
   6576                 return codePoint - ('A' - 10);
   6577             }
   6578             return -1;
   6579         }
   6580         // Full-width uppercase A-Z.
   6581         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
   6582             return codePoint - 0xff17;
   6583         }
   6584         // Full-width lowercase a-z.
   6585         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
   6586             return codePoint - 0xff37;
   6587         }
   6588         return getNumericValueImpl(codePoint);
   6589     }
   6590 
   6591     native static int getNumericValueImpl(int codePoint);
   6592 
   6593     /**
   6594      * Determines if the specified character is ISO-LATIN-1 white space.
   6595      * This method returns {@code true} for the following five
   6596      * characters only:
   6597      * <table>
   6598      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
   6599      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
   6600      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
   6601      *     <td>{@code NEW LINE}</td></tr>
   6602      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
   6603      *     <td>{@code FORM FEED}</td></tr>
   6604      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
   6605      *     <td>{@code CARRIAGE RETURN}</td></tr>
   6606      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
   6607      *     <td>{@code SPACE}</td></tr>
   6608      * </table>
   6609      *
   6610      * @param      ch   the character to be tested.
   6611      * @return     {@code true} if the character is ISO-LATIN-1 white
   6612      *             space; {@code false} otherwise.
   6613      * @see        Character#isSpaceChar(char)
   6614      * @see        Character#isWhitespace(char)
   6615      * @deprecated Replaced by isWhitespace(char).
   6616      */
   6617     @Deprecated
   6618     public static boolean isSpace(char ch) {
   6619         return (ch <= 0x0020) &&
   6620             (((((1L << 0x0009) |
   6621             (1L << 0x000A) |
   6622             (1L << 0x000C) |
   6623             (1L << 0x000D) |
   6624             (1L << 0x0020)) >> ch) & 1L) != 0);
   6625     }
   6626 
   6627 
   6628     /**
   6629      * Determines if the specified character is a Unicode space character.
   6630      * A character is considered to be a space character if and only if
   6631      * it is specified to be a space character by the Unicode Standard. This
   6632      * method returns true if the character's general category type is any of
   6633      * the following:
   6634      * <ul>
   6635      * <li> {@code SPACE_SEPARATOR}
   6636      * <li> {@code LINE_SEPARATOR}
   6637      * <li> {@code PARAGRAPH_SEPARATOR}
   6638      * </ul>
   6639      *
   6640      * <p><b>Note:</b> This method cannot handle <a
   6641      * href="#supplementary"> supplementary characters</a>. To support
   6642      * all Unicode characters, including supplementary characters, use
   6643      * the {@link #isSpaceChar(int)} method.
   6644      *
   6645      * @param   ch      the character to be tested.
   6646      * @return  {@code true} if the character is a space character;
   6647      *          {@code false} otherwise.
   6648      * @see     Character#isWhitespace(char)
   6649      * @since   1.1
   6650      */
   6651     public static boolean isSpaceChar(char ch) {
   6652         return isSpaceChar((int)ch);
   6653     }
   6654 
   6655     /**
   6656      * Determines if the specified character (Unicode code point) is a
   6657      * Unicode space character.  A character is considered to be a
   6658      * space character if and only if it is specified to be a space
   6659      * character by the Unicode Standard. This method returns true if
   6660      * the character's general category type is any of the following:
   6661      *
   6662      * <ul>
   6663      * <li> {@link #SPACE_SEPARATOR}
   6664      * <li> {@link #LINE_SEPARATOR}
   6665      * <li> {@link #PARAGRAPH_SEPARATOR}
   6666      * </ul>
   6667      *
   6668      * @param   codePoint the character (Unicode code point) to be tested.
   6669      * @return  {@code true} if the character is a space character;
   6670      *          {@code false} otherwise.
   6671      * @see     Character#isWhitespace(int)
   6672      * @since   1.5
   6673      */
   6674     public static boolean isSpaceChar(int codePoint) {
   6675         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   6676         // SPACE or NO-BREAK SPACE?
   6677         if (codePoint == 0x20 || codePoint == 0xa0) {
   6678             return true;
   6679         }
   6680         if (codePoint < 0x1000) {
   6681             return false;
   6682         }
   6683         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   6684         if (codePoint == 0x1680 || codePoint == 0x180e) {
   6685             return true;
   6686         }
   6687         if (codePoint < 0x2000) {
   6688             return false;
   6689         }
   6690         if (codePoint <= 0xffff) {
   6691             // Other whitespace from General Punctuation...
   6692             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
   6693                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   6694         }
   6695         // Let icu4c worry about non-BMP code points.
   6696         return isSpaceCharImpl(codePoint);
   6697     }
   6698 
   6699     static native boolean isSpaceCharImpl(int codePoint);
   6700 
   6701     /**
   6702      * Determines if the specified character is white space according to Java.
   6703      * A character is a Java whitespace character if and only if it satisfies
   6704      * one of the following criteria:
   6705      * <ul>
   6706      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
   6707      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
   6708      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
   6709      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
   6710      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
   6711      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
   6712      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
   6713      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
   6714      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
   6715      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
   6716      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
   6717      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
   6718      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
   6719      * </ul>
   6720      *
   6721      * <p><b>Note:</b> This method cannot handle <a
   6722      * href="#supplementary"> supplementary characters</a>. To support
   6723      * all Unicode characters, including supplementary characters, use
   6724      * the {@link #isWhitespace(int)} method.
   6725      *
   6726      * @param   ch the character to be tested.
   6727      * @return  {@code true} if the character is a Java whitespace
   6728      *          character; {@code false} otherwise.
   6729      * @see     Character#isSpaceChar(char)
   6730      * @since   1.1
   6731      */
   6732     public static boolean isWhitespace(char ch) {
   6733         return isWhitespace((int)ch);
   6734     }
   6735 
   6736     /**
   6737      * Determines if the specified character (Unicode code point) is
   6738      * white space according to Java.  A character is a Java
   6739      * whitespace character if and only if it satisfies one of the
   6740      * following criteria:
   6741      * <ul>
   6742      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
   6743      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
   6744      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
   6745      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
   6746      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
   6747      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
   6748      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
   6749      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
   6750      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
   6751      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
   6752      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
   6753      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
   6754      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
   6755      * </ul>
   6756      * <p>
   6757      *
   6758      * @param   codePoint the character (Unicode code point) to be tested.
   6759      * @return  {@code true} if the character is a Java whitespace
   6760      *          character; {@code false} otherwise.
   6761      * @see     Character#isSpaceChar(int)
   6762      * @since   1.5
   6763      */
   6764     public static boolean isWhitespace(int codePoint) {
   6765         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   6766         // Any ASCII whitespace character?
   6767         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
   6768             return true;
   6769         }
   6770         if (codePoint < 0x1000) {
   6771             return false;
   6772         }
   6773         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   6774         if (codePoint == 0x1680 || codePoint == 0x180e) {
   6775             return true;
   6776         }
   6777         if (codePoint < 0x2000) {
   6778             return false;
   6779         }
   6780         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
   6781         if (codePoint == 0x2007 || codePoint == 0x202f) {
   6782             return false;
   6783         }
   6784         if (codePoint <= 0xffff) {
   6785             // Other whitespace from General Punctuation...
   6786             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
   6787                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   6788         }
   6789         // Let icu4c worry about non-BMP code points.
   6790         return isWhitespaceImpl(codePoint);
   6791     }
   6792 
   6793     native static boolean isWhitespaceImpl(int codePoint);
   6794 
   6795     /**
   6796      * Determines if the specified character is an ISO control
   6797      * character.  A character is considered to be an ISO control
   6798      * character if its code is in the range {@code '\u005Cu0000'}
   6799      * through {@code '\u005Cu001F'} or in the range
   6800      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
   6801      *
   6802      * <p><b>Note:</b> This method cannot handle <a
   6803      * href="#supplementary"> supplementary characters</a>. To support
   6804      * all Unicode characters, including supplementary characters, use
   6805      * the {@link #isISOControl(int)} method.
   6806      *
   6807      * @param   ch      the character to be tested.
   6808      * @return  {@code true} if the character is an ISO control character;
   6809      *          {@code false} otherwise.
   6810      *
   6811      * @see     Character#isSpaceChar(char)
   6812      * @see     Character#isWhitespace(char)
   6813      * @since   1.1
   6814      */
   6815     public static boolean isISOControl(char ch) {
   6816         return isISOControl((int)ch);
   6817     }
   6818 
   6819     /**
   6820      * Determines if the referenced character (Unicode code point) is an ISO control
   6821      * character.  A character is considered to be an ISO control
   6822      * character if its code is in the range {@code '\u005Cu0000'}
   6823      * through {@code '\u005Cu001F'} or in the range
   6824      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
   6825      *
   6826      * @param   codePoint the character (Unicode code point) to be tested.
   6827      * @return  {@code true} if the character is an ISO control character;
   6828      *          {@code false} otherwise.
   6829      * @see     Character#isSpaceChar(int)
   6830      * @see     Character#isWhitespace(int)
   6831      * @since   1.5
   6832      */
   6833     public static boolean isISOControl(int codePoint) {
   6834         // Optimized form of:
   6835         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
   6836         //     (codePoint >= 0x7F && codePoint <= 0x9F);
   6837         return codePoint <= 0x9F &&
   6838             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
   6839     }
   6840 
   6841     /**
   6842      * Returns a value indicating a character's general category.
   6843      *
   6844      * <p><b>Note:</b> This method cannot handle <a
   6845      * href="#supplementary"> supplementary characters</a>. To support
   6846      * all Unicode characters, including supplementary characters, use
   6847      * the {@link #getType(int)} method.
   6848      *
   6849      * @param   ch      the character to be tested.
   6850      * @return  a value of type {@code int} representing the
   6851      *          character's general category.
   6852      * @see     Character#COMBINING_SPACING_MARK
   6853      * @see     Character#CONNECTOR_PUNCTUATION
   6854      * @see     Character#CONTROL
   6855      * @see     Character#CURRENCY_SYMBOL
   6856      * @see     Character#DASH_PUNCTUATION
   6857      * @see     Character#DECIMAL_DIGIT_NUMBER
   6858      * @see     Character#ENCLOSING_MARK
   6859      * @see     Character#END_PUNCTUATION
   6860      * @see     Character#FINAL_QUOTE_PUNCTUATION
   6861      * @see     Character#FORMAT
   6862      * @see     Character#INITIAL_QUOTE_PUNCTUATION
   6863      * @see     Character#LETTER_NUMBER
   6864      * @see     Character#LINE_SEPARATOR
   6865      * @see     Character#LOWERCASE_LETTER
   6866      * @see     Character#MATH_SYMBOL
   6867      * @see     Character#MODIFIER_LETTER
   6868      * @see     Character#MODIFIER_SYMBOL
   6869      * @see     Character#NON_SPACING_MARK
   6870      * @see     Character#OTHER_LETTER
   6871      * @see     Character#OTHER_NUMBER
   6872      * @see     Character#OTHER_PUNCTUATION
   6873      * @see     Character#OTHER_SYMBOL
   6874      * @see     Character#PARAGRAPH_SEPARATOR
   6875      * @see     Character#PRIVATE_USE
   6876      * @see     Character#SPACE_SEPARATOR
   6877      * @see     Character#START_PUNCTUATION
   6878      * @see     Character#SURROGATE
   6879      * @see     Character#TITLECASE_LETTER
   6880      * @see     Character#UNASSIGNED
   6881      * @see     Character#UPPERCASE_LETTER
   6882      * @since   1.1
   6883      */
   6884     public static int getType(char ch) {
   6885         return getType((int)ch);
   6886     }
   6887 
   6888     /**
   6889      * Returns a value indicating a character's general category.
   6890      *
   6891      * @param   codePoint the character (Unicode code point) to be tested.
   6892      * @return  a value of type {@code int} representing the
   6893      *          character's general category.
   6894      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
   6895      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
   6896      * @see     Character#CONTROL CONTROL
   6897      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
   6898      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
   6899      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
   6900      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
   6901      * @see     Character#END_PUNCTUATION END_PUNCTUATION
   6902      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
   6903      * @see     Character#FORMAT FORMAT
   6904      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
   6905      * @see     Character#LETTER_NUMBER LETTER_NUMBER
   6906      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
   6907      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
   6908      * @see     Character#MATH_SYMBOL MATH_SYMBOL
   6909      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
   6910      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
   6911      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
   6912      * @see     Character#OTHER_LETTER OTHER_LETTER
   6913      * @see     Character#OTHER_NUMBER OTHER_NUMBER
   6914      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
   6915      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
   6916      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
   6917      * @see     Character#PRIVATE_USE PRIVATE_USE
   6918      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
   6919      * @see     Character#START_PUNCTUATION START_PUNCTUATION
   6920      * @see     Character#SURROGATE SURROGATE
   6921      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
   6922      * @see     Character#UNASSIGNED UNASSIGNED
   6923      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
   6924      * @since   1.5
   6925      */
   6926     public static int getType(int codePoint) {
   6927         int type = getTypeImpl(codePoint);
   6928         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
   6929         if (type <= Character.FORMAT) {
   6930             return type;
   6931         }
   6932         return (type + 1);
   6933     }
   6934 
   6935     static native int getTypeImpl(int codePoint);
   6936 
   6937     /**
   6938      * Determines the character representation for a specific digit in
   6939      * the specified radix. If the value of {@code radix} is not a
   6940      * valid radix, or the value of {@code digit} is not a valid
   6941      * digit in the specified radix, the null character
   6942      * ({@code '\u005Cu0000'}) is returned.
   6943      * <p>
   6944      * The {@code radix} argument is valid if it is greater than or
   6945      * equal to {@code MIN_RADIX} and less than or equal to
   6946      * {@code MAX_RADIX}. The {@code digit} argument is valid if
   6947      * {@code 0 <= digit < radix}.
   6948      * <p>
   6949      * If the digit is less than 10, then
   6950      * {@code '0' + digit} is returned. Otherwise, the value
   6951      * {@code 'a' + digit - 10} is returned.
   6952      *
   6953      * @param   digit   the number to convert to a character.
   6954      * @param   radix   the radix.
   6955      * @return  the {@code char} representation of the specified digit
   6956      *          in the specified radix.
   6957      * @see     Character#MIN_RADIX
   6958      * @see     Character#MAX_RADIX
   6959      * @see     Character#digit(char, int)
   6960      */
   6961     public static char forDigit(int digit, int radix) {
   6962         if ((digit >= radix) || (digit < 0)) {
   6963             return '\0';
   6964         }
   6965         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
   6966             return '\0';
   6967         }
   6968         if (digit < 10) {
   6969             return (char)('0' + digit);
   6970         }
   6971         return (char)('a' - 10 + digit);
   6972     }
   6973 
   6974     /**
   6975      * Returns the Unicode directionality property for the given
   6976      * character.  Character directionality is used to calculate the
   6977      * visual ordering of text. The directionality value of undefined
   6978      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
   6979      *
   6980      * <p><b>Note:</b> This method cannot handle <a
   6981      * href="#supplementary"> supplementary characters</a>. To support
   6982      * all Unicode characters, including supplementary characters, use
   6983      * the {@link #getDirectionality(int)} method.
   6984      *
   6985      * @param  ch {@code char} for which the directionality property
   6986      *            is requested.
   6987      * @return the directionality property of the {@code char} value.
   6988      *
   6989      * @see Character#DIRECTIONALITY_UNDEFINED
   6990      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
   6991      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
   6992      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   6993      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
   6994      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   6995      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   6996      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
   6997      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   6998      * @see Character#DIRECTIONALITY_NONSPACING_MARK
   6999      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
   7000      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
   7001      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
   7002      * @see Character#DIRECTIONALITY_WHITESPACE
   7003      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
   7004      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   7005      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   7006      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   7007      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   7008      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   7009      * @since 1.4
   7010      */
   7011     public static byte getDirectionality(char ch) {
   7012         return getDirectionality((int)ch);
   7013     }
   7014 
   7015     /**
   7016      * Returns the Unicode directionality property for the given
   7017      * character (Unicode code point).  Character directionality is
   7018      * used to calculate the visual ordering of text. The
   7019      * directionality value of undefined character is {@link
   7020      * #DIRECTIONALITY_UNDEFINED}.
   7021      *
   7022      * @param   codePoint the character (Unicode code point) for which
   7023      *          the directionality property is requested.
   7024      * @return the directionality property of the character.
   7025      *
   7026      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
   7027      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
   7028      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
   7029      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   7030      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
   7031      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   7032      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   7033      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
   7034      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   7035      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
   7036      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
   7037      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
   7038      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
   7039      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
   7040      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
   7041      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   7042      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   7043      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   7044      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   7045      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   7046      * @since    1.5
   7047      */
   7048     public static byte getDirectionality(int codePoint) {
   7049         if (getType(codePoint) == Character.UNASSIGNED) {
   7050             return Character.DIRECTIONALITY_UNDEFINED;
   7051         }
   7052 
   7053         byte directionality = getDirectionalityImpl(codePoint);
   7054         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
   7055             return DIRECTIONALITY[directionality];
   7056         }
   7057         return Character.DIRECTIONALITY_UNDEFINED;
   7058     }
   7059 
   7060     native static byte getDirectionalityImpl(int codePoint);
   7061     /**
   7062      * Determines whether the character is mirrored according to the
   7063      * Unicode specification.  Mirrored characters should have their
   7064      * glyphs horizontally mirrored when displayed in text that is
   7065      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
   7066      * PARENTHESIS is semantically defined to be an <i>opening
   7067      * parenthesis</i>.  This will appear as a "(" in text that is
   7068      * left-to-right but as a ")" in text that is right-to-left.
   7069      *
   7070      * <p><b>Note:</b> This method cannot handle <a
   7071      * href="#supplementary"> supplementary characters</a>. To support
   7072      * all Unicode characters, including supplementary characters, use
   7073      * the {@link #isMirrored(int)} method.
   7074      *
   7075      * @param  ch {@code char} for which the mirrored property is requested
   7076      * @return {@code true} if the char is mirrored, {@code false}
   7077      *         if the {@code char} is not mirrored or is not defined.
   7078      * @since 1.4
   7079      */
   7080     public static boolean isMirrored(char ch) {
   7081         return isMirrored((int)ch);
   7082     }
   7083 
   7084     /**
   7085      * Determines whether the specified character (Unicode code point)
   7086      * is mirrored according to the Unicode specification.  Mirrored
   7087      * characters should have their glyphs horizontally mirrored when
   7088      * displayed in text that is right-to-left.  For example,
   7089      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
   7090      * defined to be an <i>opening parenthesis</i>.  This will appear
   7091      * as a "(" in text that is left-to-right but as a ")" in text
   7092      * that is right-to-left.
   7093      *
   7094      * @param   codePoint the character (Unicode code point) to be tested.
   7095      * @return  {@code true} if the character is mirrored, {@code false}
   7096      *          if the character is not mirrored or is not defined.
   7097      * @since   1.5
   7098      */
   7099     public static boolean isMirrored(int codePoint) {
   7100         return isMirroredImpl(codePoint);
   7101     }
   7102 
   7103     native static boolean isMirroredImpl(int codePoint);
   7104     /**
   7105      * Compares two {@code Character} objects numerically.
   7106      *
   7107      * @param   anotherCharacter   the {@code Character} to be compared.
   7108 
   7109      * @return  the value {@code 0} if the argument {@code Character}
   7110      *          is equal to this {@code Character}; a value less than
   7111      *          {@code 0} if this {@code Character} is numerically less
   7112      *          than the {@code Character} argument; and a value greater than
   7113      *          {@code 0} if this {@code Character} is numerically greater
   7114      *          than the {@code Character} argument (unsigned comparison).
   7115      *          Note that this is strictly a numerical comparison; it is not
   7116      *          locale-dependent.
   7117      * @since   1.2
   7118      */
   7119     public int compareTo(Character anotherCharacter) {
   7120         return compare(this.value, anotherCharacter.value);
   7121     }
   7122 
   7123     /**
   7124      * Compares two {@code char} values numerically.
   7125      * The value returned is identical to what would be returned by:
   7126      * <pre>
   7127      *    Character.valueOf(x).compareTo(Character.valueOf(y))
   7128      * </pre>
   7129      *
   7130      * @param  x the first {@code char} to compare
   7131      * @param  y the second {@code char} to compare
   7132      * @return the value {@code 0} if {@code x == y};
   7133      *         a value less than {@code 0} if {@code x < y}; and
   7134      *         a value greater than {@code 0} if {@code x > y}
   7135      * @since 1.7
   7136      */
   7137     public static int compare(char x, char y) {
   7138         return x - y;
   7139     }
   7140 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
   7148 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. Derived from {@link #SIZE} divided by {@link Byte#SIZE},
     * so it stays in step with the bit width declared above.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
   7156 
   7157     /**
   7158      * Returns the value obtained by reversing the order of the bytes in the
   7159      * specified <tt>char</tt> value.
   7160      *
   7161      * @return the value obtained by reversing (or, equivalently, swapping)
   7162      *     the bytes in the specified <tt>char</tt> value.
   7163      * @since 1.5
   7164      */
   7165     public static char reverseBytes(char ch) {
   7166         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
   7167     }
   7168 
   7169     /**
   7170      * Returns the Unicode name of the specified character
   7171      * {@code codePoint}, or null if the code point is
   7172      * {@link #UNASSIGNED unassigned}.
   7173      * <p>
   7174      * Note: if the specified character is not assigned a name by
   7175      * the <i>UnicodeData</i> file (part of the Unicode Character
   7176      * Database maintained by the Unicode Consortium), the returned
   7177      * name is the same as the result of expression.
   7178      *
   7179      * <blockquote>{@code
   7180      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
   7181      *     + " "
   7182      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7183      *
   7184      * }</blockquote>
   7185      *
   7186      * @param  codePoint the character (Unicode code point)
   7187      *
   7188      * @return the Unicode name of the specified character, or null if
   7189      *         the code point is unassigned.
   7190      *
   7191      * @exception IllegalArgumentException if the specified
   7192      *            {@code codePoint} is not a valid Unicode
   7193      *            code point.
   7194      *
   7195      * @since 1.7
   7196      */
   7197     public static String getName(int codePoint) {
   7198         if (!isValidCodePoint(codePoint)) {
   7199             throw new IllegalArgumentException();
   7200         }
   7201         String name = getNameImpl(codePoint);
   7202         if (name != null)
   7203             return name;
   7204         if (getType(codePoint) == UNASSIGNED)
   7205             return null;
   7206         UnicodeBlock block = UnicodeBlock.of(codePoint);
   7207         if (block != null)
   7208             return block.toString().replace('_', ' ') + " "
   7209                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7210         // should never come here
   7211         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7212     }
   7213 
    // Native name lookup used by getName(int). getName only calls it after
    // validating the code point, and treats a null result as "no name
    // available", falling back to the unassigned check and the
    // block-name-plus-hex form.
    private static native String getNameImpl(int codePoint);
   7215 }
   7216