Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
      3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      4  *
      5  * This code is free software; you can redistribute it and/or modify it
      6  * under the terms of the GNU General Public License version 2 only, as
      7  * published by the Free Software Foundation.  Oracle designates this
      8  * particular file as subject to the "Classpath" exception as provided
      9  * by Oracle in the LICENSE file that accompanied this code.
     10  *
     11  * This code is distributed in the hope that it will be useful, but WITHOUT
     12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  * version 2 for more details (a copy is included in the LICENSE file that
     15  * accompanied this code).
     16  *
     17  * You should have received a copy of the GNU General Public License version
     18  * 2 along with this work; if not, write to the Free Software Foundation,
     19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     20  *
     21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     22  * or visit www.oracle.com if you need additional information or have any
     23  * questions.
     24  */
     25 
     26 package java.lang;
     27 
     28 import dalvik.annotation.optimization.FastNative;
     29 import java.util.Arrays;
     30 import java.util.HashMap;
     31 import java.util.Locale;
     32 import java.util.Map;
     33 
     34 /**
     35  * The {@code Character} class wraps a value of the primitive
     36  * type {@code char} in an object. An object of type
     37  * {@code Character} contains a single field whose type is
     38  * {@code char}.
     39  * <p>
     40  * In addition, this class provides several methods for determining
     41  * a character's category (lowercase letter, digit, etc.) and for converting
     42  * characters from uppercase to lowercase and vice versa.
     43  * <p>
     44  * Character information is based on the Unicode Standard, version 6.2.0.
     45  * <p>
     46  * The methods and data of class {@code Character} are defined by
     47  * the information in the <i>UnicodeData</i> file that is part of the
     48  * Unicode Character Database maintained by the Unicode
     49  * Consortium. This file specifies various properties including name
     50  * and general category for every defined Unicode code point or
     51  * character range.
     52  * <p>
     53  * The file and its description are available from the Unicode Consortium at:
     54  * <ul>
     55  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
     56  * </ul>
     57  *
     58  * <h3><a name="unicode">Unicode Character Representations</a></h3>
     59  *
     60  * <p>The {@code char} data type (and therefore the value that a
     61  * {@code Character} object encapsulates) are based on the
     62  * original Unicode specification, which defined characters as
     63  * fixed-width 16-bit entities. The Unicode Standard has since been
     64  * changed to allow for characters whose representation requires more
     65  * than 16 bits.  The range of legal <em>code point</em>s is now
     66  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
     67  * (Refer to the <a
     68  * href="http://www.unicode.org/reports/tr27/#notation"><i>
     69  * definition</i></a> of the U+<i>n</i> notation in the Unicode
     70  * Standard.)
     71  *
     72  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
     73  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
     74  * <a name="supplementary">Characters</a> whose code points are greater
     75  * than U+FFFF are called <em>supplementary character</em>s.  The Java
     76  * platform uses the UTF-16 representation in {@code char} arrays and
     77  * in the {@code String} and {@code StringBuffer} classes. In
     78  * this representation, supplementary characters are represented as a pair
     79  * of {@code char} values, the first from the <em>high-surrogates</em>
     80  * range, (&#92;uD800-&#92;uDBFF), the second from the
     81  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
     82  *
     83  * <p>A {@code char} value, therefore, represents Basic
     84  * Multilingual Plane (BMP) code points, including the surrogate
     85  * code points, or code units of the UTF-16 encoding. An
     86  * {@code int} value represents all Unicode code points,
     87  * including supplementary code points. The lower (least significant)
     88  * 21 bits of {@code int} are used to represent Unicode code
     89  * points and the upper (most significant) 11 bits must be zero.
     90  * Unless otherwise specified, the behavior with respect to
     91  * supplementary characters and surrogate {@code char} values is
     92  * as follows:
     93  *
     94  * <ul>
     95  * <li>The methods that only accept a {@code char} value cannot support
     96  * supplementary characters. They treat {@code char} values from the
     97  * surrogate ranges as undefined characters. For example,
     98  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
     99  * this specific value if followed by any low-surrogate value in a string
    100  * would represent a letter.
    101  *
    102  * <li>The methods that accept an {@code int} value support all
    103  * Unicode characters, including supplementary characters. For
    104  * example, {@code Character.isLetter(0x2F81A)} returns
    105  * {@code true} because the code point value represents a letter
    106  * (a CJK ideograph).
    107  * </ul>
    108  *
    109  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
    110  * used for character values in the range between U+0000 and U+10FFFF,
    111  * and <em>Unicode code unit</em> is used for 16-bit
    112  * {@code char} values that are code units of the <em>UTF-16</em>
    113  * encoding. For more information on Unicode terminology, refer to the
    114  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
    115  *
    116  * @author  Lee Boynton
    117  * @author  Guy Steele
    118  * @author  Akira Tanaka
    119  * @author  Martin Buchholz
    120  * @author  Ulf Zibis
    121  * @since   1.0
    122  */
    123 public final
    124 class Character implements java.io.Serializable, Comparable<Character> {
    125     /**
    126      * The minimum radix available for conversion to and from strings.
    127      * The constant value of this field is the smallest value permitted
    128      * for the radix argument in radix-conversion methods such as the
    129      * {@code digit} method, the {@code forDigit} method, and the
    130      * {@code toString} method of class {@code Integer}.
    131      *
    132      * @see     Character#digit(char, int)
    133      * @see     Character#forDigit(int, int)
    134      * @see     Integer#toString(int, int)
    135      * @see     Integer#valueOf(String)
    136      */
    137     public static final int MIN_RADIX = 2;
    138 
    139     /**
    140      * The maximum radix available for conversion to and from strings.
    141      * The constant value of this field is the largest value permitted
    142      * for the radix argument in radix-conversion methods such as the
    143      * {@code digit} method, the {@code forDigit} method, and the
    144      * {@code toString} method of class {@code Integer}.
    145      *
    146      * @see     Character#digit(char, int)
    147      * @see     Character#forDigit(int, int)
    148      * @see     Integer#toString(int, int)
    149      * @see     Integer#valueOf(String)
    150      */
    151     public static final int MAX_RADIX = 36;
    152 
    153     /**
    154      * The constant value of this field is the smallest value of type
    155      * {@code char}, {@code '\u005Cu0000'}.
    156      *
    157      * @since   1.0.2
    158      */
    159     public static final char MIN_VALUE = '\u0000';
    160 
    161     /**
    162      * The constant value of this field is the largest value of type
    163      * {@code char}, {@code '\u005CuFFFF'}.
    164      *
    165      * @since   1.0.2
    166      */
    167     public static final char MAX_VALUE = '\uFFFF';
    168 
    /**
     * The {@code Class} instance representing the primitive type
     * {@code char}.
     *
     * @since   1.1
     */
    // The component type of char[] is the Class object for primitive char.
    // getComponentType() is not typed as Class<Character>, so the cast below
    // is unchecked; that is what the suppression covers.
    @SuppressWarnings("unchecked")
    public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
    177 
    178     /*
    179      * Normative general types
    180      */
    181 
    182     /*
    183      * General character types
    184      */
    185 
    186     /**
    187      * General category "Cn" in the Unicode specification.
    188      * @since   1.1
    189      */
    190     public static final byte UNASSIGNED = 0;
    191 
    192     /**
    193      * General category "Lu" in the Unicode specification.
    194      * @since   1.1
    195      */
    196     public static final byte UPPERCASE_LETTER = 1;
    197 
    198     /**
    199      * General category "Ll" in the Unicode specification.
    200      * @since   1.1
    201      */
    202     public static final byte LOWERCASE_LETTER = 2;
    203 
    204     /**
    205      * General category "Lt" in the Unicode specification.
    206      * @since   1.1
    207      */
    208     public static final byte TITLECASE_LETTER = 3;
    209 
    210     /**
    211      * General category "Lm" in the Unicode specification.
    212      * @since   1.1
    213      */
    214     public static final byte MODIFIER_LETTER = 4;
    215 
    216     /**
    217      * General category "Lo" in the Unicode specification.
    218      * @since   1.1
    219      */
    220     public static final byte OTHER_LETTER = 5;
    221 
    222     /**
    223      * General category "Mn" in the Unicode specification.
    224      * @since   1.1
    225      */
    226     public static final byte NON_SPACING_MARK = 6;
    227 
    228     /**
    229      * General category "Me" in the Unicode specification.
    230      * @since   1.1
    231      */
    232     public static final byte ENCLOSING_MARK = 7;
    233 
    234     /**
    235      * General category "Mc" in the Unicode specification.
    236      * @since   1.1
    237      */
    238     public static final byte COMBINING_SPACING_MARK = 8;
    239 
    240     /**
    241      * General category "Nd" in the Unicode specification.
    242      * @since   1.1
    243      */
    244     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
    245 
    246     /**
    247      * General category "Nl" in the Unicode specification.
    248      * @since   1.1
    249      */
    250     public static final byte LETTER_NUMBER = 10;
    251 
    252     /**
    253      * General category "No" in the Unicode specification.
    254      * @since   1.1
    255      */
    256     public static final byte OTHER_NUMBER = 11;
    257 
    258     /**
    259      * General category "Zs" in the Unicode specification.
    260      * @since   1.1
    261      */
    262     public static final byte SPACE_SEPARATOR = 12;
    263 
    264     /**
    265      * General category "Zl" in the Unicode specification.
    266      * @since   1.1
    267      */
    268     public static final byte LINE_SEPARATOR = 13;
    269 
    270     /**
    271      * General category "Zp" in the Unicode specification.
    272      * @since   1.1
    273      */
    274     public static final byte PARAGRAPH_SEPARATOR = 14;
    275 
    276     /**
    277      * General category "Cc" in the Unicode specification.
    278      * @since   1.1
    279      */
    280     public static final byte CONTROL = 15;
    281 
    282     /**
    283      * General category "Cf" in the Unicode specification.
    284      * @since   1.1
    285      */
    286     public static final byte FORMAT = 16;
    287 
    /**
     * General category "Co" in the Unicode specification.
     * @since   1.1
     */
    // NOTE(review): the preceding category constant (FORMAT) is 16, so the
    // value 17 is not used by any general-category constant visible here.
    // Presumably a deliberate gap -- do not renumber without confirming.
    public static final byte PRIVATE_USE = 18;
    293 
    294     /**
    295      * General category "Cs" in the Unicode specification.
    296      * @since   1.1
    297      */
    298     public static final byte SURROGATE = 19;
    299 
    300     /**
    301      * General category "Pd" in the Unicode specification.
    302      * @since   1.1
    303      */
    304     public static final byte DASH_PUNCTUATION = 20;
    305 
    306     /**
    307      * General category "Ps" in the Unicode specification.
    308      * @since   1.1
    309      */
    310     public static final byte START_PUNCTUATION = 21;
    311 
    312     /**
    313      * General category "Pe" in the Unicode specification.
    314      * @since   1.1
    315      */
    316     public static final byte END_PUNCTUATION = 22;
    317 
    318     /**
    319      * General category "Pc" in the Unicode specification.
    320      * @since   1.1
    321      */
    322     public static final byte CONNECTOR_PUNCTUATION = 23;
    323 
    324     /**
    325      * General category "Po" in the Unicode specification.
    326      * @since   1.1
    327      */
    328     public static final byte OTHER_PUNCTUATION = 24;
    329 
    330     /**
    331      * General category "Sm" in the Unicode specification.
    332      * @since   1.1
    333      */
    334     public static final byte MATH_SYMBOL = 25;
    335 
    336     /**
    337      * General category "Sc" in the Unicode specification.
    338      * @since   1.1
    339      */
    340     public static final byte CURRENCY_SYMBOL = 26;
    341 
    342     /**
    343      * General category "Sk" in the Unicode specification.
    344      * @since   1.1
    345      */
    346     public static final byte MODIFIER_SYMBOL = 27;
    347 
    348     /**
    349      * General category "So" in the Unicode specification.
    350      * @since   1.1
    351      */
    352     public static final byte OTHER_SYMBOL = 28;
    353 
    354     /**
    355      * General category "Pi" in the Unicode specification.
    356      * @since   1.4
    357      */
    358     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
    359 
    360     /**
    361      * General category "Pf" in the Unicode specification.
    362      * @since   1.4
    363      */
    364     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
    365 
    366     /**
    367      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
    368      */
    369     static final int ERROR = 0xFFFFFFFF;
    370 
    371 
    372     /**
    373      * Undefined bidirectional character type. Undefined {@code char}
    374      * values have undefined directionality in the Unicode specification.
    375      * @since 1.4
    376      */
    377     public static final byte DIRECTIONALITY_UNDEFINED = -1;
    378 
    379     /**
    380      * Strong bidirectional character type "L" in the Unicode specification.
    381      * @since 1.4
    382      */
    383     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
    384 
    385     /**
    386      * Strong bidirectional character type "R" in the Unicode specification.
    387      * @since 1.4
    388      */
    389     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
    390 
    /**
     * Strong bidirectional character type "AL" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
    396 
    397     /**
    398      * Weak bidirectional character type "EN" in the Unicode specification.
    399      * @since 1.4
    400      */
    401     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
    402 
    403     /**
    404      * Weak bidirectional character type "ES" in the Unicode specification.
    405      * @since 1.4
    406      */
    407     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
    408 
    409     /**
    410      * Weak bidirectional character type "ET" in the Unicode specification.
    411      * @since 1.4
    412      */
    413     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
    414 
    415     /**
    416      * Weak bidirectional character type "AN" in the Unicode specification.
    417      * @since 1.4
    418      */
    419     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
    420 
    421     /**
    422      * Weak bidirectional character type "CS" in the Unicode specification.
    423      * @since 1.4
    424      */
    425     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
    426 
    427     /**
    428      * Weak bidirectional character type "NSM" in the Unicode specification.
    429      * @since 1.4
    430      */
    431     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
    432 
    433     /**
    434      * Weak bidirectional character type "BN" in the Unicode specification.
    435      * @since 1.4
    436      */
    437     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
    438 
    439     /**
    440      * Neutral bidirectional character type "B" in the Unicode specification.
    441      * @since 1.4
    442      */
    443     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
    444 
    445     /**
    446      * Neutral bidirectional character type "S" in the Unicode specification.
    447      * @since 1.4
    448      */
    449     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
    450 
    451     /**
    452      * Neutral bidirectional character type "WS" in the Unicode specification.
    453      * @since 1.4
    454      */
    455     public static final byte DIRECTIONALITY_WHITESPACE = 12;
    456 
    457     /**
    458      * Neutral bidirectional character type "ON" in the Unicode specification.
    459      * @since 1.4
    460      */
    461     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
    462 
    463     /**
    464      * Strong bidirectional character type "LRE" in the Unicode specification.
    465      * @since 1.4
    466      */
    467     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
    468 
    469     /**
    470      * Strong bidirectional character type "LRO" in the Unicode specification.
    471      * @since 1.4
    472      */
    473     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
    474 
    475     /**
    476      * Strong bidirectional character type "RLE" in the Unicode specification.
    477      * @since 1.4
    478      */
    479     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
    480 
    481     /**
    482      * Strong bidirectional character type "RLO" in the Unicode specification.
    483      * @since 1.4
    484      */
    485     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
    486 
    487     /**
    488      * Weak bidirectional character type "PDF" in the Unicode specification.
    489      * @since 1.4
    490      */
    491     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
    492 
    493     /**
    494      * The minimum value of a
    495      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
    496      * Unicode high-surrogate code unit</a>
    497      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
    498      * A high-surrogate is also known as a <i>leading-surrogate</i>.
    499      *
    500      * @since 1.5
    501      */
    502     public static final char MIN_HIGH_SURROGATE = '\uD800';
    503 
    504     /**
    505      * The maximum value of a
    506      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
    507      * Unicode high-surrogate code unit</a>
    508      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
    509      * A high-surrogate is also known as a <i>leading-surrogate</i>.
    510      *
    511      * @since 1.5
    512      */
    513     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
    514 
    515     /**
    516      * The minimum value of a
    517      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
    518      * Unicode low-surrogate code unit</a>
    519      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
    520      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
    521      *
    522      * @since 1.5
    523      */
    524     public static final char MIN_LOW_SURROGATE  = '\uDC00';
    525 
    526     /**
    527      * The maximum value of a
    528      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
    529      * Unicode low-surrogate code unit</a>
    530      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
    531      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
    532      *
    533      * @since 1.5
    534      */
    535     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
    536 
    537     /**
    538      * The minimum value of a Unicode surrogate code unit in the
    539      * UTF-16 encoding, constant {@code '\u005CuD800'}.
    540      *
    541      * @since 1.5
    542      */
    543     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
    544 
    545     /**
    546      * The maximum value of a Unicode surrogate code unit in the
    547      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
    548      *
    549      * @since 1.5
    550      */
    551     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
    552 
    553     /**
    554      * The minimum value of a
    555      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
    556      * Unicode supplementary code point</a>, constant {@code U+10000}.
    557      *
    558      * @since 1.5
    559      */
    560     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
    561 
    562     /**
    563      * The minimum value of a
    564      * <a href="http://www.unicode.org/glossary/#code_point">
    565      * Unicode code point</a>, constant {@code U+0000}.
    566      *
    567      * @since 1.5
    568      */
    569     public static final int MIN_CODE_POINT = 0x000000;
    570 
    571     /**
    572      * The maximum value of a
    573      * <a href="http://www.unicode.org/glossary/#code_point">
    574      * Unicode code point</a>, constant {@code U+10FFFF}.
    575      *
    576      * @since 1.5
    577      */
    578     public static final int MAX_CODE_POINT = 0X10FFFF;
    579 
    /**
     * Table of the public {@code DIRECTIONALITY_*} constants, stored in an
     * order that differs from their numeric values: for example
     * {@code DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC} (value 2) sits at index 13
     * and {@code DIRECTIONALITY_NONSPACING_MARK} (value 8) at index 17.
     * {@code DIRECTIONALITY_UNDEFINED} has no entry.
     *
     * NOTE(review): presumably this translates a directionality code produced
     * by platform/native code (with its own numbering) into the Java
     * constant; the code that indexes this array is not visible in this part
     * of the file -- confirm before reordering or extending the table.
     */
    private static final byte[] DIRECTIONALITY = new byte[] {
            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
            DIRECTIONALITY_EUROPEAN_NUMBER,
            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
            DIRECTIONALITY_ARABIC_NUMBER,
            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
            DIRECTIONALITY_OTHER_NEUTRALS,
            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    597 
    598     /**
    599      * Instances of this class represent particular subsets of the Unicode
    600      * character set.  The only family of subsets defined in the
    601      * {@code Character} class is {@link Character.UnicodeBlock}.
    602      * Other portions of the Java API may define other subsets for their
    603      * own purposes.
    604      *
    605      * @since 1.2
    606      */
    607     public static class Subset  {
    608 
    609         private String name;
    610 
    611         /**
    612          * Constructs a new {@code Subset} instance.
    613          *
    614          * @param  name  The name of this subset
    615          * @exception NullPointerException if name is {@code null}
    616          */
    617         protected Subset(String name) {
    618             if (name == null) {
    619                 throw new NullPointerException("name");
    620             }
    621             this.name = name;
    622         }
    623 
    624         /**
    625          * Compares two {@code Subset} objects for equality.
    626          * This method returns {@code true} if and only if
    627          * {@code this} and the argument refer to the same
    628          * object; since this method is {@code final}, this
    629          * guarantee holds for all subclasses.
    630          */
    631         public final boolean equals(Object obj) {
    632             return (this == obj);
    633         }
    634 
    635         /**
    636          * Returns the standard hash code as defined by the
    637          * {@link Object#hashCode} method.  This method
    638          * is {@code final} in order to ensure that the
    639          * {@code equals} and {@code hashCode} methods will
    640          * be consistent in all subclasses.
    641          */
    642         public final int hashCode() {
    643             return super.hashCode();
    644         }
    645 
    646         /**
    647          * Returns the name of this subset.
    648          */
    649         public final String toString() {
    650             return name;
    651         }
    652     }
    653 
    654     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
    655     // for the latest specification of Unicode Blocks.
    656 
    657     /**
    658      * A family of character subsets representing the character blocks in the
    659      * Unicode specification. Character blocks generally define characters
    660      * used for a specific script or purpose. A character is contained by
    661      * at most one Unicode block.
    662      *
    663      * @since 1.2
    664      */
    665     public static final class UnicodeBlock extends Subset {
    666 
        /**
         * Registry from block names to {@code UnicodeBlock} instances.
         * The constructors of this class add each block under its identifier
         * name (unless registration is switched off through the {@code isMap}
         * flag) and under every alias they are given.
         */
        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
    668 
        /**
         * Creates a UnicodeBlock with the given identifier name.
         * This name must be the same as the block identifier.
         * <p>
         * Delegates to the two-argument constructor with {@code isMap} set to
         * {@code true}, so the new block is always registered in {@code map}
         * under {@code idName}.
         *
         * @param idName the identifier name of the block
         */
        private UnicodeBlock(String idName) {
            this(idName, true);
        }
    676 
        /**
         * Creates a UnicodeBlock with the given identifier name, optionally
         * registering it in {@code map}.
         *
         * @param idName the identifier name of the block; passed to the
         *               {@code Subset} constructor as the subset name
         * @param isMap  when {@code true} the block is stored in {@code map}
         *               under {@code idName}; when {@code false} the map is
         *               left untouched
         */
        private UnicodeBlock(String idName, boolean isMap) {
            super(idName);
            if (isMap) {
                map.put(idName, this);
            }
        }
    683 
        /**
         * Creates a UnicodeBlock with the given identifier name and
         * alias name.
         * <p>
         * The block is registered in {@code map} twice: under {@code idName}
         * (by the single-argument constructor) and under {@code alias}.
         *
         * @param idName the identifier name of the block
         * @param alias  an additional name under which the block is registered
         */
        private UnicodeBlock(String idName, String alias) {
            this(idName);
            map.put(alias, this);
        }
    692 
    693         /**
    694          * Creates a UnicodeBlock with the given identifier name and
    695          * alias names.
    696          */
    697         private UnicodeBlock(String idName, String... aliases) {
    698             this(idName);
    699             for (String alias : aliases)
    700                 map.put(alias, this);
    701         }
    702 
    703         /**
    704          * Constant for the "Basic Latin" Unicode character block.
    705          * @since 1.2
    706          */
    707         public static final UnicodeBlock  BASIC_LATIN =
    708             new UnicodeBlock("BASIC_LATIN",
    709                              "BASIC LATIN",
    710                              "BASICLATIN");
    711 
    712         /**
    713          * Constant for the "Latin-1 Supplement" Unicode character block.
    714          * @since 1.2
    715          */
    716         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
    717             new UnicodeBlock("LATIN_1_SUPPLEMENT",
    718                              "LATIN-1 SUPPLEMENT",
    719                              "LATIN-1SUPPLEMENT");
    720 
    721         /**
    722          * Constant for the "Latin Extended-A" Unicode character block.
    723          * @since 1.2
    724          */
    725         public static final UnicodeBlock LATIN_EXTENDED_A =
    726             new UnicodeBlock("LATIN_EXTENDED_A",
    727                              "LATIN EXTENDED-A",
    728                              "LATINEXTENDED-A");
    729 
    730         /**
    731          * Constant for the "Latin Extended-B" Unicode character block.
    732          * @since 1.2
    733          */
    734         public static final UnicodeBlock LATIN_EXTENDED_B =
    735             new UnicodeBlock("LATIN_EXTENDED_B",
    736                              "LATIN EXTENDED-B",
    737                              "LATINEXTENDED-B");
    738 
    739         /**
    740          * Constant for the "IPA Extensions" Unicode character block.
    741          * @since 1.2
    742          */
    743         public static final UnicodeBlock IPA_EXTENSIONS =
    744             new UnicodeBlock("IPA_EXTENSIONS",
    745                              "IPA EXTENSIONS",
    746                              "IPAEXTENSIONS");
    747 
    748         /**
    749          * Constant for the "Spacing Modifier Letters" Unicode character block.
    750          * @since 1.2
    751          */
    752         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
    753             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
    754                              "SPACING MODIFIER LETTERS",
    755                              "SPACINGMODIFIERLETTERS");
    756 
    757         /**
    758          * Constant for the "Combining Diacritical Marks" Unicode character block.
    759          * @since 1.2
    760          */
    761         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
    762             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
    763                              "COMBINING DIACRITICAL MARKS",
    764                              "COMBININGDIACRITICALMARKS");
    765 
    766         /**
    767          * Constant for the "Greek and Coptic" Unicode character block.
    768          * <p>
    769          * This block was previously known as the "Greek" block.
    770          *
    771          * @since 1.2
    772          */
    773         public static final UnicodeBlock GREEK =
    774             new UnicodeBlock("GREEK",
    775                              "GREEK AND COPTIC",
    776                              "GREEKANDCOPTIC");
    777 
    778         /**
    779          * Constant for the "Cyrillic" Unicode character block.
    780          * @since 1.2
    781          */
    782         public static final UnicodeBlock CYRILLIC =
    783             new UnicodeBlock("CYRILLIC");
    784 
    785         /**
    786          * Constant for the "Armenian" Unicode character block.
    787          * @since 1.2
    788          */
    789         public static final UnicodeBlock ARMENIAN =
    790             new UnicodeBlock("ARMENIAN");
    791 
    792         /**
    793          * Constant for the "Hebrew" Unicode character block.
    794          * @since 1.2
    795          */
    796         public static final UnicodeBlock HEBREW =
    797             new UnicodeBlock("HEBREW");
    798 
    799         /**
    800          * Constant for the "Arabic" Unicode character block.
    801          * @since 1.2
    802          */
    803         public static final UnicodeBlock ARABIC =
    804             new UnicodeBlock("ARABIC");
    805 
    806         /**
    807          * Constant for the "Devanagari" Unicode character block.
    808          * @since 1.2
    809          */
    810         public static final UnicodeBlock DEVANAGARI =
    811             new UnicodeBlock("DEVANAGARI");
    812 
    813         /**
    814          * Constant for the "Bengali" Unicode character block.
    815          * @since 1.2
    816          */
    817         public static final UnicodeBlock BENGALI =
    818             new UnicodeBlock("BENGALI");
    819 
    820         /**
    821          * Constant for the "Gurmukhi" Unicode character block.
    822          * @since 1.2
    823          */
    824         public static final UnicodeBlock GURMUKHI =
    825             new UnicodeBlock("GURMUKHI");
    826 
    827         /**
    828          * Constant for the "Gujarati" Unicode character block.
    829          * @since 1.2
    830          */
    831         public static final UnicodeBlock GUJARATI =
    832             new UnicodeBlock("GUJARATI");
    833 
    834         /**
    835          * Constant for the "Oriya" Unicode character block.
    836          * @since 1.2
    837          */
    838         public static final UnicodeBlock ORIYA =
    839             new UnicodeBlock("ORIYA");
    840 
    841         /**
    842          * Constant for the "Tamil" Unicode character block.
    843          * @since 1.2
    844          */
    845         public static final UnicodeBlock TAMIL =
    846             new UnicodeBlock("TAMIL");
    847 
    848         /**
    849          * Constant for the "Telugu" Unicode character block.
    850          * @since 1.2
    851          */
    852         public static final UnicodeBlock TELUGU =
    853             new UnicodeBlock("TELUGU");
    854 
    855         /**
    856          * Constant for the "Kannada" Unicode character block.
    857          * @since 1.2
    858          */
    859         public static final UnicodeBlock KANNADA =
    860             new UnicodeBlock("KANNADA");
    861 
    862         /**
    863          * Constant for the "Malayalam" Unicode character block.
    864          * @since 1.2
    865          */
    866         public static final UnicodeBlock MALAYALAM =
    867             new UnicodeBlock("MALAYALAM");
    868 
    869         /**
    870          * Constant for the "Thai" Unicode character block.
    871          * @since 1.2
    872          */
    873         public static final UnicodeBlock THAI =
    874             new UnicodeBlock("THAI");
    875 
    876         /**
    877          * Constant for the "Lao" Unicode character block.
    878          * @since 1.2
    879          */
    880         public static final UnicodeBlock LAO =
    881             new UnicodeBlock("LAO");
    882 
    883         /**
    884          * Constant for the "Tibetan" Unicode character block.
    885          * @since 1.2
    886          */
    887         public static final UnicodeBlock TIBETAN =
    888             new UnicodeBlock("TIBETAN");
    889 
    890         /**
    891          * Constant for the "Georgian" Unicode character block.
    892          * @since 1.2
    893          */
    894         public static final UnicodeBlock GEORGIAN =
    895             new UnicodeBlock("GEORGIAN");
    896 
    897         /**
    898          * Constant for the "Hangul Jamo" Unicode character block.
    899          * @since 1.2
    900          */
    901         public static final UnicodeBlock HANGUL_JAMO =
    902             new UnicodeBlock("HANGUL_JAMO",
    903                              "HANGUL JAMO",
    904                              "HANGULJAMO");
    905 
    906         /**
    907          * Constant for the "Latin Extended Additional" Unicode character block.
    908          * @since 1.2
    909          */
    910         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
    911             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
    912                              "LATIN EXTENDED ADDITIONAL",
    913                              "LATINEXTENDEDADDITIONAL");
    914 
    915         /**
    916          * Constant for the "Greek Extended" Unicode character block.
    917          * @since 1.2
    918          */
    919         public static final UnicodeBlock GREEK_EXTENDED =
    920             new UnicodeBlock("GREEK_EXTENDED",
    921                              "GREEK EXTENDED",
    922                              "GREEKEXTENDED");
    923 
    924         /**
    925          * Constant for the "General Punctuation" Unicode character block.
    926          * @since 1.2
    927          */
    928         public static final UnicodeBlock GENERAL_PUNCTUATION =
    929             new UnicodeBlock("GENERAL_PUNCTUATION",
    930                              "GENERAL PUNCTUATION",
    931                              "GENERALPUNCTUATION");
    932 
    933         /**
    934          * Constant for the "Superscripts and Subscripts" Unicode character
    935          * block.
    936          * @since 1.2
    937          */
    938         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
    939             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
    940                              "SUPERSCRIPTS AND SUBSCRIPTS",
    941                              "SUPERSCRIPTSANDSUBSCRIPTS");
    942 
    943         /**
    944          * Constant for the "Currency Symbols" Unicode character block.
    945          * @since 1.2
    946          */
    947         public static final UnicodeBlock CURRENCY_SYMBOLS =
    948             new UnicodeBlock("CURRENCY_SYMBOLS",
    949                              "CURRENCY SYMBOLS",
    950                              "CURRENCYSYMBOLS");
    951 
    952         /**
    953          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
    954          * character block.
    955          * <p>
    956          * This block was previously known as the "Combining Marks for Symbols"
                 * block; the constant retains that earlier name, and both names are
                 * supplied in the declaration below.
                 *
    957          * @since 1.2
    958          */
    959         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
    960             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
    961                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
    962                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
    963                              "COMBINING MARKS FOR SYMBOLS",
    964                              "COMBININGMARKSFORSYMBOLS");
    965 
    966         /**
    967          * Constant for the "Letterlike Symbols" Unicode character block.
    968          * @since 1.2
    969          */
    970         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
    971             new UnicodeBlock("LETTERLIKE_SYMBOLS",
    972                              "LETTERLIKE SYMBOLS",
    973                              "LETTERLIKESYMBOLS");
    974 
    975         /**
    976          * Constant for the "Number Forms" Unicode character block.
    977          * @since 1.2
    978          */
    979         public static final UnicodeBlock NUMBER_FORMS =
    980             new UnicodeBlock("NUMBER_FORMS",
    981                              "NUMBER FORMS",
    982                              "NUMBERFORMS");
    983 
    984         /**
    985          * Constant for the "Arrows" Unicode character block.
    986          * @since 1.2
    987          */
    988         public static final UnicodeBlock ARROWS =
    989             new UnicodeBlock("ARROWS");
    990 
    991         /**
    992          * Constant for the "Mathematical Operators" Unicode character block.
    993          * @since 1.2
    994          */
    995         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
    996             new UnicodeBlock("MATHEMATICAL_OPERATORS",
    997                              "MATHEMATICAL OPERATORS",
    998                              "MATHEMATICALOPERATORS");
    999 
   1000         /**
   1001          * Constant for the "Miscellaneous Technical" Unicode character block.
   1002          * @since 1.2
   1003          */
   1004         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
   1005             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
   1006                              "MISCELLANEOUS TECHNICAL",
   1007                              "MISCELLANEOUSTECHNICAL");
   1008 
   1009         /**
   1010          * Constant for the "Control Pictures" Unicode character block.
   1011          * @since 1.2
   1012          */
   1013         public static final UnicodeBlock CONTROL_PICTURES =
   1014             new UnicodeBlock("CONTROL_PICTURES",
   1015                              "CONTROL PICTURES",
   1016                              "CONTROLPICTURES");
   1017 
   1018         /**
   1019          * Constant for the "Optical Character Recognition" Unicode character block.
   1020          * @since 1.2
   1021          */
   1022         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
   1023             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
   1024                              "OPTICAL CHARACTER RECOGNITION",
   1025                              "OPTICALCHARACTERRECOGNITION");
   1026 
   1027         /**
   1028          * Constant for the "Enclosed Alphanumerics" Unicode character block.
   1029          * @since 1.2
   1030          */
   1031         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
   1032             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
   1033                              "ENCLOSED ALPHANUMERICS",
   1034                              "ENCLOSEDALPHANUMERICS");
   1035 
   1036         /**
   1037          * Constant for the "Box Drawing" Unicode character block.
   1038          * @since 1.2
   1039          */
   1040         public static final UnicodeBlock BOX_DRAWING =
   1041             new UnicodeBlock("BOX_DRAWING",
   1042                              "BOX DRAWING",
   1043                              "BOXDRAWING");
   1044 
   1045         /**
   1046          * Constant for the "Block Elements" Unicode character block.
   1047          * @since 1.2
   1048          */
   1049         public static final UnicodeBlock BLOCK_ELEMENTS =
   1050             new UnicodeBlock("BLOCK_ELEMENTS",
   1051                              "BLOCK ELEMENTS",
   1052                              "BLOCKELEMENTS");
   1053 
   1054         /**
   1055          * Constant for the "Geometric Shapes" Unicode character block.
   1056          * @since 1.2
   1057          */
   1058         public static final UnicodeBlock GEOMETRIC_SHAPES =
   1059             new UnicodeBlock("GEOMETRIC_SHAPES",
   1060                              "GEOMETRIC SHAPES",
   1061                              "GEOMETRICSHAPES");
   1062 
   1063         /**
   1064          * Constant for the "Miscellaneous Symbols" Unicode character block.
   1065          * @since 1.2
   1066          */
   1067         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
   1068             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
   1069                              "MISCELLANEOUS SYMBOLS",
   1070                              "MISCELLANEOUSSYMBOLS");
   1071 
   1072         /**
   1073          * Constant for the "Dingbats" Unicode character block.
   1074          * @since 1.2
   1075          */
   1076         public static final UnicodeBlock DINGBATS =
   1077             new UnicodeBlock("DINGBATS");
   1078 
   1079         /**
   1080          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
   1081          * @since 1.2
   1082          */
   1083         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
   1084             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
   1085                              "CJK SYMBOLS AND PUNCTUATION",
   1086                              "CJKSYMBOLSANDPUNCTUATION");
   1087 
   1088         /**
   1089          * Constant for the "Hiragana" Unicode character block.
   1090          * @since 1.2
   1091          */
   1092         public static final UnicodeBlock HIRAGANA =
   1093             new UnicodeBlock("HIRAGANA");
   1094 
   1095         /**
   1096          * Constant for the "Katakana" Unicode character block.
   1097          * @since 1.2
   1098          */
   1099         public static final UnicodeBlock KATAKANA =
   1100             new UnicodeBlock("KATAKANA");
   1101 
   1102         /**
   1103          * Constant for the "Bopomofo" Unicode character block.
   1104          * @since 1.2
   1105          */
   1106         public static final UnicodeBlock BOPOMOFO =
   1107             new UnicodeBlock("BOPOMOFO");
   1108 
   1109         /**
   1110          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
   1111          * @since 1.2
   1112          */
   1113         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
   1114             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
   1115                              "HANGUL COMPATIBILITY JAMO",
   1116                              "HANGULCOMPATIBILITYJAMO");
   1117 
   1118         /**
   1119          * Constant for the "Kanbun" Unicode character block.
   1120          * @since 1.2
   1121          */
   1122         public static final UnicodeBlock KANBUN =
   1123             new UnicodeBlock("KANBUN");
   1124 
   1125         /**
   1126          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
   1127          * @since 1.2
   1128          */
   1129         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
   1130             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
   1131                              "ENCLOSED CJK LETTERS AND MONTHS",
   1132                              "ENCLOSEDCJKLETTERSANDMONTHS");
   1133 
   1134         /**
   1135          * Constant for the "CJK Compatibility" Unicode character block.
   1136          * @since 1.2
   1137          */
   1138         public static final UnicodeBlock CJK_COMPATIBILITY =
   1139             new UnicodeBlock("CJK_COMPATIBILITY",
   1140                              "CJK COMPATIBILITY",
   1141                              "CJKCOMPATIBILITY");
   1142 
   1143         /**
   1144          * Constant for the "CJK Unified Ideographs" Unicode character block.
   1145          * @since 1.2
   1146          */
   1147         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
   1148             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
   1149                              "CJK UNIFIED IDEOGRAPHS",
   1150                              "CJKUNIFIEDIDEOGRAPHS");
   1151 
   1152         /**
   1153          * Constant for the "Hangul Syllables" Unicode character block.
   1154          * @since 1.2
   1155          */
   1156         public static final UnicodeBlock HANGUL_SYLLABLES =
   1157             new UnicodeBlock("HANGUL_SYLLABLES",
   1158                              "HANGUL SYLLABLES",
   1159                              "HANGULSYLLABLES");
   1160 
   1161         /**
   1162          * Constant for the "Private Use Area" Unicode character block.
   1163          * @since 1.2
   1164          */
   1165         public static final UnicodeBlock PRIVATE_USE_AREA =
   1166             new UnicodeBlock("PRIVATE_USE_AREA",
   1167                              "PRIVATE USE AREA",
   1168                              "PRIVATEUSEAREA");
   1169 
   1170         /**
   1171          * Constant for the "CJK Compatibility Ideographs" Unicode character
   1172          * block.
   1173          * @since 1.2
   1174          */
   1175         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
   1176             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
   1177                              "CJK COMPATIBILITY IDEOGRAPHS",
   1178                              "CJKCOMPATIBILITYIDEOGRAPHS");
   1179 
   1180         /**
   1181          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
   1182          * @since 1.2
   1183          */
   1184         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
   1185             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
   1186                              "ALPHABETIC PRESENTATION FORMS",
   1187                              "ALPHABETICPRESENTATIONFORMS");
   1188 
   1189         /**
   1190          * Constant for the "Arabic Presentation Forms-A" Unicode character
   1191          * block.
   1192          * @since 1.2
   1193          */
   1194         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
   1195             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
   1196                              "ARABIC PRESENTATION FORMS-A",
   1197                              "ARABICPRESENTATIONFORMS-A");
   1198 
   1199         /**
   1200          * Constant for the "Combining Half Marks" Unicode character block.
   1201          * @since 1.2
   1202          */
   1203         public static final UnicodeBlock COMBINING_HALF_MARKS =
   1204             new UnicodeBlock("COMBINING_HALF_MARKS",
   1205                              "COMBINING HALF MARKS",
   1206                              "COMBININGHALFMARKS");
   1207 
   1208         /**
   1209          * Constant for the "CJK Compatibility Forms" Unicode character block.
   1210          * @since 1.2
   1211          */
   1212         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
   1213             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
   1214                              "CJK COMPATIBILITY FORMS",
   1215                              "CJKCOMPATIBILITYFORMS");
   1216 
   1217         /**
   1218          * Constant for the "Small Form Variants" Unicode character block.
   1219          * @since 1.2
   1220          */
   1221         public static final UnicodeBlock SMALL_FORM_VARIANTS =
   1222             new UnicodeBlock("SMALL_FORM_VARIANTS",
   1223                              "SMALL FORM VARIANTS",
   1224                              "SMALLFORMVARIANTS");
   1225 
   1226         /**
   1227          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
   1228          * @since 1.2
   1229          */
   1230         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
   1231             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
   1232                              "ARABIC PRESENTATION FORMS-B",
   1233                              "ARABICPRESENTATIONFORMS-B");
   1234 
   1235         /**
   1236          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
   1237          * block.
   1238          * @since 1.2
   1239          */
   1240         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
   1241             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
   1242                              "HALFWIDTH AND FULLWIDTH FORMS",
   1243                              "HALFWIDTHANDFULLWIDTHFORMS");
   1244 
   1245         /**
   1246          * Constant for the "Specials" Unicode character block.
   1247          * @since 1.2
   1248          */
   1249         public static final UnicodeBlock SPECIALS =
   1250             new UnicodeBlock("SPECIALS");
   1251 
   1252         /**
                 * Legacy "Surrogates Area" constant, superseded by the three surrogate
                 * block constants named below.
                 *
   1253          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
   1254          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
   1255          *             {@link #LOW_SURROGATES}. These new constants match
   1256          *             the block definitions of the Unicode Standard.
   1257          *             The {@link #of(char)} and {@link #of(int)} methods
   1258          *             return the new constants, not SURROGATES_AREA.
   1259          */
   1260         @Deprecated
   1261         public static final UnicodeBlock SURROGATES_AREA =
                    // NOTE(review): the only declaration in this stretch that passes a boolean;
                    // presumably false keeps this deprecated constant out of name-based
                    // lookup -- confirm against the two-argument constructor.
   1262             new UnicodeBlock("SURROGATES_AREA", false);
   1263 
   1264         /**
   1265          * Constant for the "Syriac" Unicode character block.
   1266          * @since 1.4
   1267          */
   1268         public static final UnicodeBlock SYRIAC =
   1269             new UnicodeBlock("SYRIAC");
   1270 
   1271         /**
   1272          * Constant for the "Thaana" Unicode character block.
   1273          * @since 1.4
   1274          */
   1275         public static final UnicodeBlock THAANA =
   1276             new UnicodeBlock("THAANA");
   1277 
   1278         /**
   1279          * Constant for the "Sinhala" Unicode character block.
   1280          * @since 1.4
   1281          */
   1282         public static final UnicodeBlock SINHALA =
   1283             new UnicodeBlock("SINHALA");
   1284 
   1285         /**
   1286          * Constant for the "Myanmar" Unicode character block.
   1287          * @since 1.4
   1288          */
   1289         public static final UnicodeBlock MYANMAR =
   1290             new UnicodeBlock("MYANMAR");
   1291 
   1292         /**
   1293          * Constant for the "Ethiopic" Unicode character block.
   1294          * @since 1.4
   1295          */
   1296         public static final UnicodeBlock ETHIOPIC =
   1297             new UnicodeBlock("ETHIOPIC");
   1298 
   1299         /**
   1300          * Constant for the "Cherokee" Unicode character block.
   1301          * @since 1.4
   1302          */
   1303         public static final UnicodeBlock CHEROKEE =
   1304             new UnicodeBlock("CHEROKEE");
   1305 
   1306         /**
   1307          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
   1308          * @since 1.4
   1309          */
   1310         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
   1311             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
   1312                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
   1313                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
   1314 
   1315         /**
   1316          * Constant for the "Ogham" Unicode character block.
   1317          * @since 1.4
   1318          */
   1319         public static final UnicodeBlock OGHAM =
   1320             new UnicodeBlock("OGHAM");
   1321 
   1322         /**
   1323          * Constant for the "Runic" Unicode character block.
   1324          * @since 1.4
   1325          */
   1326         public static final UnicodeBlock RUNIC =
   1327             new UnicodeBlock("RUNIC");
   1328 
   1329         /**
   1330          * Constant for the "Khmer" Unicode character block.
   1331          * @since 1.4
   1332          */
   1333         public static final UnicodeBlock KHMER =
   1334             new UnicodeBlock("KHMER");
   1335 
   1336         /**
   1337          * Constant for the "Mongolian" Unicode character block.
   1338          * @since 1.4
   1339          */
   1340         public static final UnicodeBlock MONGOLIAN =
   1341             new UnicodeBlock("MONGOLIAN");
   1342 
   1343         /**
   1344          * Constant for the "Braille Patterns" Unicode character block.
   1345          * @since 1.4
   1346          */
   1347         public static final UnicodeBlock BRAILLE_PATTERNS =
   1348             new UnicodeBlock("BRAILLE_PATTERNS",
   1349                              "BRAILLE PATTERNS",
   1350                              "BRAILLEPATTERNS");
   1351 
   1352         /**
   1353          * Constant for the "CJK Radicals Supplement" Unicode character block.
   1354          * @since 1.4
   1355          */
   1356         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
   1357             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
   1358                              "CJK RADICALS SUPPLEMENT",
   1359                              "CJKRADICALSSUPPLEMENT");
   1360 
   1361         /**
   1362          * Constant for the "Kangxi Radicals" Unicode character block.
   1363          * @since 1.4
   1364          */
   1365         public static final UnicodeBlock KANGXI_RADICALS =
   1366             new UnicodeBlock("KANGXI_RADICALS",
   1367                              "KANGXI RADICALS",
   1368                              "KANGXIRADICALS");
   1369 
   1370         /**
   1371          * Constant for the "Ideographic Description Characters" Unicode character block.
   1372          * @since 1.4
   1373          */
   1374         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
   1375             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
   1376                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
   1377                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
   1378 
   1379         /**
   1380          * Constant for the "Bopomofo Extended" Unicode character block.
   1381          * @since 1.4
   1382          */
   1383         public static final UnicodeBlock BOPOMOFO_EXTENDED =
   1384             new UnicodeBlock("BOPOMOFO_EXTENDED",
   1385                              "BOPOMOFO EXTENDED",
   1386                              "BOPOMOFOEXTENDED");
   1387 
   1388         /**
   1389          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
   1390          * @since 1.4
   1391          */
   1392         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
   1393             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
   1394                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
   1395                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
   1396 
   1397         /**
   1398          * Constant for the "Yi Syllables" Unicode character block.
   1399          * @since 1.4
   1400          */
   1401         public static final UnicodeBlock YI_SYLLABLES =
   1402             new UnicodeBlock("YI_SYLLABLES",
   1403                              "YI SYLLABLES",
   1404                              "YISYLLABLES");
   1405 
   1406         /**
   1407          * Constant for the "Yi Radicals" Unicode character block.
   1408          * @since 1.4
   1409          */
   1410         public static final UnicodeBlock YI_RADICALS =
   1411             new UnicodeBlock("YI_RADICALS",
   1412                              "YI RADICALS",
   1413                              "YIRADICALS");
   1414 
   1415         /**
   1416          * Constant for the "Cyrillic Supplementary" Unicode character block.
                 * <p>
                 * The Unicode Standard now calls this block "Cyrillic Supplement"; the
                 * constant retains the earlier name, and both spellings are supplied
                 * in the declaration below.
                 *
   1417          * @since 1.5
   1418          */
   1419         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
   1420             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
   1421                              "CYRILLIC SUPPLEMENTARY",
   1422                              "CYRILLICSUPPLEMENTARY",
   1423                              "CYRILLIC SUPPLEMENT",
   1424                              "CYRILLICSUPPLEMENT");
   1425 
   1426         /**
   1427          * Constant for the "Tagalog" Unicode character block.
   1428          * @since 1.5
   1429          */
   1430         public static final UnicodeBlock TAGALOG =
   1431             new UnicodeBlock("TAGALOG");
   1432 
   1433         /**
   1434          * Constant for the "Hanunoo" Unicode character block.
   1435          * @since 1.5
   1436          */
   1437         public static final UnicodeBlock HANUNOO =
   1438             new UnicodeBlock("HANUNOO");
   1439 
   1440         /**
   1441          * Constant for the "Buhid" Unicode character block.
   1442          * @since 1.5
   1443          */
   1444         public static final UnicodeBlock BUHID =
   1445             new UnicodeBlock("BUHID");
   1446 
   1447         /**
   1448          * Constant for the "Tagbanwa" Unicode character block.
   1449          * @since 1.5
   1450          */
   1451         public static final UnicodeBlock TAGBANWA =
   1452             new UnicodeBlock("TAGBANWA");
   1453 
   1454         /**
   1455          * Constant for the "Limbu" Unicode character block.
   1456          * @since 1.5
   1457          */
   1458         public static final UnicodeBlock LIMBU =
   1459             new UnicodeBlock("LIMBU");
   1460 
   1461         /**
   1462          * Constant for the "Tai Le" Unicode character block.
   1463          * @since 1.5
   1464          */
   1465         public static final UnicodeBlock TAI_LE =
   1466             new UnicodeBlock("TAI_LE",
   1467                              "TAI LE",
   1468                              "TAILE");
   1469 
   1470         /**
   1471          * Constant for the "Khmer Symbols" Unicode character block.
   1472          * @since 1.5
   1473          */
   1474         public static final UnicodeBlock KHMER_SYMBOLS =
   1475             new UnicodeBlock("KHMER_SYMBOLS",
   1476                              "KHMER SYMBOLS",
   1477                              "KHMERSYMBOLS");
   1478 
   1479         /**
   1480          * Constant for the "Phonetic Extensions" Unicode character block.
   1481          * @since 1.5
   1482          */
   1483         public static final UnicodeBlock PHONETIC_EXTENSIONS =
   1484             new UnicodeBlock("PHONETIC_EXTENSIONS",
   1485                              "PHONETIC EXTENSIONS",
   1486                              "PHONETICEXTENSIONS");
   1487 
   1488         /**
   1489          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
   1490          * @since 1.5
   1491          */
   1492         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
   1493             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
   1494                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
   1495                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
   1496 
   1497         /**
   1498          * Constant for the "Supplemental Arrows-A" Unicode character block.
   1499          * @since 1.5
   1500          */
   1501         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
   1502             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
   1503                              "SUPPLEMENTAL ARROWS-A",
   1504                              "SUPPLEMENTALARROWS-A");
   1505 
   1506         /**
   1507          * Constant for the "Supplemental Arrows-B" Unicode character block.
   1508          * @since 1.5
   1509          */
   1510         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
   1511             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
   1512                              "SUPPLEMENTAL ARROWS-B",
   1513                              "SUPPLEMENTALARROWS-B");
   1514 
   1515         /**
   1516          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
   1517          * character block.
   1518          * @since 1.5
   1519          */
   1520         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
   1521             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",  // identifier uses '_'; both name spellings below keep the '-'
   1522                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
   1523                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
   1524 
   1525         /**
   1526          * Constant for the "Supplemental Mathematical Operators" Unicode
   1527          * character block.
   1528          * @since 1.5
   1529          */
   1530         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
   1531             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",  // identifier, then the name with spaces and with spaces removed
   1532                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
   1533                              "SUPPLEMENTALMATHEMATICALOPERATORS");
   1534 
   1535         /**
   1536          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
   1537          * block.
   1538          * @since 1.5
   1539          */
   1540         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
   1541             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",  // identifier, then the name with spaces and with spaces removed
   1542                              "MISCELLANEOUS SYMBOLS AND ARROWS",
   1543                              "MISCELLANEOUSSYMBOLSANDARROWS");
   1544 
   1545         /**
   1546          * Constant for the "Katakana Phonetic Extensions" Unicode character
   1547          * block.
   1548          * @since 1.5
   1549          */
   1550         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
   1551             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",  // identifier, then the name with spaces and with spaces removed
   1552                              "KATAKANA PHONETIC EXTENSIONS",
   1553                              "KATAKANAPHONETICEXTENSIONS");
   1554 
   1555         /**
   1556          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
   1557          * @since 1.5
   1558          */
   1559         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
   1560             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",  // identifier, then the name with spaces and with spaces removed
   1561                              "YIJING HEXAGRAM SYMBOLS",
   1562                              "YIJINGHEXAGRAMSYMBOLS");
   1563 
   1564         /**
   1565          * Constant for the "Variation Selectors" Unicode character block.
   1566          * @since 1.5
   1567          */
   1568         public static final UnicodeBlock VARIATION_SELECTORS =
   1569             new UnicodeBlock("VARIATION_SELECTORS",  // identifier, then the name with spaces and with spaces removed
   1570                              "VARIATION SELECTORS",
   1571                              "VARIATIONSELECTORS");
   1572 
   1573         /**
   1574          * Constant for the "Linear B Syllabary" Unicode character block.
   1575          * @since 1.5
   1576          */
   1577         public static final UnicodeBlock LINEAR_B_SYLLABARY =
   1578             new UnicodeBlock("LINEAR_B_SYLLABARY",  // identifier, then the name with spaces and with spaces removed
   1579                              "LINEAR B SYLLABARY",
   1580                              "LINEARBSYLLABARY");
   1581 
   1582         /**
   1583          * Constant for the "Linear B Ideograms" Unicode character block.
   1584          * @since 1.5
   1585          */
   1586         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
   1587             new UnicodeBlock("LINEAR_B_IDEOGRAMS",  // identifier, then the name with spaces and with spaces removed
   1588                              "LINEAR B IDEOGRAMS",
   1589                              "LINEARBIDEOGRAMS");
   1590 
   1591         /**
   1592          * Constant for the "Aegean Numbers" Unicode character block.
   1593          * @since 1.5
   1594          */
   1595         public static final UnicodeBlock AEGEAN_NUMBERS =
   1596             new UnicodeBlock("AEGEAN_NUMBERS",  // identifier, then the name with spaces and with spaces removed
   1597                              "AEGEAN NUMBERS",
   1598                              "AEGEANNUMBERS");
   1599 
   1600         /**
   1601          * Constant for the "Old Italic" Unicode character block.
   1602          * @since 1.5
   1603          */
   1604         public static final UnicodeBlock OLD_ITALIC =
   1605             new UnicodeBlock("OLD_ITALIC",  // identifier, then the name with spaces and with spaces removed
   1606                              "OLD ITALIC",
   1607                              "OLDITALIC");
   1608 
   1609         /**
   1610          * Constant for the "Gothic" Unicode character block.
   1611          * @since 1.5
   1612          */
   1613         public static final UnicodeBlock GOTHIC =
   1614             new UnicodeBlock("GOTHIC");  // one-word name: the identifier is the only string passed
   1615 
   1616         /**
   1617          * Constant for the "Ugaritic" Unicode character block.
   1618          * @since 1.5
   1619          */
   1620         public static final UnicodeBlock UGARITIC =
   1621             new UnicodeBlock("UGARITIC");  // one-word name: the identifier is the only string passed
   1622 
   1623         /**
   1624          * Constant for the "Deseret" Unicode character block.
   1625          * @since 1.5
   1626          */
   1627         public static final UnicodeBlock DESERET =
   1628             new UnicodeBlock("DESERET");  // one-word name: the identifier is the only string passed
   1629 
   1630         /**
   1631          * Constant for the "Shavian" Unicode character block.
   1632          * @since 1.5
   1633          */
   1634         public static final UnicodeBlock SHAVIAN =
   1635             new UnicodeBlock("SHAVIAN");  // one-word name: the identifier is the only string passed
   1636 
   1637         /**
   1638          * Constant for the "Osmanya" Unicode character block.
   1639          * @since 1.5
   1640          */
   1641         public static final UnicodeBlock OSMANYA =
   1642             new UnicodeBlock("OSMANYA");  // one-word name: the identifier is the only string passed
   1643 
   1644         /**
   1645          * Constant for the "Cypriot Syllabary" Unicode character block.
   1646          * @since 1.5
   1647          */
   1648         public static final UnicodeBlock CYPRIOT_SYLLABARY =
   1649             new UnicodeBlock("CYPRIOT_SYLLABARY",  // identifier, then the name with spaces and with spaces removed
   1650                              "CYPRIOT SYLLABARY",
   1651                              "CYPRIOTSYLLABARY");
   1652 
   1653         /**
   1654          * Constant for the "Byzantine Musical Symbols" Unicode character block.
   1655          * @since 1.5
   1656          */
   1657         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
   1658             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",  // identifier, then the name with spaces and with spaces removed
   1659                              "BYZANTINE MUSICAL SYMBOLS",
   1660                              "BYZANTINEMUSICALSYMBOLS");
   1661 
   1662         /**
   1663          * Constant for the "Musical Symbols" Unicode character block.
   1664          * @since 1.5
   1665          */
   1666         public static final UnicodeBlock MUSICAL_SYMBOLS =
   1667             new UnicodeBlock("MUSICAL_SYMBOLS",  // identifier, then the name with spaces and with spaces removed
   1668                              "MUSICAL SYMBOLS",
   1669                              "MUSICALSYMBOLS");
   1670 
   1671         /**
   1672          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
   1673          * @since 1.5
   1674          */
   1675         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
   1676             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",  // identifier, then the name with spaces and with spaces removed
   1677                              "TAI XUAN JING SYMBOLS",
   1678                              "TAIXUANJINGSYMBOLS");
   1679 
   1680         /**
   1681          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
   1682          * character block.
   1683          * @since 1.5
   1684          */
   1685         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
   1686             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",  // identifier, then the name with spaces and with spaces removed
   1687                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
   1688                              "MATHEMATICALALPHANUMERICSYMBOLS");
   1689 
   1690         /**
   1691          * Constant for the "CJK Unified Ideographs Extension B" Unicode
   1692          * character block.
   1693          * @since 1.5
   1694          */
   1695         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
   1696             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",  // identifier, then the name with spaces and with spaces removed
   1697                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
   1698                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
   1699 
   1700         /**
   1701          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
   1702          * @since 1.5
   1703          */
   1704         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
   1705             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1706                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
   1707                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
   1708 
   1709         /**
   1710          * Constant for the "Tags" Unicode character block.
   1711          * @since 1.5
   1712          */
   1713         public static final UnicodeBlock TAGS =
   1714             new UnicodeBlock("TAGS");  // one-word name: the identifier is the only string passed
   1715 
   1716         /**
   1717          * Constant for the "Variation Selectors Supplement" Unicode character
   1718          * block.
   1719          * @since 1.5
   1720          */
   1721         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
   1722             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1723                              "VARIATION SELECTORS SUPPLEMENT",
   1724                              "VARIATIONSELECTORSSUPPLEMENT");
   1725 
   1726         /**
   1727          * Constant for the "Supplementary Private Use Area-A" Unicode character
   1728          * block.
   1729          * @since 1.5
   1730          */
   1731         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
   1732             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",  // identifier uses '_'; both name spellings below keep the '-'
   1733                              "SUPPLEMENTARY PRIVATE USE AREA-A",
   1734                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
   1735 
   1736         /**
   1737          * Constant for the "Supplementary Private Use Area-B" Unicode character
   1738          * block.
   1739          * @since 1.5
   1740          */
   1741         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
   1742             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",  // identifier uses '_'; both name spellings below keep the '-'
   1743                              "SUPPLEMENTARY PRIVATE USE AREA-B",
   1744                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
   1745 
   1746         /**
   1747          * Constant for the "High Surrogates" Unicode character block.
   1748          * This block represents code point values in the high surrogate
   1749          * range: U+D800 through U+DB7F.
   1750          *
   1751          * @since 1.5
   1752          */
   1753         public static final UnicodeBlock HIGH_SURROGATES =
   1754             new UnicodeBlock("HIGH_SURROGATES",  // identifier, then the name with spaces and with spaces removed
   1755                              "HIGH SURROGATES",
   1756                              "HIGHSURROGATES");
   1757 
   1758         /**
   1759          * Constant for the "High Private Use Surrogates" Unicode character
   1760          * block.
   1761          * This block represents codepoint values in the private use high
   1762          * surrogate range: U+DB80 through U+DBFF
   1763          *
   1764          * @since 1.5
   1765          */
   1766         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
   1767             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",  // identifier, then the name with spaces and with spaces removed
   1768                              "HIGH PRIVATE USE SURROGATES",
   1769                              "HIGHPRIVATEUSESURROGATES");
   1770 
   1771         /**
   1772          * Constant for the "Low Surrogates" Unicode character block.
   1773          * This block represents codepoint values in the low surrogate
   1774          * range: U+DC00 through U+DFFF
   1775          *
   1776          * @since 1.5
   1777          */
   1778         public static final UnicodeBlock LOW_SURROGATES =
   1779             new UnicodeBlock("LOW_SURROGATES",  // identifier, then the name with spaces and with spaces removed
   1780                              "LOW SURROGATES",
   1781                              "LOWSURROGATES");
   1782 
   1783         /**
   1784          * Constant for the "Arabic Supplement" Unicode character block.
   1785          * @since 1.7
   1786          */
   1787         public static final UnicodeBlock ARABIC_SUPPLEMENT =
   1788             new UnicodeBlock("ARABIC_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1789                              "ARABIC SUPPLEMENT",
   1790                              "ARABICSUPPLEMENT");
   1791 
   1792         /**
   1793          * Constant for the "NKo" Unicode character block.
   1794          * @since 1.7
   1795          */
   1796         public static final UnicodeBlock NKO =
   1797             new UnicodeBlock("NKO");  // one-word name: the identifier is the only string passed
   1798 
   1799         /**
   1800          * Constant for the "Samaritan" Unicode character block.
   1801          * @since 1.7
   1802          */
   1803         public static final UnicodeBlock SAMARITAN =
   1804             new UnicodeBlock("SAMARITAN");  // one-word name: the identifier is the only string passed
   1805 
   1806         /**
   1807          * Constant for the "Mandaic" Unicode character block.
   1808          * @since 1.7
   1809          */
   1810         public static final UnicodeBlock MANDAIC =
   1811             new UnicodeBlock("MANDAIC");  // one-word name: the identifier is the only string passed
   1812 
   1813         /**
   1814          * Constant for the "Ethiopic Supplement" Unicode character block.
   1815          * @since 1.7
   1816          */
   1817         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
   1818             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1819                              "ETHIOPIC SUPPLEMENT",
   1820                              "ETHIOPICSUPPLEMENT");
   1821 
   1822         /**
   1823          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
   1824          * Unicode character block.
   1825          * @since 1.7
   1826          */
   1827         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
   1828             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",  // identifier, then the name with spaces and with spaces removed
   1829                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
   1830                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
   1831 
   1832         /**
   1833          * Constant for the "New Tai Lue" Unicode character block.
   1834          * @since 1.7
   1835          */
   1836         public static final UnicodeBlock NEW_TAI_LUE =
   1837             new UnicodeBlock("NEW_TAI_LUE",  // identifier, then the name with spaces and with spaces removed
   1838                              "NEW TAI LUE",
   1839                              "NEWTAILUE");
   1840 
   1841         /**
   1842          * Constant for the "Buginese" Unicode character block.
   1843          * @since 1.7
   1844          */
   1845         public static final UnicodeBlock BUGINESE =
   1846             new UnicodeBlock("BUGINESE");  // one-word name: the identifier is the only string passed
   1847 
   1848         /**
   1849          * Constant for the "Tai Tham" Unicode character block.
   1850          * @since 1.7
   1851          */
   1852         public static final UnicodeBlock TAI_THAM =
   1853             new UnicodeBlock("TAI_THAM",  // identifier, then the name with spaces and with spaces removed
   1854                              "TAI THAM",
   1855                              "TAITHAM");
   1856 
   1857         /**
   1858          * Constant for the "Balinese" Unicode character block.
   1859          * @since 1.7
   1860          */
   1861         public static final UnicodeBlock BALINESE =
   1862             new UnicodeBlock("BALINESE");  // one-word name: the identifier is the only string passed
   1863 
   1864         /**
   1865          * Constant for the "Sundanese" Unicode character block.
   1866          * @since 1.7
   1867          */
   1868         public static final UnicodeBlock SUNDANESE =
   1869             new UnicodeBlock("SUNDANESE");  // one-word name: the identifier is the only string passed
   1870 
   1871         /**
   1872          * Constant for the "Batak" Unicode character block.
   1873          * @since 1.7
   1874          */
   1875         public static final UnicodeBlock BATAK =
   1876             new UnicodeBlock("BATAK");  // one-word name: the identifier is the only string passed
   1877 
   1878         /**
   1879          * Constant for the "Lepcha" Unicode character block.
   1880          * @since 1.7
   1881          */
   1882         public static final UnicodeBlock LEPCHA =
   1883             new UnicodeBlock("LEPCHA");  // one-word name: the identifier is the only string passed
   1884 
   1885         /**
   1886          * Constant for the "Ol Chiki" Unicode character block.
   1887          * @since 1.7
   1888          */
   1889         public static final UnicodeBlock OL_CHIKI =
   1890             new UnicodeBlock("OL_CHIKI",  // identifier, then the name with spaces and with spaces removed
   1891                              "OL CHIKI",
   1892                              "OLCHIKI");
   1893 
   1894         /**
   1895          * Constant for the "Vedic Extensions" Unicode character block.
   1896          * @since 1.7
   1897          */
   1898         public static final UnicodeBlock VEDIC_EXTENSIONS =
   1899             new UnicodeBlock("VEDIC_EXTENSIONS",  // identifier, then the name with spaces and with spaces removed
   1900                              "VEDIC EXTENSIONS",
   1901                              "VEDICEXTENSIONS");
   1902 
   1903         /**
   1904          * Constant for the "Phonetic Extensions Supplement" Unicode character
   1905          * block.
   1906          * @since 1.7
   1907          */
   1908         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
   1909             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1910                              "PHONETIC EXTENSIONS SUPPLEMENT",
   1911                              "PHONETICEXTENSIONSSUPPLEMENT");
   1912 
   1913         /**
   1914          * Constant for the "Combining Diacritical Marks Supplement" Unicode
   1915          * character block.
   1916          * @since 1.7
   1917          */
   1918         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
   1919             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1920                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
   1921                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
   1922 
   1923         /**
   1924          * Constant for the "Glagolitic" Unicode character block.
   1925          * @since 1.7
   1926          */
   1927         public static final UnicodeBlock GLAGOLITIC =
   1928             new UnicodeBlock("GLAGOLITIC");  // one-word name: the identifier is the only string passed
   1929 
   1930         /**
   1931          * Constant for the "Latin Extended-C" Unicode character block.
   1932          * @since 1.7
   1933          */
   1934         public static final UnicodeBlock LATIN_EXTENDED_C =
   1935             new UnicodeBlock("LATIN_EXTENDED_C",  // identifier uses '_'; both name spellings below keep the '-'
   1936                              "LATIN EXTENDED-C",
   1937                              "LATINEXTENDED-C");
   1938 
   1939         /**
   1940          * Constant for the "Coptic" Unicode character block.
   1941          * @since 1.7
   1942          */
   1943         public static final UnicodeBlock COPTIC =
   1944             new UnicodeBlock("COPTIC");  // one-word name: the identifier is the only string passed
   1945 
   1946         /**
   1947          * Constant for the "Georgian Supplement" Unicode character block.
   1948          * @since 1.7
   1949          */
   1950         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
   1951             new UnicodeBlock("GEORGIAN_SUPPLEMENT",  // identifier, then the name with spaces and with spaces removed
   1952                              "GEORGIAN SUPPLEMENT",
   1953                              "GEORGIANSUPPLEMENT");
   1954 
   1955         /**
   1956          * Constant for the "Tifinagh" Unicode character block.
   1957          * @since 1.7
   1958          */
   1959         public static final UnicodeBlock TIFINAGH =
   1960             new UnicodeBlock("TIFINAGH");  // one-word name: the identifier is the only string passed
   1961 
   1962         /**
   1963          * Constant for the "Ethiopic Extended" Unicode character block.
   1964          * @since 1.7
   1965          */
   1966         public static final UnicodeBlock ETHIOPIC_EXTENDED =
   1967             new UnicodeBlock("ETHIOPIC_EXTENDED",  // identifier, then the name with spaces and with spaces removed
   1968                              "ETHIOPIC EXTENDED",
   1969                              "ETHIOPICEXTENDED");
   1970 
   1971         /**
   1972          * Constant for the "Cyrillic Extended-A" Unicode character block.
   1973          * @since 1.7
   1974          */
   1975         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
   1976             new UnicodeBlock("CYRILLIC_EXTENDED_A",  // identifier uses '_'; both name spellings below keep the '-'
   1977                              "CYRILLIC EXTENDED-A",
   1978                              "CYRILLICEXTENDED-A");
   1979 
   1980         /**
   1981          * Constant for the "Supplemental Punctuation" Unicode character block.
   1982          * @since 1.7
   1983          */
   1984         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
   1985             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",  // identifier, then the name with spaces and with spaces removed
   1986                              "SUPPLEMENTAL PUNCTUATION",
   1987                              "SUPPLEMENTALPUNCTUATION");
   1988 
   1989         /**
   1990          * Constant for the "CJK Strokes" Unicode character block.
   1991          * @since 1.7
   1992          */
   1993         public static final UnicodeBlock CJK_STROKES =
   1994             new UnicodeBlock("CJK_STROKES",  // identifier, then the name with spaces and with spaces removed
   1995                              "CJK STROKES",
   1996                              "CJKSTROKES");
   1997 
   1998         /**
   1999          * Constant for the "Lisu" Unicode character block.
   2000          * @since 1.7
   2001          */
   2002         public static final UnicodeBlock LISU =
   2003             new UnicodeBlock("LISU");  // one-word name: the identifier is the only string passed
   2004 
   2005         /**
   2006          * Constant for the "Vai" Unicode character block.
   2007          * @since 1.7
   2008          */
   2009         public static final UnicodeBlock VAI =
   2010             new UnicodeBlock("VAI");  // one-word name: the identifier is the only string passed
   2011 
   2012         /**
   2013          * Constant for the "Cyrillic Extended-B" Unicode character block.
   2014          * @since 1.7
   2015          */
   2016         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
   2017             new UnicodeBlock("CYRILLIC_EXTENDED_B",  // identifier uses '_'; both name spellings below keep the '-'
   2018                              "CYRILLIC EXTENDED-B",
   2019                              "CYRILLICEXTENDED-B");
   2020 
   2021         /**
   2022          * Constant for the "Bamum" Unicode character block.
   2023          * @since 1.7
   2024          */
   2025         public static final UnicodeBlock BAMUM =
   2026             new UnicodeBlock("BAMUM");  // one-word name: the identifier is the only string passed
   2027 
   2028         /**
   2029          * Constant for the "Modifier Tone Letters" Unicode character block.
   2030          * @since 1.7
   2031          */
   2032         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
   2033             new UnicodeBlock("MODIFIER_TONE_LETTERS",  // identifier, then the name with spaces and with spaces removed
   2034                              "MODIFIER TONE LETTERS",
   2035                              "MODIFIERTONELETTERS");
   2036 
   2037         /**
   2038          * Constant for the "Latin Extended-D" Unicode character block.
   2039          * @since 1.7
   2040          */
   2041         public static final UnicodeBlock LATIN_EXTENDED_D =
   2042             new UnicodeBlock("LATIN_EXTENDED_D",  // identifier uses '_'; both name spellings below keep the '-'
   2043                              "LATIN EXTENDED-D",
   2044                              "LATINEXTENDED-D");
   2045 
   2046         /**
   2047          * Constant for the "Syloti Nagri" Unicode character block.
   2048          * @since 1.7
   2049          */
   2050         public static final UnicodeBlock SYLOTI_NAGRI =
   2051             new UnicodeBlock("SYLOTI_NAGRI",  // identifier, then the name with spaces and with spaces removed
   2052                              "SYLOTI NAGRI",
   2053                              "SYLOTINAGRI");
   2054 
   2055         /**
   2056          * Constant for the "Common Indic Number Forms" Unicode character block.
   2057          * @since 1.7
   2058          */
   2059         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
   2060             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",  // identifier, then the name with spaces and with spaces removed
   2061                              "COMMON INDIC NUMBER FORMS",
   2062                              "COMMONINDICNUMBERFORMS");
   2063 
   2064         /**
   2065          * Constant for the "Phags-pa" Unicode character block.
   2066          * @since 1.7
   2067          */
   2068         public static final UnicodeBlock PHAGS_PA =
   2069             new UnicodeBlock("PHAGS_PA",  // identifier: '_' stands in for the '-' of the block name
   2070                              "PHAGS-PA");  // the name contains no spaces, so no separate space-free spelling is passed
   2071 
   2072         /**
   2073          * Constant for the "Saurashtra" Unicode character block.
   2074          * @since 1.7
   2075          */
   2076         public static final UnicodeBlock SAURASHTRA =
   2077             new UnicodeBlock("SAURASHTRA");  // one-word name: the identifier is the only string passed
   2078 
   2079         /**
   2080          * Constant for the "Devanagari Extended" Unicode character block.
   2081          * @since 1.7
   2082          */
   2083         public static final UnicodeBlock DEVANAGARI_EXTENDED =
   2084             new UnicodeBlock("DEVANAGARI_EXTENDED",  // identifier, then the name with spaces and with spaces removed
   2085                              "DEVANAGARI EXTENDED",
   2086                              "DEVANAGARIEXTENDED");
   2087 
   2088         /**
   2089          * Constant for the "Kayah Li" Unicode character block.
   2090          * @since 1.7
   2091          */
   2092         public static final UnicodeBlock KAYAH_LI =
   2093             new UnicodeBlock("KAYAH_LI",  // identifier, then the name with spaces and with spaces removed
   2094                              "KAYAH LI",
   2095                              "KAYAHLI");
   2096 
   2097         /**
   2098          * Constant for the "Rejang" Unicode character block.
   2099          * @since 1.7
   2100          */
   2101         public static final UnicodeBlock REJANG =
   2102             new UnicodeBlock("REJANG");  // one-word name: the identifier is the only string passed
   2103 
   2104         /**
   2105          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
   2106          * @since 1.7
   2107          */
   2108         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
   2109             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",  // identifier uses '_'; both name spellings below keep the '-'
   2110                              "HANGUL JAMO EXTENDED-A",
   2111                              "HANGULJAMOEXTENDED-A");
   2112 
   2113         /**
   2114          * Constant for the "Javanese" Unicode character block.
   2115          * @since 1.7
   2116          */
   2117         public static final UnicodeBlock JAVANESE =
   2118             new UnicodeBlock("JAVANESE");  // one-word name: the identifier is the only string passed
   2119 
   2120         /**
   2121          * Constant for the "Cham" Unicode character block.
   2122          * @since 1.7
   2123          */
   2124         public static final UnicodeBlock CHAM =
   2125             new UnicodeBlock("CHAM");  // one-word name: the identifier is the only string passed
   2126 
   2127         /**
   2128          * Constant for the "Myanmar Extended-A" Unicode character block.
   2129          * @since 1.7
   2130          */
   2131         public static final UnicodeBlock MYANMAR_EXTENDED_A =
   2132             new UnicodeBlock("MYANMAR_EXTENDED_A",  // identifier uses '_'; both name spellings below keep the '-'
   2133                              "MYANMAR EXTENDED-A",
   2134                              "MYANMAREXTENDED-A");
   2135 
   2136         /**
   2137          * Constant for the "Tai Viet" Unicode character block.
   2138          * @since 1.7
   2139          */
   2140         public static final UnicodeBlock TAI_VIET =
   2141             new UnicodeBlock("TAI_VIET",  // identifier, then the name with spaces and with spaces removed
   2142                              "TAI VIET",
   2143                              "TAIVIET");
   2144 
   2145         /**
   2146          * Constant for the "Ethiopic Extended-A" Unicode character block.
   2147          * @since 1.7
   2148          */
   2149         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
   2150             new UnicodeBlock("ETHIOPIC_EXTENDED_A",  // identifier uses '_'; both name spellings below keep the '-'
   2151                              "ETHIOPIC EXTENDED-A",
   2152                              "ETHIOPICEXTENDED-A");
   2153 
   2154         /**
   2155          * Constant for the "Meetei Mayek" Unicode character block.
   2156          * @since 1.7
   2157          */
   2158         public static final UnicodeBlock MEETEI_MAYEK =
   2159             new UnicodeBlock("MEETEI_MAYEK",  // identifier, then the name with spaces and with spaces removed
   2160                              "MEETEI MAYEK",
   2161                              "MEETEIMAYEK");
   2162 
   2163         /**
   2164          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
   2165          * @since 1.7
   2166          */
   2167         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
   2168             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",  // identifier uses '_'; both name spellings below keep the '-'
   2169                              "HANGUL JAMO EXTENDED-B",
   2170                              "HANGULJAMOEXTENDED-B");
   2171 
   2172         /**
   2173          * Constant for the "Vertical Forms" Unicode character block.
   2174          * @since 1.7
   2175          */
   2176         public static final UnicodeBlock VERTICAL_FORMS =
   2177             new UnicodeBlock("VERTICAL_FORMS",  // identifier, then the name with spaces and with spaces removed
   2178                              "VERTICAL FORMS",
   2179                              "VERTICALFORMS");
   2180 
   2181         /**
   2182          * Constant for the "Ancient Greek Numbers" Unicode character block.
   2183          * @since 1.7
   2184          */
   2185         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
   2186             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",  // identifier, then the name with spaces and with spaces removed
   2187                              "ANCIENT GREEK NUMBERS",
   2188                              "ANCIENTGREEKNUMBERS");
   2189 
   2190         /**
   2191          * Constant for the "Ancient Symbols" Unicode character block.
   2192          * @since 1.7
   2193          */
   2194         public static final UnicodeBlock ANCIENT_SYMBOLS =
   2195             new UnicodeBlock("ANCIENT_SYMBOLS",  // identifier, then the name with spaces and with spaces removed
   2196                              "ANCIENT SYMBOLS",
   2197                              "ANCIENTSYMBOLS");
   2198 
   2199         /**
   2200          * Constant for the "Phaistos Disc" Unicode character block.
   2201          * @since 1.7
   2202          */
   2203         public static final UnicodeBlock PHAISTOS_DISC =
   2204             new UnicodeBlock("PHAISTOS_DISC",  // identifier, then the name with spaces and with spaces removed
   2205                              "PHAISTOS DISC",
   2206                              "PHAISTOSDISC");
   2207 
   2208         /**
   2209          * Constant for the "Lycian" Unicode character block.
   2210          * @since 1.7
   2211          */
   2212         public static final UnicodeBlock LYCIAN =
   2213             new UnicodeBlock("LYCIAN");  // one-word name: the identifier is the only string passed
   2214 
   2215         /**
   2216          * Constant for the "Carian" Unicode character block.
   2217          * @since 1.7
   2218          */
   2219         public static final UnicodeBlock CARIAN =
   2220             new UnicodeBlock("CARIAN");  // one-word name: the identifier is the only string passed
   2221 
   2222         /**
   2223          * Constant for the "Old Persian" Unicode character block.
   2224          * @since 1.7
   2225          */
   2226         public static final UnicodeBlock OLD_PERSIAN =
   2227             new UnicodeBlock("OLD_PERSIAN",  // identifier, then the name with spaces and with spaces removed
   2228                              "OLD PERSIAN",
   2229                              "OLDPERSIAN");
   2230 
   2231         /**
   2232          * Constant for the "Imperial Aramaic" Unicode character block.
   2233          * @since 1.7
   2234          */
   2235         public static final UnicodeBlock IMPERIAL_ARAMAIC =
   2236             new UnicodeBlock("IMPERIAL_ARAMAIC",  // identifier, then the name with spaces and with spaces removed
   2237                              "IMPERIAL ARAMAIC",
   2238                              "IMPERIALARAMAIC");
   2239 
   2240         /**
   2241          * Constant for the "Phoenician" Unicode character block.
   2242          * @since 1.7
   2243          */
   2244         public static final UnicodeBlock PHOENICIAN =
   2245             new UnicodeBlock("PHOENICIAN");  // one-word name: the identifier is the only string passed
   2246 
   2247         /**
   2248          * Constant for the "Lydian" Unicode character block.
   2249          * @since 1.7
   2250          */
   2251         public static final UnicodeBlock LYDIAN =
   2252             new UnicodeBlock("LYDIAN");  // one-word name: the identifier is the only string passed
   2253 
   2254         /**
   2255          * Constant for the "Kharoshthi" Unicode character block.
   2256          * @since 1.7
   2257          */
   2258         public static final UnicodeBlock KHAROSHTHI =
   2259             new UnicodeBlock("KHAROSHTHI");  // one-word name: the identifier is the only string passed
   2260 
   2261         /**
   2262          * Constant for the "Old South Arabian" Unicode character block (U+10A60..U+10A7F).
   2263          * @since 1.7
   2264          */
   2265         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
   2266             new UnicodeBlock("OLD_SOUTH_ARABIAN",
   2267                              "OLD SOUTH ARABIAN",
   2268                              "OLDSOUTHARABIAN");
   2269 
   2270         /**
   2271          * Constant for the "Avestan" Unicode character block (U+10B00..U+10B3F).
   2272          * @since 1.7
   2273          */
   2274         public static final UnicodeBlock AVESTAN =
   2275             new UnicodeBlock("AVESTAN");
   2276 
   2277         /**
   2278          * Constant for the "Inscriptional Parthian" Unicode character block (U+10B40..U+10B5F).
   2279          * @since 1.7
   2280          */
   2281         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
   2282             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
   2283                              "INSCRIPTIONAL PARTHIAN",
   2284                              "INSCRIPTIONALPARTHIAN");
   2285 
   2286         /**
   2287          * Constant for the "Inscriptional Pahlavi" Unicode character block (U+10B60..U+10B7F).
   2288          * @since 1.7
   2289          */
   2290         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
   2291             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
   2292                              "INSCRIPTIONAL PAHLAVI",
   2293                              "INSCRIPTIONALPAHLAVI");
   2294 
   2295         /**
   2296          * Constant for the "Old Turkic" Unicode character block (U+10C00..U+10C4F).
   2297          * @since 1.7
   2298          */
   2299         public static final UnicodeBlock OLD_TURKIC =
   2300             new UnicodeBlock("OLD_TURKIC",
   2301                              "OLD TURKIC",
   2302                              "OLDTURKIC");
   2303 
   2304         /**
   2305          * Constant for the "Rumi Numeral Symbols" Unicode character block (U+10E60..U+10E7F).
   2306          * @since 1.7
   2307          */
   2308         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
   2309             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
   2310                              "RUMI NUMERAL SYMBOLS",
   2311                              "RUMINUMERALSYMBOLS");
   2312 
   2313         /**
   2314          * Constant for the "Brahmi" Unicode character block (U+11000..U+1107F).
   2315          * @since 1.7
   2316          */
   2317         public static final UnicodeBlock BRAHMI =
   2318             new UnicodeBlock("BRAHMI");
   2319 
   2320         /**
   2321          * Constant for the "Kaithi" Unicode character block (U+11080..U+110CF).
   2322          * @since 1.7
   2323          */
   2324         public static final UnicodeBlock KAITHI =
   2325             new UnicodeBlock("KAITHI");
   2326 
   2327         /**
   2328          * Constant for the "Cuneiform" Unicode character block (U+12000..U+123FF).
   2329          * @since 1.7
   2330          */
   2331         public static final UnicodeBlock CUNEIFORM =
   2332             new UnicodeBlock("CUNEIFORM");
   2333 
   2334         /**
   2335          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
   2336          * character block (U+12400..U+1247F).
   2337          * @since 1.7
   2338          */
   2339         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
   2340             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
   2341                              "CUNEIFORM NUMBERS AND PUNCTUATION",
   2342                              "CUNEIFORMNUMBERSANDPUNCTUATION");
   2343 
   2344         /**
   2345          * Constant for the "Egyptian Hieroglyphs" Unicode character block (U+13000..U+1342F).
   2346          * @since 1.7
   2347          */
   2348         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
   2349             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
   2350                              "EGYPTIAN HIEROGLYPHS",
   2351                              "EGYPTIANHIEROGLYPHS");
   2352 
   2353         /**
   2354          * Constant for the "Bamum Supplement" Unicode character block (U+16800..U+16A3F).
   2355          * @since 1.7
   2356          */
   2357         public static final UnicodeBlock BAMUM_SUPPLEMENT =
   2358             new UnicodeBlock("BAMUM_SUPPLEMENT",
   2359                              "BAMUM SUPPLEMENT",
   2360                              "BAMUMSUPPLEMENT");
   2361 
   2362         /**
   2363          * Constant for the "Kana Supplement" Unicode character block (U+1B000..U+1B0FF).
   2364          * @since 1.7
   2365          */
   2366         public static final UnicodeBlock KANA_SUPPLEMENT =
   2367             new UnicodeBlock("KANA_SUPPLEMENT",
   2368                              "KANA SUPPLEMENT",
   2369                              "KANASUPPLEMENT");
   2370 
   2371         /**
   2372          * Constant for the "Ancient Greek Musical Notation" Unicode character
   2373          * block (U+1D200..U+1D24F).
   2374          * @since 1.7
   2375          */
   2376         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
   2377             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
   2378                              "ANCIENT GREEK MUSICAL NOTATION",
   2379                              "ANCIENTGREEKMUSICALNOTATION");
   2380 
   2381         /**
   2382          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).
   2383          * @since 1.7
   2384          */
   2385         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
   2386             new UnicodeBlock("COUNTING_ROD_NUMERALS",
   2387                              "COUNTING ROD NUMERALS",
   2388                              "COUNTINGRODNUMERALS");
   2389 
   2390         /**
   2391          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).
   2392          * @since 1.7
   2393          */
   2394         public static final UnicodeBlock MAHJONG_TILES =
   2395             new UnicodeBlock("MAHJONG_TILES",
   2396                              "MAHJONG TILES",
   2397                              "MAHJONGTILES");
   2398 
   2399         /**
   2400          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).
   2401          * @since 1.7
   2402          */
   2403         public static final UnicodeBlock DOMINO_TILES =
   2404             new UnicodeBlock("DOMINO_TILES",
   2405                              "DOMINO TILES",
   2406                              "DOMINOTILES");
   2407 
   2408         /**
   2409          * Constant for the "Playing Cards" Unicode character block (U+1F0A0..U+1F0FF).
   2410          * @since 1.7
   2411          */
   2412         public static final UnicodeBlock PLAYING_CARDS =
   2413             new UnicodeBlock("PLAYING_CARDS",
   2414                              "PLAYING CARDS",
   2415                              "PLAYINGCARDS");
   2416 
   2417         /**
   2418          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
   2419          * block (U+1F100..U+1F1FF).
   2420          * @since 1.7
   2421          */
   2422         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
   2423             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
   2424                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
   2425                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
   2426 
   2427         /**
   2428          * Constant for the "Enclosed Ideographic Supplement" Unicode character
   2429          * block (U+1F200..U+1F2FF).
   2430          * @since 1.7
   2431          */
   2432         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
   2433             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
   2434                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
   2435                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
   2436 
   2437         /**
   2438          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
   2439          * character block (U+1F300..U+1F5FF).
   2440          * @since 1.7
   2441          */
   2442         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
   2443             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
   2444                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
   2445                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
   2446 
   2447         /**
   2448          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
   2449          * @since 1.7
   2450          */
   2451         public static final UnicodeBlock EMOTICONS =
   2452             new UnicodeBlock("EMOTICONS");
   2453 
   2454         /**
   2455          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
   2456          * @since 1.7
   2457          */
   2458         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
   2459             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
   2460                              "TRANSPORT AND MAP SYMBOLS",
   2461                              "TRANSPORTANDMAPSYMBOLS");
   2462 
   2463         /**
   2464          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
   2465          * @since 1.7
   2466          */
   2467         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
   2468             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
   2469                              "ALCHEMICAL SYMBOLS",
   2470                              "ALCHEMICALSYMBOLS");
   2471 
   2472         /**
   2473          * Constant for the "CJK Unified Ideographs Extension C" Unicode
   2474          * character block (U+2A700..U+2B73F).
   2475          * @since 1.7
   2476          */
   2477         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
   2478             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
   2479                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
   2480                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
   2481 
   2482         /**
   2483          * Constant for the "CJK Unified Ideographs Extension D" Unicode
   2484          * character block (U+2B740..U+2B81F).
   2485          * @since 1.7
   2486          */
   2487         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
   2488             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
   2489                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
   2490                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
   2491 
   2492         /**
   2493          * Constant for the "Arabic Extended-A" Unicode character block (U+08A0..U+08FF).
   2494          * @since 1.8
   2495          */
   2496         public static final UnicodeBlock ARABIC_EXTENDED_A =
   2497             new UnicodeBlock("ARABIC_EXTENDED_A",
   2498                              "ARABIC EXTENDED-A",
   2499                              "ARABICEXTENDED-A");
   2500 
   2501         /**
   2502          * Constant for the "Sundanese Supplement" Unicode character block (U+1CC0..U+1CCF).
   2503          * @since 1.8
   2504          */
   2505         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
   2506             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
   2507                              "SUNDANESE SUPPLEMENT",
   2508                              "SUNDANESESUPPLEMENT");
   2509 
   2510         /**
   2511          * Constant for the "Meetei Mayek Extensions" Unicode character block (U+AAE0..U+AAFF).
   2512          * @since 1.8
   2513          */
   2514         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
   2515             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
   2516                              "MEETEI MAYEK EXTENSIONS",
   2517                              "MEETEIMAYEKEXTENSIONS");
   2518 
   2519         /**
   2520          * Constant for the "Meroitic Hieroglyphs" Unicode character block (U+10980..U+1099F).
   2521          * @since 1.8
   2522          */
   2523         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
   2524             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
   2525                              "MEROITIC HIEROGLYPHS",
   2526                              "MEROITICHIEROGLYPHS");
   2527 
   2528         /**
   2529          * Constant for the "Meroitic Cursive" Unicode character block (U+109A0..U+109FF).
   2530          * @since 1.8
   2531          */
   2532         public static final UnicodeBlock MEROITIC_CURSIVE =
   2533             new UnicodeBlock("MEROITIC_CURSIVE",
   2534                              "MEROITIC CURSIVE",
   2535                              "MEROITICCURSIVE");
   2536 
   2537         /**
   2538          * Constant for the "Sora Sompeng" Unicode character block (U+110D0..U+110FF).
   2539          * @since 1.8
   2540          */
   2541         public static final UnicodeBlock SORA_SOMPENG =
   2542             new UnicodeBlock("SORA_SOMPENG",
   2543                              "SORA SOMPENG",
   2544                              "SORASOMPENG");
   2545 
   2546         /**
   2547          * Constant for the "Chakma" Unicode character block (U+11100..U+1114F).
   2548          * @since 1.8
   2549          */
   2550         public static final UnicodeBlock CHAKMA =
   2551             new UnicodeBlock("CHAKMA");
   2552 
   2553         /**
   2554          * Constant for the "Sharada" Unicode character block (U+11180..U+111DF).
   2555          * @since 1.8
   2556          */
   2557         public static final UnicodeBlock SHARADA =
   2558             new UnicodeBlock("SHARADA");
   2559 
   2560         /**
   2561          * Constant for the "Takri" Unicode character block (U+11680..U+116CF).
   2562          * @since 1.8
   2563          */
   2564         public static final UnicodeBlock TAKRI =
   2565             new UnicodeBlock("TAKRI");
   2566 
   2567         /**
   2568          * Constant for the "Miao" Unicode character block (U+16F00..U+16F9F).
   2569          * @since 1.8
   2570          */
   2571         public static final UnicodeBlock MIAO =
   2572             new UnicodeBlock("MIAO");
   2573 
   2574         /**
   2575          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
   2576          * character block (U+1EE00..U+1EEFF).
   2577          * @since 1.8
   2578          */
   2579         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
   2580             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
   2581                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
   2582                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
   2583 
   2584         private static final int blockStarts[] = {
   2585             0x0000,   // 0000..007F; Basic Latin
   2586             0x0080,   // 0080..00FF; Latin-1 Supplement
   2587             0x0100,   // 0100..017F; Latin Extended-A
   2588             0x0180,   // 0180..024F; Latin Extended-B
   2589             0x0250,   // 0250..02AF; IPA Extensions
   2590             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
   2591             0x0300,   // 0300..036F; Combining Diacritical Marks
   2592             0x0370,   // 0370..03FF; Greek and Coptic
   2593             0x0400,   // 0400..04FF; Cyrillic
   2594             0x0500,   // 0500..052F; Cyrillic Supplement
   2595             0x0530,   // 0530..058F; Armenian
   2596             0x0590,   // 0590..05FF; Hebrew
   2597             0x0600,   // 0600..06FF; Arabic
   2598             0x0700,   // 0700..074F; Syriac
   2599             0x0750,   // 0750..077F; Arabic Supplement
   2600             0x0780,   // 0780..07BF; Thaana
   2601             0x07C0,   // 07C0..07FF; NKo
   2602             0x0800,   // 0800..083F; Samaritan
   2603             0x0840,   // 0840..085F; Mandaic
   2604             0x0860,   //             unassigned
   2605             0x08A0,   // 08A0..08FF; Arabic Extended-A
   2606             0x0900,   // 0900..097F; Devanagari
   2607             0x0980,   // 0980..09FF; Bengali
   2608             0x0A00,   // 0A00..0A7F; Gurmukhi
   2609             0x0A80,   // 0A80..0AFF; Gujarati
   2610             0x0B00,   // 0B00..0B7F; Oriya
   2611             0x0B80,   // 0B80..0BFF; Tamil
   2612             0x0C00,   // 0C00..0C7F; Telugu
   2613             0x0C80,   // 0C80..0CFF; Kannada
   2614             0x0D00,   // 0D00..0D7F; Malayalam
   2615             0x0D80,   // 0D80..0DFF; Sinhala
   2616             0x0E00,   // 0E00..0E7F; Thai
   2617             0x0E80,   // 0E80..0EFF; Lao
   2618             0x0F00,   // 0F00..0FFF; Tibetan
   2619             0x1000,   // 1000..109F; Myanmar
   2620             0x10A0,   // 10A0..10FF; Georgian
   2621             0x1100,   // 1100..11FF; Hangul Jamo
   2622             0x1200,   // 1200..137F; Ethiopic
   2623             0x1380,   // 1380..139F; Ethiopic Supplement
   2624             0x13A0,   // 13A0..13FF; Cherokee
   2625             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
   2626             0x1680,   // 1680..169F; Ogham
   2627             0x16A0,   // 16A0..16FF; Runic
   2628             0x1700,   // 1700..171F; Tagalog
   2629             0x1720,   // 1720..173F; Hanunoo
   2630             0x1740,   // 1740..175F; Buhid
   2631             0x1760,   // 1760..177F; Tagbanwa
   2632             0x1780,   // 1780..17FF; Khmer
   2633             0x1800,   // 1800..18AF; Mongolian
   2634             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
   2635             0x1900,   // 1900..194F; Limbu
   2636             0x1950,   // 1950..197F; Tai Le
   2637             0x1980,   // 1980..19DF; New Tai Lue
   2638             0x19E0,   // 19E0..19FF; Khmer Symbols
   2639             0x1A00,   // 1A00..1A1F; Buginese
   2640             0x1A20,   // 1A20..1AAF; Tai Tham
   2641             0x1AB0,   //             unassigned
   2642             0x1B00,   // 1B00..1B7F; Balinese
   2643             0x1B80,   // 1B80..1BBF; Sundanese
   2644             0x1BC0,   // 1BC0..1BFF; Batak
   2645             0x1C00,   // 1C00..1C4F; Lepcha
   2646             0x1C50,   // 1C50..1C7F; Ol Chiki
   2647             0x1C80,   //             unassigned
   2648             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
   2649             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
   2650             0x1D00,   // 1D00..1D7F; Phonetic Extensions
   2651             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
   2652             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
   2653             0x1E00,   // 1E00..1EFF; Latin Extended Additional
   2654             0x1F00,   // 1F00..1FFF; Greek Extended
   2655             0x2000,   // 2000..206F; General Punctuation
   2656             0x2070,   // 2070..209F; Superscripts and Subscripts
   2657             0x20A0,   // 20A0..20CF; Currency Symbols
   2658             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
   2659             0x2100,   // 2100..214F; Letterlike Symbols
   2660             0x2150,   // 2150..218F; Number Forms
   2661             0x2190,   // 2190..21FF; Arrows
   2662             0x2200,   // 2200..22FF; Mathematical Operators
   2663             0x2300,   // 2300..23FF; Miscellaneous Technical
   2664             0x2400,   // 2400..243F; Control Pictures
   2665             0x2440,   // 2440..245F; Optical Character Recognition
   2666             0x2460,   // 2460..24FF; Enclosed Alphanumerics
   2667             0x2500,   // 2500..257F; Box Drawing
   2668             0x2580,   // 2580..259F; Block Elements
   2669             0x25A0,   // 25A0..25FF; Geometric Shapes
   2670             0x2600,   // 2600..26FF; Miscellaneous Symbols
   2671             0x2700,   // 2700..27BF; Dingbats
   2672             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
   2673             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
   2674             0x2800,   // 2800..28FF; Braille Patterns
   2675             0x2900,   // 2900..297F; Supplemental Arrows-B
   2676             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
   2677             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
   2678             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
   2679             0x2C00,   // 2C00..2C5F; Glagolitic
   2680             0x2C60,   // 2C60..2C7F; Latin Extended-C
   2681             0x2C80,   // 2C80..2CFF; Coptic
   2682             0x2D00,   // 2D00..2D2F; Georgian Supplement
   2683             0x2D30,   // 2D30..2D7F; Tifinagh
   2684             0x2D80,   // 2D80..2DDF; Ethiopic Extended
   2685             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
   2686             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
   2687             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
   2688             0x2F00,   // 2F00..2FDF; Kangxi Radicals
   2689             0x2FE0,   //             unassigned
   2690             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
   2691             0x3000,   // 3000..303F; CJK Symbols and Punctuation
   2692             0x3040,   // 3040..309F; Hiragana
   2693             0x30A0,   // 30A0..30FF; Katakana
   2694             0x3100,   // 3100..312F; Bopomofo
   2695             0x3130,   // 3130..318F; Hangul Compatibility Jamo
   2696             0x3190,   // 3190..319F; Kanbun
   2697             0x31A0,   // 31A0..31BF; Bopomofo Extended
   2698             0x31C0,   // 31C0..31EF; CJK Strokes
   2699             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
   2700             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
   2701             0x3300,   // 3300..33FF; CJK Compatibility
   2702             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
   2703             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
   2704             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
   2705             0xA000,   // A000..A48F; Yi Syllables
   2706             0xA490,   // A490..A4CF; Yi Radicals
   2707             0xA4D0,   // A4D0..A4FF; Lisu
   2708             0xA500,   // A500..A63F; Vai
   2709             0xA640,   // A640..A69F; Cyrillic Extended-B
   2710             0xA6A0,   // A6A0..A6FF; Bamum
   2711             0xA700,   // A700..A71F; Modifier Tone Letters
   2712             0xA720,   // A720..A7FF; Latin Extended-D
   2713             0xA800,   // A800..A82F; Syloti Nagri
   2714             0xA830,   // A830..A83F; Common Indic Number Forms
   2715             0xA840,   // A840..A87F; Phags-pa
   2716             0xA880,   // A880..A8DF; Saurashtra
   2717             0xA8E0,   // A8E0..A8FF; Devanagari Extended
   2718             0xA900,   // A900..A92F; Kayah Li
   2719             0xA930,   // A930..A95F; Rejang
   2720             0xA960,   // A960..A97F; Hangul Jamo Extended-A
   2721             0xA980,   // A980..A9DF; Javanese
   2722             0xA9E0,   //             unassigned
   2723             0xAA00,   // AA00..AA5F; Cham
   2724             0xAA60,   // AA60..AA7F; Myanmar Extended-A
   2725             0xAA80,   // AA80..AADF; Tai Viet
   2726             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
   2727             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
   2728             0xAB30,   //             unassigned
   2729             0xABC0,   // ABC0..ABFF; Meetei Mayek
   2730             0xAC00,   // AC00..D7AF; Hangul Syllables
   2731             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
   2732             0xD800,   // D800..DB7F; High Surrogates
   2733             0xDB80,   // DB80..DBFF; High Private Use Surrogates
   2734             0xDC00,   // DC00..DFFF; Low Surrogates
   2735             0xE000,   // E000..F8FF; Private Use Area
   2736             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
   2737             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
   2738             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
   2739             0xFE00,   // FE00..FE0F; Variation Selectors
   2740             0xFE10,   // FE10..FE1F; Vertical Forms
   2741             0xFE20,   // FE20..FE2F; Combining Half Marks
   2742             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
   2743             0xFE50,   // FE50..FE6F; Small Form Variants
   2744             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
   2745             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
   2746             0xFFF0,   // FFF0..FFFF; Specials
   2747             0x10000,  // 10000..1007F; Linear B Syllabary
   2748             0x10080,  // 10080..100FF; Linear B Ideograms
   2749             0x10100,  // 10100..1013F; Aegean Numbers
   2750             0x10140,  // 10140..1018F; Ancient Greek Numbers
   2751             0x10190,  // 10190..101CF; Ancient Symbols
   2752             0x101D0,  // 101D0..101FF; Phaistos Disc
   2753             0x10200,  //               unassigned
   2754             0x10280,  // 10280..1029F; Lycian
   2755             0x102A0,  // 102A0..102DF; Carian
   2756             0x102E0,  //               unassigned
   2757             0x10300,  // 10300..1032F; Old Italic
   2758             0x10330,  // 10330..1034F; Gothic
   2759             0x10350,  //               unassigned
   2760             0x10380,  // 10380..1039F; Ugaritic
   2761             0x103A0,  // 103A0..103DF; Old Persian
   2762             0x103E0,  //               unassigned
   2763             0x10400,  // 10400..1044F; Deseret
   2764             0x10450,  // 10450..1047F; Shavian
   2765             0x10480,  // 10480..104AF; Osmanya
   2766             0x104B0,  //               unassigned
   2767             0x10800,  // 10800..1083F; Cypriot Syllabary
   2768             0x10840,  // 10840..1085F; Imperial Aramaic
   2769             0x10860,  //               unassigned
   2770             0x10900,  // 10900..1091F; Phoenician
   2771             0x10920,  // 10920..1093F; Lydian
   2772             0x10940,  //               unassigned
   2773             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
   2774             0x109A0,  // 109A0..109FF; Meroitic Cursive
   2775             0x10A00,  // 10A00..10A5F; Kharoshthi
   2776             0x10A60,  // 10A60..10A7F; Old South Arabian
   2777             0x10A80,  //               unassigned
   2778             0x10B00,  // 10B00..10B3F; Avestan
   2779             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
   2780             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
   2781             0x10B80,  //               unassigned
   2782             0x10C00,  // 10C00..10C4F; Old Turkic
   2783             0x10C50,  //               unassigned
   2784             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
   2785             0x10E80,  //               unassigned
   2786             0x11000,  // 11000..1107F; Brahmi
   2787             0x11080,  // 11080..110CF; Kaithi
   2788             0x110D0,  // 110D0..110FF; Sora Sompeng
   2789             0x11100,  // 11100..1114F; Chakma
   2790             0x11150,  //               unassigned
   2791             0x11180,  // 11180..111DF; Sharada
   2792             0x111E0,  //               unassigned
   2793             0x11680,  // 11680..116CF; Takri
   2794             0x116D0,  //               unassigned
   2795             0x12000,  // 12000..123FF; Cuneiform
   2796             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
   2797             0x12480,  //               unassigned
   2798             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
   2799             0x13430,  //               unassigned
   2800             0x16800,  // 16800..16A3F; Bamum Supplement
   2801             0x16A40,  //               unassigned
   2802             0x16F00,  // 16F00..16F9F; Miao
   2803             0x16FA0,  //               unassigned
   2804             0x1B000,  // 1B000..1B0FF; Kana Supplement
   2805             0x1B100,  //               unassigned
   2806             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
   2807             0x1D100,  // 1D100..1D1FF; Musical Symbols
   2808             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
   2809             0x1D250,  //               unassigned
   2810             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
   2811             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
   2812             0x1D380,  //               unassigned
   2813             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
   2814             0x1D800,  //               unassigned
   2815             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
   2816             0x1EF00,  //               unassigned
   2817             0x1F000,  // 1F000..1F02F; Mahjong Tiles
   2818             0x1F030,  // 1F030..1F09F; Domino Tiles
   2819             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
   2820             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
   2821             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
   2822             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
   2823             0x1F600,  // 1F600..1F64F; Emoticons
   2824             0x1F650,  //               unassigned
   2825             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
   2826             0x1F700,  // 1F700..1F77F; Alchemical Symbols
   2827             0x1F780,  //               unassigned
   2828             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
   2829             0x2A6E0,  //               unassigned
   2830             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
   2831             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
   2832             0x2B820,  //               unassigned
   2833             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
   2834             0x2FA20,  //               unassigned
   2835             0xE0000,  // E0000..E007F; Tags
   2836             0xE0080,  //               unassigned
   2837             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
   2838             0xE01F0,  //               unassigned
   2839             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
   2840             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
   2841         };
   2842 
   2843         private static final UnicodeBlock[] blocks = {  // parallel to blockStarts: entry i begins at blockStarts[i]
   2844             BASIC_LATIN,
   2845             LATIN_1_SUPPLEMENT,
   2846             LATIN_EXTENDED_A,
   2847             LATIN_EXTENDED_B,
   2848             IPA_EXTENSIONS,
   2849             SPACING_MODIFIER_LETTERS,
   2850             COMBINING_DIACRITICAL_MARKS,
   2851             GREEK,
   2852             CYRILLIC,
   2853             CYRILLIC_SUPPLEMENTARY,
   2854             ARMENIAN,
   2855             HEBREW,
   2856             ARABIC,
   2857             SYRIAC,
   2858             ARABIC_SUPPLEMENT,
   2859             THAANA,
   2860             NKO,
   2861             SAMARITAN,
   2862             MANDAIC,
   2863             null,  // null marks a range that belongs to no defined block; of(int) returns it as-is
   2864             ARABIC_EXTENDED_A,
   2865             DEVANAGARI,
   2866             BENGALI,
   2867             GURMUKHI,
   2868             GUJARATI,
   2869             ORIYA,
   2870             TAMIL,
   2871             TELUGU,
   2872             KANNADA,
   2873             MALAYALAM,
   2874             SINHALA,
   2875             THAI,
   2876             LAO,
   2877             TIBETAN,
   2878             MYANMAR,
   2879             GEORGIAN,
   2880             HANGUL_JAMO,
   2881             ETHIOPIC,
   2882             ETHIOPIC_SUPPLEMENT,
   2883             CHEROKEE,
   2884             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
   2885             OGHAM,
   2886             RUNIC,
   2887             TAGALOG,
   2888             HANUNOO,
   2889             BUHID,
   2890             TAGBANWA,
   2891             KHMER,
   2892             MONGOLIAN,
   2893             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
   2894             LIMBU,
   2895             TAI_LE,
   2896             NEW_TAI_LUE,
   2897             KHMER_SYMBOLS,
   2898             BUGINESE,
   2899             TAI_THAM,
   2900             null,
   2901             BALINESE,
   2902             SUNDANESE,
   2903             BATAK,
   2904             LEPCHA,
   2905             OL_CHIKI,
   2906             null,
   2907             SUNDANESE_SUPPLEMENT,
   2908             VEDIC_EXTENSIONS,
   2909             PHONETIC_EXTENSIONS,
   2910             PHONETIC_EXTENSIONS_SUPPLEMENT,
   2911             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
   2912             LATIN_EXTENDED_ADDITIONAL,
   2913             GREEK_EXTENDED,
   2914             GENERAL_PUNCTUATION,
   2915             SUPERSCRIPTS_AND_SUBSCRIPTS,
   2916             CURRENCY_SYMBOLS,
   2917             COMBINING_MARKS_FOR_SYMBOLS,
   2918             LETTERLIKE_SYMBOLS,
   2919             NUMBER_FORMS,
   2920             ARROWS,
   2921             MATHEMATICAL_OPERATORS,
   2922             MISCELLANEOUS_TECHNICAL,
   2923             CONTROL_PICTURES,
   2924             OPTICAL_CHARACTER_RECOGNITION,
   2925             ENCLOSED_ALPHANUMERICS,
   2926             BOX_DRAWING,
   2927             BLOCK_ELEMENTS,
   2928             GEOMETRIC_SHAPES,
   2929             MISCELLANEOUS_SYMBOLS,
   2930             DINGBATS,
   2931             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
   2932             SUPPLEMENTAL_ARROWS_A,
   2933             BRAILLE_PATTERNS,
   2934             SUPPLEMENTAL_ARROWS_B,
   2935             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
   2936             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
   2937             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
   2938             GLAGOLITIC,
   2939             LATIN_EXTENDED_C,
   2940             COPTIC,
   2941             GEORGIAN_SUPPLEMENT,
   2942             TIFINAGH,
   2943             ETHIOPIC_EXTENDED,
   2944             CYRILLIC_EXTENDED_A,
   2945             SUPPLEMENTAL_PUNCTUATION,
   2946             CJK_RADICALS_SUPPLEMENT,
   2947             KANGXI_RADICALS,
   2948             null,
   2949             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
   2950             CJK_SYMBOLS_AND_PUNCTUATION,
   2951             HIRAGANA,
   2952             KATAKANA,
   2953             BOPOMOFO,
   2954             HANGUL_COMPATIBILITY_JAMO,
   2955             KANBUN,
   2956             BOPOMOFO_EXTENDED,
   2957             CJK_STROKES,
   2958             KATAKANA_PHONETIC_EXTENSIONS,
   2959             ENCLOSED_CJK_LETTERS_AND_MONTHS,
   2960             CJK_COMPATIBILITY,
   2961             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
   2962             YIJING_HEXAGRAM_SYMBOLS,
   2963             CJK_UNIFIED_IDEOGRAPHS,
   2964             YI_SYLLABLES,
   2965             YI_RADICALS,
   2966             LISU,
   2967             VAI,
   2968             CYRILLIC_EXTENDED_B,
   2969             BAMUM,
   2970             MODIFIER_TONE_LETTERS,
   2971             LATIN_EXTENDED_D,
   2972             SYLOTI_NAGRI,
   2973             COMMON_INDIC_NUMBER_FORMS,
   2974             PHAGS_PA,
   2975             SAURASHTRA,
   2976             DEVANAGARI_EXTENDED,
   2977             KAYAH_LI,
   2978             REJANG,
   2979             HANGUL_JAMO_EXTENDED_A,
   2980             JAVANESE,
   2981             null,
   2982             CHAM,
   2983             MYANMAR_EXTENDED_A,
   2984             TAI_VIET,
   2985             MEETEI_MAYEK_EXTENSIONS,
   2986             ETHIOPIC_EXTENDED_A,
   2987             null,
   2988             MEETEI_MAYEK,
   2989             HANGUL_SYLLABLES,
   2990             HANGUL_JAMO_EXTENDED_B,
   2991             HIGH_SURROGATES,
   2992             HIGH_PRIVATE_USE_SURROGATES,
   2993             LOW_SURROGATES,
   2994             PRIVATE_USE_AREA,
   2995             CJK_COMPATIBILITY_IDEOGRAPHS,
   2996             ALPHABETIC_PRESENTATION_FORMS,
   2997             ARABIC_PRESENTATION_FORMS_A,
   2998             VARIATION_SELECTORS,
   2999             VERTICAL_FORMS,
   3000             COMBINING_HALF_MARKS,
   3001             CJK_COMPATIBILITY_FORMS,
   3002             SMALL_FORM_VARIANTS,
   3003             ARABIC_PRESENTATION_FORMS_B,
   3004             HALFWIDTH_AND_FULLWIDTH_FORMS,
   3005             SPECIALS,
   3006             LINEAR_B_SYLLABARY,
   3007             LINEAR_B_IDEOGRAMS,
   3008             AEGEAN_NUMBERS,
   3009             ANCIENT_GREEK_NUMBERS,
   3010             ANCIENT_SYMBOLS,
   3011             PHAISTOS_DISC,
   3012             null,
   3013             LYCIAN,
   3014             CARIAN,
   3015             null,
   3016             OLD_ITALIC,
   3017             GOTHIC,
   3018             null,
   3019             UGARITIC,
   3020             OLD_PERSIAN,
   3021             null,
   3022             DESERET,
   3023             SHAVIAN,
   3024             OSMANYA,
   3025             null,
   3026             CYPRIOT_SYLLABARY,
   3027             IMPERIAL_ARAMAIC,
   3028             null,
   3029             PHOENICIAN,
   3030             LYDIAN,
   3031             null,
   3032             MEROITIC_HIEROGLYPHS,
   3033             MEROITIC_CURSIVE,
   3034             KHAROSHTHI,
   3035             OLD_SOUTH_ARABIAN,
   3036             null,
   3037             AVESTAN,
   3038             INSCRIPTIONAL_PARTHIAN,
   3039             INSCRIPTIONAL_PAHLAVI,
   3040             null,
   3041             OLD_TURKIC,
   3042             null,
   3043             RUMI_NUMERAL_SYMBOLS,
   3044             null,
   3045             BRAHMI,
   3046             KAITHI,
   3047             SORA_SOMPENG,
   3048             CHAKMA,
   3049             null,
   3050             SHARADA,
   3051             null,
   3052             TAKRI,
   3053             null,
   3054             CUNEIFORM,
   3055             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
   3056             null,
   3057             EGYPTIAN_HIEROGLYPHS,
   3058             null,
   3059             BAMUM_SUPPLEMENT,
   3060             null,
   3061             MIAO,
   3062             null,
   3063             KANA_SUPPLEMENT,
   3064             null,
   3065             BYZANTINE_MUSICAL_SYMBOLS,
   3066             MUSICAL_SYMBOLS,
   3067             ANCIENT_GREEK_MUSICAL_NOTATION,
   3068             null,
   3069             TAI_XUAN_JING_SYMBOLS,
   3070             COUNTING_ROD_NUMERALS,
   3071             null,
   3072             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
   3073             null,
   3074             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
   3075             null,
   3076             MAHJONG_TILES,
   3077             DOMINO_TILES,
   3078             PLAYING_CARDS,
   3079             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
   3080             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
   3081             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
   3082             EMOTICONS,
   3083             null,
   3084             TRANSPORT_AND_MAP_SYMBOLS,
   3085             ALCHEMICAL_SYMBOLS,
   3086             null,
   3087             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
   3088             null,
   3089             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
   3090             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
   3091             null,
   3092             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
   3093             null,
   3094             TAGS,
   3095             null,
   3096             VARIATION_SELECTORS_SUPPLEMENT,
   3097             null,
   3098             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
   3099             SUPPLEMENTARY_PRIVATE_USE_AREA_B
   3100         };
   3101 
   3102 
   3103         /**
   3104          * Looks up the Unicode block that contains the given {@code char},
   3105          * or yields {@code null} when the character lies outside every
   3106          * defined block. The lookup is delegated to {@link #of(int)}.
   3107          *
   3108          * <p><b>Note:</b> A single {@code char} cannot represent a
   3109          * <a href="Character.html#supplementary">supplementary character</a>;
   3110          * pass the full code point to {@link #of(int)} to cover those.
   3111          *
   3112          * @param   c  The character in question
   3113          * @return  The {@code UnicodeBlock} instance representing the
   3114          *          Unicode block of which this character is a member, or
   3115          *          {@code null} if the character is not a member of any
   3116          *          Unicode block
   3117          */
   3118         public static UnicodeBlock of(char c) {
   3119             final int codePoint = c;  // widen explicitly so the int overload is selected
   3120             return of(codePoint);
   3121         }
   3122 
   3123         /**
   3124          * Returns the object representing the Unicode block
   3125          * containing the given character (Unicode code point), or
   3126          * {@code null} if the character is not a member of a
   3127          * defined block.
   3128          *
   3129          * @param   codePoint the character (Unicode code point) in question.
   3130          * @return  The {@code UnicodeBlock} instance representing the
   3131          *          Unicode block of which this character is a member, or
   3132          *          {@code null} if the character is not a member of any
   3133          *          Unicode block
   3134          * @exception IllegalArgumentException if the specified
   3135          * {@code codePoint} is an invalid Unicode code point.
   3136          * @see Character#isValidCodePoint(int)
   3137          * @since   1.5
   3138          */
   3139         public static UnicodeBlock of(int codePoint) {
   3140             if (!isValidCodePoint(codePoint)) {
   3141                 throw new IllegalArgumentException();
   3142             }
   3143 
   3144             int top, bottom, current;
   3145             bottom = 0;
   3146             top = blockStarts.length;
   3147             current = top/2;
   3148 
   3149             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
   3150             while (top - bottom > 1) {
   3151                 if (codePoint >= blockStarts[current]) {
   3152                     bottom = current;
   3153                 } else {
   3154                     top = current;
   3155                 }
   3156                 current = (top + bottom) / 2;
   3157             }
   3158             return blocks[current];
   3159         }
   3160 
   3161         /**
   3162          * Returns the UnicodeBlock with the given name. Block
   3163          * names are determined by The Unicode Standard. The file
   3164          * Blocks-&lt;version&gt;.txt defines blocks for a particular
   3165          * version of the standard. The {@link Character} class specifies
   3166          * the version of the standard that it supports.
   3167          * <p>
   3168          * This method accepts block names in the following forms:
   3169          * <ol>
   3170          * <li> Canonical block names as defined by the Unicode Standard.
   3171          * For example, the standard defines a "Basic Latin" block. Therefore, this
   3172          * method accepts "Basic Latin" as a valid block name. The documentation of
   3173          * each UnicodeBlock provides the canonical name.
   3174          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
   3175          * is a valid block name for the "Basic Latin" block.
   3176          * <li>The text representation of each constant UnicodeBlock identifier.
   3177          * For example, this method will return the {@link #BASIC_LATIN} block if
   3178          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
   3179          * hyphens in the canonical name with underscores.
   3180          * </ol>
   3181          * Finally, character case is ignored for all of the valid block name forms.
   3182          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
   3183          * The en_US locale's case mapping rules are used to provide case-insensitive
   3184          * string comparisons for block name validation.
   3185          * <p>
   3186          * If the Unicode Standard changes block names, both the previous and
   3187          * current names will be accepted.
   3188          *
   3189          * @param blockName A {@code UnicodeBlock} name.
   3190          * @return The {@code UnicodeBlock} instance identified
   3191          *         by {@code blockName}
   3192          * @throws IllegalArgumentException if {@code blockName} is an
   3193          *         invalid name; the message includes the rejected name
   3194          * @throws NullPointerException if {@code blockName} is null
   3195          * @since 1.5
   3196          */
   3197         public static final UnicodeBlock forName(String blockName) {
   3198             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
   3199             if (block == null) {
   3200                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
   3201             }
   3202             return block;
   3203         }
   3204     }
   3205 
   3206 
   3207     /**
   3208      * A family of character subsets representing the character scripts
   3209      * defined in the <a href="http://www.unicode.org/reports/tr24/">
   3210      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
   3211      * character is assigned to a single Unicode script, either a specific
   3212      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
   3213      * one of the following three special values,
   3214      * {@link Character.UnicodeScript#INHERITED Inherited},
   3215      * {@link Character.UnicodeScript#COMMON Common} or
   3216      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
   3217      *
   3218      * @since 1.7
   3219      */
   3220     public static enum UnicodeScript {
   3221         /**
   3222          * Unicode script "Common".
   3223          */
   3224         COMMON,
   3225 
   3226         /**
   3227          * Unicode script "Latin".
   3228          */
   3229         LATIN,
   3230 
   3231         /**
   3232          * Unicode script "Greek".
   3233          */
   3234         GREEK,
   3235 
   3236         /**
   3237          * Unicode script "Cyrillic".
   3238          */
   3239         CYRILLIC,
   3240 
   3241         /**
   3242          * Unicode script "Armenian".
   3243          */
   3244         ARMENIAN,
   3245 
   3246         /**
   3247          * Unicode script "Hebrew".
   3248          */
   3249         HEBREW,
   3250 
   3251         /**
   3252          * Unicode script "Arabic".
   3253          */
   3254         ARABIC,
   3255 
   3256         /**
   3257          * Unicode script "Syriac".
   3258          */
   3259         SYRIAC,
   3260 
   3261         /**
   3262          * Unicode script "Thaana".
   3263          */
   3264         THAANA,
   3265 
   3266         /**
   3267          * Unicode script "Devanagari".
   3268          */
   3269         DEVANAGARI,
   3270 
   3271         /**
   3272          * Unicode script "Bengali".
   3273          */
   3274         BENGALI,
   3275 
   3276         /**
   3277          * Unicode script "Gurmukhi".
   3278          */
   3279         GURMUKHI,
   3280 
   3281         /**
   3282          * Unicode script "Gujarati".
   3283          */
   3284         GUJARATI,
   3285 
   3286         /**
   3287          * Unicode script "Oriya".
   3288          */
   3289         ORIYA,
   3290 
   3291         /**
   3292          * Unicode script "Tamil".
   3293          */
   3294         TAMIL,
   3295 
   3296         /**
   3297          * Unicode script "Telugu".
   3298          */
   3299         TELUGU,
   3300 
   3301         /**
   3302          * Unicode script "Kannada".
   3303          */
   3304         KANNADA,
   3305 
   3306         /**
   3307          * Unicode script "Malayalam".
   3308          */
   3309         MALAYALAM,
   3310 
   3311         /**
   3312          * Unicode script "Sinhala".
   3313          */
   3314         SINHALA,
   3315 
   3316         /**
   3317          * Unicode script "Thai".
   3318          */
   3319         THAI,
   3320 
   3321         /**
   3322          * Unicode script "Lao".
   3323          */
   3324         LAO,
   3325 
   3326         /**
   3327          * Unicode script "Tibetan".
   3328          */
   3329         TIBETAN,
   3330 
   3331         /**
   3332          * Unicode script "Myanmar".
   3333          */
   3334         MYANMAR,
   3335 
   3336         /**
   3337          * Unicode script "Georgian".
   3338          */
   3339         GEORGIAN,
   3340 
   3341         /**
   3342          * Unicode script "Hangul".
   3343          */
   3344         HANGUL,
   3345 
   3346         /**
   3347          * Unicode script "Ethiopic".
   3348          */
   3349         ETHIOPIC,
   3350 
   3351         /**
   3352          * Unicode script "Cherokee".
   3353          */
   3354         CHEROKEE,
   3355 
   3356         /**
   3357          * Unicode script "Canadian_Aboriginal".
   3358          */
   3359         CANADIAN_ABORIGINAL,
   3360 
   3361         /**
   3362          * Unicode script "Ogham".
   3363          */
   3364         OGHAM,
   3365 
   3366         /**
   3367          * Unicode script "Runic".
   3368          */
   3369         RUNIC,
   3370 
   3371         /**
   3372          * Unicode script "Khmer".
   3373          */
   3374         KHMER,
   3375 
   3376         /**
   3377          * Unicode script "Mongolian".
   3378          */
   3379         MONGOLIAN,
   3380 
   3381         /**
   3382          * Unicode script "Hiragana".
   3383          */
   3384         HIRAGANA,
   3385 
   3386         /**
   3387          * Unicode script "Katakana".
   3388          */
   3389         KATAKANA,
   3390 
   3391         /**
   3392          * Unicode script "Bopomofo".
   3393          */
   3394         BOPOMOFO,
   3395 
   3396         /**
   3397          * Unicode script "Han".
   3398          */
   3399         HAN,
   3400 
   3401         /**
   3402          * Unicode script "Yi".
   3403          */
   3404         YI,
   3405 
   3406         /**
   3407          * Unicode script "Old_Italic".
   3408          */
   3409         OLD_ITALIC,
   3410 
   3411         /**
   3412          * Unicode script "Gothic".
   3413          */
   3414         GOTHIC,
   3415 
   3416         /**
   3417          * Unicode script "Deseret".
   3418          */
   3419         DESERET,
   3420 
   3421         /**
   3422          * Unicode script "Inherited".
   3423          */
   3424         INHERITED,
   3425 
   3426         /**
   3427          * Unicode script "Tagalog".
   3428          */
   3429         TAGALOG,
   3430 
   3431         /**
   3432          * Unicode script "Hanunoo".
   3433          */
   3434         HANUNOO,
   3435 
   3436         /**
   3437          * Unicode script "Buhid".
   3438          */
   3439         BUHID,
   3440 
   3441         /**
   3442          * Unicode script "Tagbanwa".
   3443          */
   3444         TAGBANWA,
   3445 
   3446         /**
   3447          * Unicode script "Limbu".
   3448          */
   3449         LIMBU,
   3450 
   3451         /**
   3452          * Unicode script "Tai_Le".
   3453          */
   3454         TAI_LE,
   3455 
   3456         /**
   3457          * Unicode script "Linear_B".
   3458          */
   3459         LINEAR_B,
   3460 
   3461         /**
   3462          * Unicode script "Ugaritic".
   3463          */
   3464         UGARITIC,
   3465 
   3466         /**
   3467          * Unicode script "Shavian".
   3468          */
   3469         SHAVIAN,
   3470 
   3471         /**
   3472          * Unicode script "Osmanya".
   3473          */
   3474         OSMANYA,
   3475 
   3476         /**
   3477          * Unicode script "Cypriot".
   3478          */
   3479         CYPRIOT,
   3480 
   3481         /**
   3482          * Unicode script "Braille".
   3483          */
   3484         BRAILLE,
   3485 
   3486         /**
   3487          * Unicode script "Buginese".
   3488          */
   3489         BUGINESE,
   3490 
   3491         /**
   3492          * Unicode script "Coptic".
   3493          */
   3494         COPTIC,
   3495 
   3496         /**
   3497          * Unicode script "New_Tai_Lue".
   3498          */
   3499         NEW_TAI_LUE,
   3500 
   3501         /**
   3502          * Unicode script "Glagolitic".
   3503          */
   3504         GLAGOLITIC,
   3505 
   3506         /**
   3507          * Unicode script "Tifinagh".
   3508          */
   3509         TIFINAGH,
   3510 
   3511         /**
   3512          * Unicode script "Syloti_Nagri".
   3513          */
   3514         SYLOTI_NAGRI,
   3515 
   3516         /**
   3517          * Unicode script "Old_Persian".
   3518          */
   3519         OLD_PERSIAN,
   3520 
   3521         /**
   3522          * Unicode script "Kharoshthi".
   3523          */
   3524         KHAROSHTHI,
   3525 
   3526         /**
   3527          * Unicode script "Balinese".
   3528          */
   3529         BALINESE,
   3530 
   3531         /**
   3532          * Unicode script "Cuneiform".
   3533          */
   3534         CUNEIFORM,
   3535 
   3536         /**
   3537          * Unicode script "Phoenician".
   3538          */
   3539         PHOENICIAN,
   3540 
   3541         /**
   3542          * Unicode script "Phags_Pa".
   3543          */
   3544         PHAGS_PA,
   3545 
   3546         /**
   3547          * Unicode script "Nko".
   3548          */
   3549         NKO,
   3550 
   3551         /**
   3552          * Unicode script "Sundanese".
   3553          */
   3554         SUNDANESE,
   3555 
   3556         /**
   3557          * Unicode script "Batak".
   3558          */
   3559         BATAK,
   3560 
   3561         /**
   3562          * Unicode script "Lepcha".
   3563          */
   3564         LEPCHA,
   3565 
   3566         /**
   3567          * Unicode script "Ol_Chiki".
   3568          */
   3569         OL_CHIKI,
   3570 
   3571         /**
   3572          * Unicode script "Vai".
   3573          */
   3574         VAI,
   3575 
   3576         /**
   3577          * Unicode script "Saurashtra".
   3578          */
   3579         SAURASHTRA,
   3580 
   3581         /**
   3582          * Unicode script "Kayah_Li".
   3583          */
   3584         KAYAH_LI,
   3585 
   3586         /**
   3587          * Unicode script "Rejang".
   3588          */
   3589         REJANG,
   3590 
   3591         /**
   3592          * Unicode script "Lycian".
   3593          */
   3594         LYCIAN,
   3595 
   3596         /**
   3597          * Unicode script "Carian".
   3598          */
   3599         CARIAN,
   3600 
   3601         /**
   3602          * Unicode script "Lydian".
   3603          */
   3604         LYDIAN,
   3605 
   3606         /**
   3607          * Unicode script "Cham".
   3608          */
   3609         CHAM,
   3610 
   3611         /**
   3612          * Unicode script "Tai_Tham".
   3613          */
   3614         TAI_THAM,
   3615 
   3616         /**
   3617          * Unicode script "Tai_Viet".
   3618          */
   3619         TAI_VIET,
   3620 
   3621         /**
   3622          * Unicode script "Avestan".
   3623          */
   3624         AVESTAN,
   3625 
   3626         /**
   3627          * Unicode script "Egyptian_Hieroglyphs".
   3628          */
   3629         EGYPTIAN_HIEROGLYPHS,
   3630 
   3631         /**
   3632          * Unicode script "Samaritan".
   3633          */
   3634         SAMARITAN,
   3635 
   3636         /**
   3637          * Unicode script "Mandaic".
   3638          */
   3639         MANDAIC,
   3640 
   3641         /**
   3642          * Unicode script "Lisu".
   3643          */
   3644         LISU,
   3645 
   3646         /**
   3647          * Unicode script "Bamum".
   3648          */
   3649         BAMUM,
   3650 
   3651         /**
   3652          * Unicode script "Javanese".
   3653          */
   3654         JAVANESE,
   3655 
   3656         /**
   3657          * Unicode script "Meetei_Mayek".
   3658          */
   3659         MEETEI_MAYEK,
   3660 
   3661         /**
   3662          * Unicode script "Imperial_Aramaic".
   3663          */
   3664         IMPERIAL_ARAMAIC,
   3665 
   3666         /**
   3667          * Unicode script "Old_South_Arabian".
   3668          */
   3669         OLD_SOUTH_ARABIAN,
   3670 
   3671         /**
   3672          * Unicode script "Inscriptional_Parthian".
   3673          */
   3674         INSCRIPTIONAL_PARTHIAN,
   3675 
   3676         /**
   3677          * Unicode script "Inscriptional_Pahlavi".
   3678          */
   3679         INSCRIPTIONAL_PAHLAVI,
   3680 
   3681         /**
   3682          * Unicode script "Old_Turkic".
   3683          */
   3684         OLD_TURKIC,
   3685 
   3686         /**
   3687          * Unicode script "Brahmi".
   3688          */
   3689         BRAHMI,
   3690 
   3691         /**
   3692          * Unicode script "Kaithi".
   3693          */
   3694         KAITHI,
   3695 
   3696         /**
   3697          * Unicode script "Meroitic_Hieroglyphs".
   3698          */
   3699         MEROITIC_HIEROGLYPHS,
   3700 
   3701         /**
   3702          * Unicode script "Meroitic_Cursive".
   3703          */
   3704         MEROITIC_CURSIVE,
   3705 
   3706         /**
   3707          * Unicode script "Sora_Sompeng".
   3708          */
   3709         SORA_SOMPENG,
   3710 
   3711         /**
   3712          * Unicode script "Chakma".
   3713          */
   3714         CHAKMA,
   3715 
   3716         /**
   3717          * Unicode script "Sharada".
   3718          */
   3719         SHARADA,
   3720 
   3721         /**
   3722          * Unicode script "Takri".
   3723          */
   3724         TAKRI,
   3725 
   3726         /**
   3727          * Unicode script "Miao".
   3728          */
   3729         MIAO,
   3730 
   3731         /**
   3732          * Unicode script "Unknown".
   3733          */
   3734         UNKNOWN;
   3735 
   3736         private static final int[] scriptStarts = {
   3737             0x0000,   // 0000..0040; COMMON
   3738             0x0041,   // 0041..005A; LATIN
   3739             0x005B,   // 005B..0060; COMMON
   3740             0x0061,   // 0061..007A; LATIN
   3741             0x007B,   // 007B..00A9; COMMON
   3742             0x00AA,   // 00AA..00AA; LATIN
   3743             0x00AB,   // 00AB..00B9; COMMON
   3744             0x00BA,   // 00BA..00BA; LATIN
   3745             0x00BB,   // 00BB..00BF; COMMON
   3746             0x00C0,   // 00C0..00D6; LATIN
   3747             0x00D7,   // 00D7..00D7; COMMON
   3748             0x00D8,   // 00D8..00F6; LATIN
   3749             0x00F7,   // 00F7..00F7; COMMON
   3750             0x00F8,   // 00F8..02B8; LATIN
   3751             0x02B9,   // 02B9..02DF; COMMON
   3752             0x02E0,   // 02E0..02E4; LATIN
   3753             0x02E5,   // 02E5..02E9; COMMON
   3754             0x02EA,   // 02EA..02EB; BOPOMOFO
   3755             0x02EC,   // 02EC..02FF; COMMON
   3756             0x0300,   // 0300..036F; INHERITED
   3757             0x0370,   // 0370..0373; GREEK
   3758             0x0374,   // 0374..0374; COMMON
   3759             0x0375,   // 0375..037D; GREEK
   3760             0x037E,   // 037E..0383; COMMON
   3761             0x0384,   // 0384..0384; GREEK
   3762             0x0385,   // 0385..0385; COMMON
   3763             0x0386,   // 0386..0386; GREEK
   3764             0x0387,   // 0387..0387; COMMON
   3765             0x0388,   // 0388..03E1; GREEK
   3766             0x03E2,   // 03E2..03EF; COPTIC
   3767             0x03F0,   // 03F0..03FF; GREEK
   3768             0x0400,   // 0400..0484; CYRILLIC
   3769             0x0485,   // 0485..0486; INHERITED
   3770             0x0487,   // 0487..0530; CYRILLIC
   3771             0x0531,   // 0531..0588; ARMENIAN
   3772             0x0589,   // 0589..0589; COMMON
   3773             0x058A,   // 058A..0590; ARMENIAN
   3774             0x0591,   // 0591..05FF; HEBREW
   3775             0x0600,   // 0600..060B; ARABIC
   3776             0x060C,   // 060C..060C; COMMON
   3777             0x060D,   // 060D..061A; ARABIC
   3778             0x061B,   // 061B..061D; COMMON
   3779             0x061E,   // 061E..061E; ARABIC
   3780             0x061F,   // 061F..061F; COMMON
   3781             0x0620,   // 0620..063F; ARABIC
   3782             0x0640,   // 0640..0640; COMMON
   3783             0x0641,   // 0641..064A; ARABIC
   3784             0x064B,   // 064B..0655; INHERITED
   3785             0x0656,   // 0656..065F; ARABIC
   3786             0x0660,   // 0660..0669; COMMON
   3787             0x066A,   // 066A..066F; ARABIC
   3788             0x0670,   // 0670..0670; INHERITED
   3789             0x0671,   // 0671..06DC; ARABIC
   3790             0x06DD,   // 06DD..06DD; COMMON
   3791             0x06DE,   // 06DE..06FF; ARABIC
   3792             0x0700,   // 0700..074F; SYRIAC
   3793             0x0750,   // 0750..077F; ARABIC
   3794             0x0780,   // 0780..07BF; THAANA
   3795             0x07C0,   // 07C0..07FF; NKO
   3796             0x0800,   // 0800..083F; SAMARITAN
   3797             0x0840,   // 0840..089F; MANDAIC
   3798             0x08A0,   // 08A0..08FF; ARABIC
   3799             0x0900,   // 0900..0950; DEVANAGARI
   3800             0x0951,   // 0951..0952; INHERITED
   3801             0x0953,   // 0953..0963; DEVANAGARI
   3802             0x0964,   // 0964..0965; COMMON
   3803             0x0966,   // 0966..0980; DEVANAGARI
   3804             0x0981,   // 0981..0A00; BENGALI
   3805             0x0A01,   // 0A01..0A80; GURMUKHI
   3806             0x0A81,   // 0A81..0B00; GUJARATI
   3807             0x0B01,   // 0B01..0B81; ORIYA
   3808             0x0B82,   // 0B82..0C00; TAMIL
   3809             0x0C01,   // 0C01..0C81; TELUGU
   3810             0x0C82,   // 0C82..0CF0; KANNADA
   3811             0x0D02,   // 0D02..0D81; MALAYALAM
   3812             0x0D82,   // 0D82..0E00; SINHALA
   3813             0x0E01,   // 0E01..0E3E; THAI
   3814             0x0E3F,   // 0E3F..0E3F; COMMON
   3815             0x0E40,   // 0E40..0E80; THAI
   3816             0x0E81,   // 0E81..0EFF; LAO
   3817             0x0F00,   // 0F00..0FD4; TIBETAN
   3818             0x0FD5,   // 0FD5..0FD8; COMMON
   3819             0x0FD9,   // 0FD9..0FFF; TIBETAN
   3820             0x1000,   // 1000..109F; MYANMAR
   3821             0x10A0,   // 10A0..10FA; GEORGIAN
   3822             0x10FB,   // 10FB..10FB; COMMON
   3823             0x10FC,   // 10FC..10FF; GEORGIAN
   3824             0x1100,   // 1100..11FF; HANGUL
   3825             0x1200,   // 1200..139F; ETHIOPIC
   3826             0x13A0,   // 13A0..13FF; CHEROKEE
   3827             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
   3828             0x1680,   // 1680..169F; OGHAM
   3829             0x16A0,   // 16A0..16EA; RUNIC
   3830             0x16EB,   // 16EB..16ED; COMMON
   3831             0x16EE,   // 16EE..16FF; RUNIC
   3832             0x1700,   // 1700..171F; TAGALOG
   3833             0x1720,   // 1720..1734; HANUNOO
   3834             0x1735,   // 1735..173F; COMMON
   3835             0x1740,   // 1740..175F; BUHID
   3836             0x1760,   // 1760..177F; TAGBANWA
   3837             0x1780,   // 1780..17FF; KHMER
   3838             0x1800,   // 1800..1801; MONGOLIAN
   3839             0x1802,   // 1802..1803; COMMON
   3840             0x1804,   // 1804..1804; MONGOLIAN
   3841             0x1805,   // 1805..1805; COMMON
   3842             0x1806,   // 1806..18AF; MONGOLIAN
   3843             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
   3844             0x1900,   // 1900..194F; LIMBU
   3845             0x1950,   // 1950..197F; TAI_LE
   3846             0x1980,   // 1980..19DF; NEW_TAI_LUE
   3847             0x19E0,   // 19E0..19FF; KHMER
   3848             0x1A00,   // 1A00..1A1F; BUGINESE
   3849             0x1A20,   // 1A20..1AFF; TAI_THAM
   3850             0x1B00,   // 1B00..1B7F; BALINESE
   3851             0x1B80,   // 1B80..1BBF; SUNDANESE
   3852             0x1BC0,   // 1BC0..1BFF; BATAK
   3853             0x1C00,   // 1C00..1C4F; LEPCHA
   3854             0x1C50,   // 1C50..1CBF; OL_CHIKI
   3855             0x1CC0,   // 1CC0..1CCF; SUNDANESE
   3856             0x1CD0,   // 1CD0..1CD2; INHERITED
   3857             0x1CD3,   // 1CD3..1CD3; COMMON
   3858             0x1CD4,   // 1CD4..1CE0; INHERITED
   3859             0x1CE1,   // 1CE1..1CE1; COMMON
   3860             0x1CE2,   // 1CE2..1CE8; INHERITED
   3861             0x1CE9,   // 1CE9..1CEC; COMMON
   3862             0x1CED,   // 1CED..1CED; INHERITED
   3863             0x1CEE,   // 1CEE..1CF3; COMMON
   3864             0x1CF4,   // 1CF4..1CF4; INHERITED
   3865             0x1CF5,   // 1CF5..1CFF; COMMON
   3866             0x1D00,   // 1D00..1D25; LATIN
   3867             0x1D26,   // 1D26..1D2A; GREEK
   3868             0x1D2B,   // 1D2B..1D2B; CYRILLIC
   3869             0x1D2C,   // 1D2C..1D5C; LATIN
   3870             0x1D5D,   // 1D5D..1D61; GREEK
   3871             0x1D62,   // 1D62..1D65; LATIN
   3872             0x1D66,   // 1D66..1D6A; GREEK
   3873             0x1D6B,   // 1D6B..1D77; LATIN
   3874             0x1D78,   // 1D78..1D78; CYRILLIC
   3875             0x1D79,   // 1D79..1DBE; LATIN
   3876             0x1DBF,   // 1DBF..1DBF; GREEK
   3877             0x1DC0,   // 1DC0..1DFF; INHERITED
   3878             0x1E00,   // 1E00..1EFF; LATIN
   3879             0x1F00,   // 1F00..1FFF; GREEK
   3880             0x2000,   // 2000..200B; COMMON
   3881             0x200C,   // 200C..200D; INHERITED
   3882             0x200E,   // 200E..2070; COMMON
   3883             0x2071,   // 2071..2073; LATIN
   3884             0x2074,   // 2074..207E; COMMON
   3885             0x207F,   // 207F..207F; LATIN
   3886             0x2080,   // 2080..208F; COMMON
   3887             0x2090,   // 2090..209F; LATIN
   3888             0x20A0,   // 20A0..20CF; COMMON
   3889             0x20D0,   // 20D0..20FF; INHERITED
   3890             0x2100,   // 2100..2125; COMMON
   3891             0x2126,   // 2126..2126; GREEK
   3892             0x2127,   // 2127..2129; COMMON
   3893             0x212A,   // 212A..212B; LATIN
   3894             0x212C,   // 212C..2131; COMMON
   3895             0x2132,   // 2132..2132; LATIN
   3896             0x2133,   // 2133..214D; COMMON
   3897             0x214E,   // 214E..214E; LATIN
   3898             0x214F,   // 214F..215F; COMMON
   3899             0x2160,   // 2160..2188; LATIN
   3900             0x2189,   // 2189..27FF; COMMON
   3901             0x2800,   // 2800..28FF; BRAILLE
   3902             0x2900,   // 2900..2BFF; COMMON
   3903             0x2C00,   // 2C00..2C5F; GLAGOLITIC
   3904             0x2C60,   // 2C60..2C7F; LATIN
   3905             0x2C80,   // 2C80..2CFF; COPTIC
   3906             0x2D00,   // 2D00..2D2F; GEORGIAN
   3907             0x2D30,   // 2D30..2D7F; TIFINAGH
   3908             0x2D80,   // 2D80..2DDF; ETHIOPIC
   3909             0x2DE0,   // 2DE0..2DFF; CYRILLIC
   3910             0x2E00,   // 2E00..2E7F; COMMON
   3911             0x2E80,   // 2E80..2FEF; HAN
   3912             0x2FF0,   // 2FF0..3004; COMMON
   3913             0x3005,   // 3005..3005; HAN
   3914             0x3006,   // 3006..3006; COMMON
   3915             0x3007,   // 3007..3007; HAN
   3916             0x3008,   // 3008..3020; COMMON
   3917             0x3021,   // 3021..3029; HAN
   3918             0x302A,   // 302A..302D; INHERITED
   3919             0x302E,   // 302E..302F; HANGUL
   3920             0x3030,   // 3030..3037; COMMON
   3921             0x3038,   // 3038..303B; HAN
   3922             0x303C,   // 303C..3040; COMMON
   3923             0x3041,   // 3041..3098; HIRAGANA
   3924             0x3099,   // 3099..309A; INHERITED
   3925             0x309B,   // 309B..309C; COMMON
   3926             0x309D,   // 309D..309F; HIRAGANA
   3927             0x30A0,   // 30A0..30A0; COMMON
   3928             0x30A1,   // 30A1..30FA; KATAKANA
   3929             0x30FB,   // 30FB..30FC; COMMON
   3930             0x30FD,   // 30FD..3104; KATAKANA
   3931             0x3105,   // 3105..3130; BOPOMOFO
   3932             0x3131,   // 3131..318F; HANGUL
   3933             0x3190,   // 3190..319F; COMMON
   3934             0x31A0,   // 31A0..31BF; BOPOMOFO
   3935             0x31C0,   // 31C0..31EF; COMMON
   3936             0x31F0,   // 31F0..31FF; KATAKANA
   3937             0x3200,   // 3200..321F; HANGUL
   3938             0x3220,   // 3220..325F; COMMON
   3939             0x3260,   // 3260..327E; HANGUL
   3940             0x327F,   // 327F..32CF; COMMON
   3941             0x32D0,   // 32D0..3357; KATAKANA
   3942             0x3358,   // 3358..33FF; COMMON
   3943             0x3400,   // 3400..4DBF; HAN
   3944             0x4DC0,   // 4DC0..4DFF; COMMON
   3945             0x4E00,   // 4E00..9FFF; HAN
   3946             0xA000,   // A000..A4CF; YI
   3947             0xA4D0,   // A4D0..A4FF; LISU
   3948             0xA500,   // A500..A63F; VAI
   3949             0xA640,   // A640..A69F; CYRILLIC
   3950             0xA6A0,   // A6A0..A6FF; BAMUM
   3951             0xA700,   // A700..A721; COMMON
   3952             0xA722,   // A722..A787; LATIN
   3953             0xA788,   // A788..A78A; COMMON
   3954             0xA78B,   // A78B..A7FF; LATIN
   3955             0xA800,   // A800..A82F; SYLOTI_NAGRI
   3956             0xA830,   // A830..A83F; COMMON
   3957             0xA840,   // A840..A87F; PHAGS_PA
   3958             0xA880,   // A880..A8DF; SAURASHTRA
   3959             0xA8E0,   // A8E0..A8FF; DEVANAGARI
   3960             0xA900,   // A900..A92F; KAYAH_LI
   3961             0xA930,   // A930..A95F; REJANG
   3962             0xA960,   // A960..A97F; HANGUL
   3963             0xA980,   // A980..A9FF; JAVANESE
   3964             0xAA00,   // AA00..AA5F; CHAM
   3965             0xAA60,   // AA60..AA7F; MYANMAR
   3966             0xAA80,   // AA80..AADF; TAI_VIET
   3967             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
   3968             0xAB01,   // AB01..ABBF; ETHIOPIC
   3969             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
   3970             0xAC00,   // AC00..D7FB; HANGUL
   3971             0xD7FC,   // D7FC..F8FF; UNKNOWN
   3972             0xF900,   // F900..FAFF; HAN
   3973             0xFB00,   // FB00..FB12; LATIN
   3974             0xFB13,   // FB13..FB1C; ARMENIAN
   3975             0xFB1D,   // FB1D..FB4F; HEBREW
   3976             0xFB50,   // FB50..FD3D; ARABIC
   3977             0xFD3E,   // FD3E..FD4F; COMMON
   3978             0xFD50,   // FD50..FDFC; ARABIC
   3979             0xFDFD,   // FDFD..FDFF; COMMON
   3980             0xFE00,   // FE00..FE0F; INHERITED
   3981             0xFE10,   // FE10..FE1F; COMMON
   3982             0xFE20,   // FE20..FE2F; INHERITED
   3983             0xFE30,   // FE30..FE6F; COMMON
   3984             0xFE70,   // FE70..FEFE; ARABIC
   3985             0xFEFF,   // FEFF..FF20; COMMON
   3986             0xFF21,   // FF21..FF3A; LATIN
   3987             0xFF3B,   // FF3B..FF40; COMMON
   3988             0xFF41,   // FF41..FF5A; LATIN
   3989             0xFF5B,   // FF5B..FF65; COMMON
   3990             0xFF66,   // FF66..FF6F; KATAKANA
   3991             0xFF70,   // FF70..FF70; COMMON
   3992             0xFF71,   // FF71..FF9D; KATAKANA
   3993             0xFF9E,   // FF9E..FF9F; COMMON
   3994             0xFFA0,   // FFA0..FFDF; HANGUL
   3995             0xFFE0,   // FFE0..FFFF; COMMON
   3996             0x10000,  // 10000..100FF; LINEAR_B
   3997             0x10100,  // 10100..1013F; COMMON
   3998             0x10140,  // 10140..1018F; GREEK
   3999             0x10190,  // 10190..101FC; COMMON
   4000             0x101FD,  // 101FD..1027F; INHERITED
   4001             0x10280,  // 10280..1029F; LYCIAN
   4002             0x102A0,  // 102A0..102FF; CARIAN
   4003             0x10300,  // 10300..1032F; OLD_ITALIC
   4004             0x10330,  // 10330..1037F; GOTHIC
   4005             0x10380,  // 10380..1039F; UGARITIC
   4006             0x103A0,  // 103A0..103FF; OLD_PERSIAN
   4007             0x10400,  // 10400..1044F; DESERET
   4008             0x10450,  // 10450..1047F; SHAVIAN
   4009             0x10480,  // 10480..107FF; OSMANYA
   4010             0x10800,  // 10800..1083F; CYPRIOT
   4011             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
   4012             0x10900,  // 10900..1091F; PHOENICIAN
   4013             0x10920,  // 10920..1097F; LYDIAN
   4014             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
   4015             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
   4016             0x10A00,  // 10A00..10A5F; KHAROSHTHI
   4017             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
   4018             0x10B00,  // 10B00..10B3F; AVESTAN
   4019             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
   4020             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
   4021             0x10C00,  // 10C00..10E5F; OLD_TURKIC
   4022             0x10E60,  // 10E60..10FFF; ARABIC
   4023             0x11000,  // 11000..1107F; BRAHMI
   4024             0x11080,  // 11080..110CF; KAITHI
   4025             0x110D0,  // 110D0..110FF; SORA_SOMPENG
   4026             0x11100,  // 11100..1117F; CHAKMA
   4027             0x11180,  // 11180..1167F; SHARADA
   4028             0x11680,  // 11680..116CF; TAKRI
   4029             0x12000,  // 12000..12FFF; CUNEIFORM
   4030             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
   4031             0x16800,  // 16800..16A38; BAMUM
   4032             0x16F00,  // 16F00..16F9F; MIAO
   4033             0x1B000,  // 1B000..1B000; KATAKANA
   4034             0x1B001,  // 1B001..1CFFF; HIRAGANA
   4035             0x1D000,  // 1D000..1D166; COMMON
   4036             0x1D167,  // 1D167..1D169; INHERITED
   4037             0x1D16A,  // 1D16A..1D17A; COMMON
   4038             0x1D17B,  // 1D17B..1D182; INHERITED
   4039             0x1D183,  // 1D183..1D184; COMMON
   4040             0x1D185,  // 1D185..1D18B; INHERITED
   4041             0x1D18C,  // 1D18C..1D1A9; COMMON
   4042             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
   4043             0x1D1AE,  // 1D1AE..1D1FF; COMMON
   4044             0x1D200,  // 1D200..1D2FF; GREEK
   4045             0x1D300,  // 1D300..1EDFF; COMMON
   4046             0x1EE00,  // 1EE00..1EFFF; ARABIC
   4047             0x1F000,  // 1F000..1F1FF; COMMON
   4048             0x1F200,  // 1F200..1F200; HIRAGANA
   4049             0x1F201,  // 1F210..1FFFF; COMMON
   4050             0x20000,  // 20000..E0000; HAN
   4051             0xE0001,  // E0001..E00FF; COMMON
   4052             0xE0100,  // E0100..E01EF; INHERITED
   4053             0xE01F0   // E01F0..10FFFF; UNKNOWN
   4054 
   4055         };
   4056 
        // Script assigned to each code point range. This table is indexed in
        // of(int) with the position obtained by binary-searching scriptStarts,
        // so entry i is the script reported for the range that resolves to
        // index i; the two tables are therefore read as parallel arrays.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
   4376 
   4377         private static HashMap<String, Character.UnicodeScript> aliases;
   4378         static {
   4379             aliases = new HashMap<>(128);
   4380             aliases.put("ARAB", ARABIC);
   4381             aliases.put("ARMI", IMPERIAL_ARAMAIC);
   4382             aliases.put("ARMN", ARMENIAN);
   4383             aliases.put("AVST", AVESTAN);
   4384             aliases.put("BALI", BALINESE);
   4385             aliases.put("BAMU", BAMUM);
   4386             aliases.put("BATK", BATAK);
   4387             aliases.put("BENG", BENGALI);
   4388             aliases.put("BOPO", BOPOMOFO);
   4389             aliases.put("BRAI", BRAILLE);
   4390             aliases.put("BRAH", BRAHMI);
   4391             aliases.put("BUGI", BUGINESE);
   4392             aliases.put("BUHD", BUHID);
   4393             aliases.put("CAKM", CHAKMA);
   4394             aliases.put("CANS", CANADIAN_ABORIGINAL);
   4395             aliases.put("CARI", CARIAN);
   4396             aliases.put("CHAM", CHAM);
   4397             aliases.put("CHER", CHEROKEE);
   4398             aliases.put("COPT", COPTIC);
   4399             aliases.put("CPRT", CYPRIOT);
   4400             aliases.put("CYRL", CYRILLIC);
   4401             aliases.put("DEVA", DEVANAGARI);
   4402             aliases.put("DSRT", DESERET);
   4403             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
   4404             aliases.put("ETHI", ETHIOPIC);
   4405             aliases.put("GEOR", GEORGIAN);
   4406             aliases.put("GLAG", GLAGOLITIC);
   4407             aliases.put("GOTH", GOTHIC);
   4408             aliases.put("GREK", GREEK);
   4409             aliases.put("GUJR", GUJARATI);
   4410             aliases.put("GURU", GURMUKHI);
   4411             aliases.put("HANG", HANGUL);
   4412             aliases.put("HANI", HAN);
   4413             aliases.put("HANO", HANUNOO);
   4414             aliases.put("HEBR", HEBREW);
   4415             aliases.put("HIRA", HIRAGANA);
   4416             // it appears we don't have the KATAKANA_OR_HIRAGANA
   4417             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
   4418             aliases.put("ITAL", OLD_ITALIC);
   4419             aliases.put("JAVA", JAVANESE);
   4420             aliases.put("KALI", KAYAH_LI);
   4421             aliases.put("KANA", KATAKANA);
   4422             aliases.put("KHAR", KHAROSHTHI);
   4423             aliases.put("KHMR", KHMER);
   4424             aliases.put("KNDA", KANNADA);
   4425             aliases.put("KTHI", KAITHI);
   4426             aliases.put("LANA", TAI_THAM);
   4427             aliases.put("LAOO", LAO);
   4428             aliases.put("LATN", LATIN);
   4429             aliases.put("LEPC", LEPCHA);
   4430             aliases.put("LIMB", LIMBU);
   4431             aliases.put("LINB", LINEAR_B);
   4432             aliases.put("LISU", LISU);
   4433             aliases.put("LYCI", LYCIAN);
   4434             aliases.put("LYDI", LYDIAN);
   4435             aliases.put("MAND", MANDAIC);
   4436             aliases.put("MERC", MEROITIC_CURSIVE);
   4437             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
   4438             aliases.put("MLYM", MALAYALAM);
   4439             aliases.put("MONG", MONGOLIAN);
   4440             aliases.put("MTEI", MEETEI_MAYEK);
   4441             aliases.put("MYMR", MYANMAR);
   4442             aliases.put("NKOO", NKO);
   4443             aliases.put("OGAM", OGHAM);
   4444             aliases.put("OLCK", OL_CHIKI);
   4445             aliases.put("ORKH", OLD_TURKIC);
   4446             aliases.put("ORYA", ORIYA);
   4447             aliases.put("OSMA", OSMANYA);
   4448             aliases.put("PHAG", PHAGS_PA);
   4449             aliases.put("PLRD", MIAO);
   4450             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
   4451             aliases.put("PHNX", PHOENICIAN);
   4452             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
   4453             aliases.put("RJNG", REJANG);
   4454             aliases.put("RUNR", RUNIC);
   4455             aliases.put("SAMR", SAMARITAN);
   4456             aliases.put("SARB", OLD_SOUTH_ARABIAN);
   4457             aliases.put("SAUR", SAURASHTRA);
   4458             aliases.put("SHAW", SHAVIAN);
   4459             aliases.put("SHRD", SHARADA);
   4460             aliases.put("SINH", SINHALA);
   4461             aliases.put("SORA", SORA_SOMPENG);
   4462             aliases.put("SUND", SUNDANESE);
   4463             aliases.put("SYLO", SYLOTI_NAGRI);
   4464             aliases.put("SYRC", SYRIAC);
   4465             aliases.put("TAGB", TAGBANWA);
   4466             aliases.put("TALE", TAI_LE);
   4467             aliases.put("TAKR", TAKRI);
   4468             aliases.put("TALU", NEW_TAI_LUE);
   4469             aliases.put("TAML", TAMIL);
   4470             aliases.put("TAVT", TAI_VIET);
   4471             aliases.put("TELU", TELUGU);
   4472             aliases.put("TFNG", TIFINAGH);
   4473             aliases.put("TGLG", TAGALOG);
   4474             aliases.put("THAA", THAANA);
   4475             aliases.put("THAI", THAI);
   4476             aliases.put("TIBT", TIBETAN);
   4477             aliases.put("UGAR", UGARITIC);
   4478             aliases.put("VAII", VAI);
   4479             aliases.put("XPEO", OLD_PERSIAN);
   4480             aliases.put("XSUX", CUNEIFORM);
   4481             aliases.put("YIII", YI);
   4482             aliases.put("ZINH", INHERITED);
   4483             aliases.put("ZYYY", COMMON);
   4484             aliases.put("ZZZZ", UNKNOWN);
   4485         }
   4486 
   4487         /**
   4488          * Returns the enum constant representing the Unicode script of which
   4489          * the given character (Unicode code point) is assigned to.
   4490          *
   4491          * @param   codePoint the character (Unicode code point) in question.
   4492          * @return  The {@code UnicodeScript} constant representing the
   4493          *          Unicode script of which this character is assigned to.
   4494          *
   4495          * @exception IllegalArgumentException if the specified
   4496          * {@code codePoint} is an invalid Unicode code point.
   4497          * @see Character#isValidCodePoint(int)
   4498          *
   4499          */
   4500         public static UnicodeScript of(int codePoint) {
   4501             if (!isValidCodePoint(codePoint))
   4502                 throw new IllegalArgumentException();
   4503             int type = getType(codePoint);
   4504             // leave SURROGATE and PRIVATE_USE for table lookup
   4505             if (type == UNASSIGNED)
   4506                 return UNKNOWN;
   4507             int index = Arrays.binarySearch(scriptStarts, codePoint);
   4508             if (index < 0)
   4509                 index = -index - 2;
   4510             return scripts[index];
   4511         }
   4512 
   4513         /**
   4514          * Returns the UnicodeScript constant with the given Unicode script
   4515          * name or the script name alias. Script names and their aliases are
   4516          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
   4517          * and PropertyValueAliases&lt;version&gt;.txt define script names
   4518          * and the script name aliases for a particular version of the
   4519          * standard. The {@link Character} class specifies the version of
   4520          * the standard that it supports.
   4521          * <p>
   4522          * Character case is ignored for all of the valid script names.
   4523          * The en_US locale's case mapping rules are used to provide
   4524          * case-insensitive string comparisons for script name validation.
   4525          * <p>
   4526          *
   4527          * @param scriptName A {@code UnicodeScript} name.
   4528          * @return The {@code UnicodeScript} constant identified
   4529          *         by {@code scriptName}
   4530          * @throws IllegalArgumentException if {@code scriptName} is an
   4531          *         invalid name
   4532          * @throws NullPointerException if {@code scriptName} is null
   4533          */
   4534         public static final UnicodeScript forName(String scriptName) {
   4535             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
   4536                                  //.replace(' ', '_'));
   4537             UnicodeScript sc = aliases.get(scriptName);
   4538             if (sc != null)
   4539                 return sc;
   4540             return valueOf(scriptName);
   4541         }
   4542     }
   4543 
    /**
     * The primitive {@code char} wrapped by this {@code Character}; it is
     * assigned once in the constructor and never changes afterwards.
     *
     * @serial
     */
    private final char value;

    /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;
   4553 
   4554     /**
   4555      * Constructs a newly allocated {@code Character} object that
   4556      * represents the specified {@code char} value.
   4557      *
   4558      * @param  value   the value to be represented by the
   4559      *                  {@code Character} object.
   4560      */
   4561     public Character(char value) {
   4562         this.value = value;
   4563     }
   4564 
   4565     private static class CharacterCache {
   4566         private CharacterCache(){}
   4567 
   4568         static final Character cache[] = new Character[127 + 1];
   4569 
   4570         static {
   4571             for (int i = 0; i < cache.length; i++)
   4572                 cache[i] = new Character((char)i);
   4573         }
   4574     }
   4575 
   4576     /**
   4577      * Returns a <tt>Character</tt> instance representing the specified
   4578      * <tt>char</tt> value.
   4579      * If a new <tt>Character</tt> instance is not required, this method
   4580      * should generally be used in preference to the constructor
   4581      * {@link #Character(char)}, as this method is likely to yield
   4582      * significantly better space and time performance by caching
   4583      * frequently requested values.
   4584      *
   4585      * This method will always cache values in the range {@code
   4586      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   4587      * cache other values outside of this range.
   4588      *
   4589      * @param  c a char value.
   4590      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   4591      * @since  1.5
   4592      */
   4593     public static Character valueOf(char c) {
   4594         if (c <= 127) { // must cache
   4595             return CharacterCache.cache[(int)c];
   4596         }
   4597         return new Character(c);
   4598     }
   4599 
   4600     /**
   4601      * Returns the value of this {@code Character} object.
   4602      * @return  the primitive {@code char} value represented by
   4603      *          this object.
   4604      */
   4605     public char charValue() {
   4606         return value;
   4607     }
   4608 
   4609     /**
   4610      * Returns a hash code for this {@code Character}; equal to the result
   4611      * of invoking {@code charValue()}.
   4612      *
   4613      * @return a hash code value for this {@code Character}
   4614      */
   4615     @Override
   4616     public int hashCode() {
   4617         return Character.hashCode(value);
   4618     }
   4619 
   4620     /**
   4621      * Returns a hash code for a {@code char} value; compatible with
   4622      * {@code Character.hashCode()}.
   4623      *
   4624      * @since 1.8
   4625      *
   4626      * @param value The {@code char} for which to return a hash code.
   4627      * @return a hash code value for a {@code char} value.
   4628      */
   4629     public static int hashCode(char value) {
   4630         return (int)value;
   4631     }
   4632 
   4633     /**
   4634      * Compares this object against the specified object.
   4635      * The result is {@code true} if and only if the argument is not
   4636      * {@code null} and is a {@code Character} object that
   4637      * represents the same {@code char} value as this object.
   4638      *
   4639      * @param   obj   the object to compare with.
   4640      * @return  {@code true} if the objects are the same;
   4641      *          {@code false} otherwise.
   4642      */
   4643     public boolean equals(Object obj) {
   4644         if (obj instanceof Character) {
   4645             return value == ((Character)obj).charValue();
   4646         }
   4647         return false;
   4648     }
   4649 
   4650     /**
   4651      * Returns a {@code String} object representing this
   4652      * {@code Character}'s value.  The result is a string of
   4653      * length 1 whose sole component is the primitive
   4654      * {@code char} value represented by this
   4655      * {@code Character} object.
   4656      *
   4657      * @return  a string representation of this object.
   4658      */
   4659     public String toString() {
   4660         char buf[] = {value};
   4661         return String.valueOf(buf);
   4662     }
   4663 
   4664     /**
   4665      * Returns a {@code String} object representing the
   4666      * specified {@code char}.  The result is a string of length
   4667      * 1 consisting solely of the specified {@code char}.
   4668      *
   4669      * @param c the {@code char} to be converted
   4670      * @return the string representation of the specified {@code char}
   4671      * @since 1.4
   4672      */
   4673     public static String toString(char c) {
   4674         return String.valueOf(c);
   4675     }
   4676 
   4677     /**
   4678      * Determines whether the specified code point is a valid
   4679      * <a href="http://www.unicode.org/glossary/#code_point">
   4680      * Unicode code point value</a>.
   4681      *
   4682      * @param  codePoint the Unicode code point to be tested
   4683      * @return {@code true} if the specified code point value is between
   4684      *         {@link #MIN_CODE_POINT} and
   4685      *         {@link #MAX_CODE_POINT} inclusive;
   4686      *         {@code false} otherwise.
   4687      * @since  1.5
   4688      */
   4689     public static boolean isValidCodePoint(int codePoint) {
   4690         // Optimized form of:
   4691         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   4692         int plane = codePoint >>> 16;
   4693         return plane < ((MAX_CODE_POINT + 1) >>> 16);
   4694     }
   4695 
   4696     /**
   4697      * Determines whether the specified character (Unicode code point)
   4698      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
   4699      * Such code points can be represented using a single {@code char}.
   4700      *
   4701      * @param  codePoint the character (Unicode code point) to be tested
   4702      * @return {@code true} if the specified code point is between
   4703      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   4704      *         {@code false} otherwise.
   4705      * @since  1.7
   4706      */
   4707     public static boolean isBmpCodePoint(int codePoint) {
   4708         return codePoint >>> 16 == 0;
   4709         // Optimized form of:
   4710         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   4711         // We consistently use logical shift (>>>) to facilitate
   4712         // additional runtime optimizations.
   4713     }
   4714 
   4715     /**
   4716      * Determines whether the specified character (Unicode code point)
   4717      * is in the <a href="#supplementary">supplementary character</a> range.
   4718      *
   4719      * @param  codePoint the character (Unicode code point) to be tested
   4720      * @return {@code true} if the specified code point is between
   4721      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   4722      *         {@link #MAX_CODE_POINT} inclusive;
   4723      *         {@code false} otherwise.
   4724      * @since  1.5
   4725      */
   4726     public static boolean isSupplementaryCodePoint(int codePoint) {
   4727         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
   4728             && codePoint <  MAX_CODE_POINT + 1;
   4729     }
   4730 
   4731     /**
   4732      * Determines if the given {@code char} value is a
   4733      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   4734      * Unicode high-surrogate code unit</a>
   4735      * (also known as <i>leading-surrogate code unit</i>).
   4736      *
   4737      * <p>Such values do not represent characters by themselves,
   4738      * but are used in the representation of
   4739      * <a href="#supplementary">supplementary characters</a>
   4740      * in the UTF-16 encoding.
   4741      *
   4742      * @param  ch the {@code char} value to be tested.
   4743      * @return {@code true} if the {@code char} value is between
   4744      *         {@link #MIN_HIGH_SURROGATE} and
   4745      *         {@link #MAX_HIGH_SURROGATE} inclusive;
   4746      *         {@code false} otherwise.
   4747      * @see    Character#isLowSurrogate(char)
   4748      * @see    Character.UnicodeBlock#of(int)
   4749      * @since  1.5
   4750      */
   4751     public static boolean isHighSurrogate(char ch) {
   4752         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   4753         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
   4754     }
   4755 
   4756     /**
   4757      * Determines if the given {@code char} value is a
   4758      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   4759      * Unicode low-surrogate code unit</a>
   4760      * (also known as <i>trailing-surrogate code unit</i>).
   4761      *
   4762      * <p>Such values do not represent characters by themselves,
   4763      * but are used in the representation of
   4764      * <a href="#supplementary">supplementary characters</a>
   4765      * in the UTF-16 encoding.
   4766      *
   4767      * @param  ch the {@code char} value to be tested.
   4768      * @return {@code true} if the {@code char} value is between
   4769      *         {@link #MIN_LOW_SURROGATE} and
   4770      *         {@link #MAX_LOW_SURROGATE} inclusive;
   4771      *         {@code false} otherwise.
   4772      * @see    Character#isHighSurrogate(char)
   4773      * @since  1.5
   4774      */
   4775     public static boolean isLowSurrogate(char ch) {
   4776         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
   4777     }
   4778 
   4779     /**
   4780      * Determines if the given {@code char} value is a Unicode
   4781      * <i>surrogate code unit</i>.
   4782      *
   4783      * <p>Such values do not represent characters by themselves,
   4784      * but are used in the representation of
   4785      * <a href="#supplementary">supplementary characters</a>
   4786      * in the UTF-16 encoding.
   4787      *
   4788      * <p>A char value is a surrogate code unit if and only if it is either
   4789      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   4790      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   4791      *
   4792      * @param  ch the {@code char} value to be tested.
   4793      * @return {@code true} if the {@code char} value is between
   4794      *         {@link #MIN_SURROGATE} and
   4795      *         {@link #MAX_SURROGATE} inclusive;
   4796      *         {@code false} otherwise.
   4797      * @since  1.7
   4798      */
   4799     public static boolean isSurrogate(char ch) {
   4800         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
   4801     }
   4802 
   4803     /**
   4804      * Determines whether the specified pair of {@code char}
   4805      * values is a valid
   4806      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   4807      * Unicode surrogate pair</a>.
   4808 
   4809      * <p>This method is equivalent to the expression:
   4810      * <blockquote><pre>{@code
   4811      * isHighSurrogate(high) && isLowSurrogate(low)
   4812      * }</pre></blockquote>
   4813      *
   4814      * @param  high the high-surrogate code value to be tested
   4815      * @param  low the low-surrogate code value to be tested
   4816      * @return {@code true} if the specified high and
   4817      * low-surrogate code values represent a valid surrogate pair;
   4818      * {@code false} otherwise.
   4819      * @since  1.5
   4820      */
   4821     public static boolean isSurrogatePair(char high, char low) {
   4822         return isHighSurrogate(high) && isLowSurrogate(low);
   4823     }
   4824 
   4825     /**
   4826      * Determines the number of {@code char} values needed to
   4827      * represent the specified character (Unicode code point). If the
   4828      * specified character is equal to or greater than 0x10000, then
   4829      * the method returns 2. Otherwise, the method returns 1.
   4830      *
   4831      * <p>This method doesn't validate the specified character to be a
   4832      * valid Unicode code point. The caller must validate the
   4833      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
   4834      * if necessary.
   4835      *
   4836      * @param   codePoint the character (Unicode code point) to be tested.
   4837      * @return  2 if the character is a valid supplementary character; 1 otherwise.
   4838      * @see     Character#isSupplementaryCodePoint(int)
   4839      * @since   1.5
   4840      */
   4841     public static int charCount(int codePoint) {
   4842         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
   4843     }
   4844 
   4845     /**
   4846      * Converts the specified surrogate pair to its supplementary code
   4847      * point value. This method does not validate the specified
   4848      * surrogate pair. The caller must validate it using {@link
   4849      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
   4850      *
   4851      * @param  high the high-surrogate code unit
   4852      * @param  low the low-surrogate code unit
   4853      * @return the supplementary code point composed from the
   4854      *         specified surrogate pair.
   4855      * @since  1.5
   4856      */
   4857     public static int toCodePoint(char high, char low) {
   4858         // Optimized form of:
   4859         // return ((high - MIN_HIGH_SURROGATE) << 10)
   4860         //         + (low - MIN_LOW_SURROGATE)
   4861         //         + MIN_SUPPLEMENTARY_CODE_POINT;
   4862         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
   4863                                        - (MIN_HIGH_SURROGATE << 10)
   4864                                        - MIN_LOW_SURROGATE);
   4865     }
   4866 
   4867     /**
   4868      * Returns the code point at the given index of the
   4869      * {@code CharSequence}. If the {@code char} value at
   4870      * the given index in the {@code CharSequence} is in the
   4871      * high-surrogate range, the following index is less than the
   4872      * length of the {@code CharSequence}, and the
   4873      * {@code char} value at the following index is in the
   4874      * low-surrogate range, then the supplementary code point
   4875      * corresponding to this surrogate pair is returned. Otherwise,
   4876      * the {@code char} value at the given index is returned.
   4877      *
   4878      * @param seq a sequence of {@code char} values (Unicode code
   4879      * units)
   4880      * @param index the index to the {@code char} values (Unicode
   4881      * code units) in {@code seq} to be converted
   4882      * @return the Unicode code point at the given index
   4883      * @exception NullPointerException if {@code seq} is null.
   4884      * @exception IndexOutOfBoundsException if the value
   4885      * {@code index} is negative or not less than
   4886      * {@link CharSequence#length() seq.length()}.
   4887      * @since  1.5
   4888      */
   4889     public static int codePointAt(CharSequence seq, int index) {
   4890         char c1 = seq.charAt(index);
   4891         if (isHighSurrogate(c1) && ++index < seq.length()) {
   4892             char c2 = seq.charAt(index);
   4893             if (isLowSurrogate(c2)) {
   4894                 return toCodePoint(c1, c2);
   4895             }
   4896         }
   4897         return c1;
   4898     }
   4899 
   4900     /**
   4901      * Returns the code point at the given index of the
   4902      * {@code char} array. If the {@code char} value at
   4903      * the given index in the {@code char} array is in the
   4904      * high-surrogate range, the following index is less than the
   4905      * length of the {@code char} array, and the
   4906      * {@code char} value at the following index is in the
   4907      * low-surrogate range, then the supplementary code point
   4908      * corresponding to this surrogate pair is returned. Otherwise,
   4909      * the {@code char} value at the given index is returned.
   4910      *
   4911      * @param a the {@code char} array
   4912      * @param index the index to the {@code char} values (Unicode
   4913      * code units) in the {@code char} array to be converted
   4914      * @return the Unicode code point at the given index
   4915      * @exception NullPointerException if {@code a} is null.
   4916      * @exception IndexOutOfBoundsException if the value
   4917      * {@code index} is negative or not less than
   4918      * the length of the {@code char} array.
   4919      * @since  1.5
   4920      */
   4921     public static int codePointAt(char[] a, int index) {
   4922         return codePointAtImpl(a, index, a.length);
   4923     }
   4924 
   4925     /**
   4926      * Returns the code point at the given index of the
   4927      * {@code char} array, where only array elements with
   4928      * {@code index} less than {@code limit} can be used. If
   4929      * the {@code char} value at the given index in the
   4930      * {@code char} array is in the high-surrogate range, the
   4931      * following index is less than the {@code limit}, and the
   4932      * {@code char} value at the following index is in the
   4933      * low-surrogate range, then the supplementary code point
   4934      * corresponding to this surrogate pair is returned. Otherwise,
   4935      * the {@code char} value at the given index is returned.
   4936      *
   4937      * @param a the {@code char} array
   4938      * @param index the index to the {@code char} values (Unicode
   4939      * code units) in the {@code char} array to be converted
   4940      * @param limit the index after the last array element that
   4941      * can be used in the {@code char} array
   4942      * @return the Unicode code point at the given index
   4943      * @exception NullPointerException if {@code a} is null.
   4944      * @exception IndexOutOfBoundsException if the {@code index}
   4945      * argument is negative or not less than the {@code limit}
   4946      * argument, or if the {@code limit} argument is negative or
   4947      * greater than the length of the {@code char} array.
   4948      * @since  1.5
   4949      */
   4950     public static int codePointAt(char[] a, int index, int limit) {
   4951         if (index >= limit || limit < 0 || limit > a.length) {
   4952             throw new IndexOutOfBoundsException();
   4953         }
   4954         return codePointAtImpl(a, index, limit);
   4955     }
   4956 
   4957     // throws ArrayIndexOutOfBoundsException if index out of bounds
   4958     static int codePointAtImpl(char[] a, int index, int limit) {
   4959         char c1 = a[index];
   4960         if (isHighSurrogate(c1) && ++index < limit) {
   4961             char c2 = a[index];
   4962             if (isLowSurrogate(c2)) {
   4963                 return toCodePoint(c1, c2);
   4964             }
   4965         }
   4966         return c1;
   4967     }
   4968 
   4969     /**
   4970      * Returns the code point preceding the given index of the
   4971      * {@code CharSequence}. If the {@code char} value at
   4972      * {@code (index - 1)} in the {@code CharSequence} is in
   4973      * the low-surrogate range, {@code (index - 2)} is not
   4974      * negative, and the {@code char} value at {@code (index - 2)}
   4975      * in the {@code CharSequence} is in the
   4976      * high-surrogate range, then the supplementary code point
   4977      * corresponding to this surrogate pair is returned. Otherwise,
   4978      * the {@code char} value at {@code (index - 1)} is
   4979      * returned.
   4980      *
   4981      * @param seq the {@code CharSequence} instance
   4982      * @param index the index following the code point that should be returned
   4983      * @return the Unicode code point value before the given index.
   4984      * @exception NullPointerException if {@code seq} is null.
   4985      * @exception IndexOutOfBoundsException if the {@code index}
   4986      * argument is less than 1 or greater than {@link
   4987      * CharSequence#length() seq.length()}.
   4988      * @since  1.5
   4989      */
   4990     public static int codePointBefore(CharSequence seq, int index) {
   4991         char c2 = seq.charAt(--index);
   4992         if (isLowSurrogate(c2) && index > 0) {
   4993             char c1 = seq.charAt(--index);
   4994             if (isHighSurrogate(c1)) {
   4995                 return toCodePoint(c1, c2);
   4996             }
   4997         }
   4998         return c2;
   4999     }
   5000 
   5001     /**
   5002      * Returns the code point preceding the given index of the
   5003      * {@code char} array. If the {@code char} value at
   5004      * {@code (index - 1)} in the {@code char} array is in
   5005      * the low-surrogate range, {@code (index - 2)} is not
   5006      * negative, and the {@code char} value at {@code (index - 2)}
   5007      * in the {@code char} array is in the
   5008      * high-surrogate range, then the supplementary code point
   5009      * corresponding to this surrogate pair is returned. Otherwise,
   5010      * the {@code char} value at {@code (index - 1)} is
   5011      * returned.
   5012      *
   5013      * @param a the {@code char} array
   5014      * @param index the index following the code point that should be returned
   5015      * @return the Unicode code point value before the given index.
   5016      * @exception NullPointerException if {@code a} is null.
   5017      * @exception IndexOutOfBoundsException if the {@code index}
   5018      * argument is less than 1 or greater than the length of the
   5019      * {@code char} array
   5020      * @since  1.5
   5021      */
   5022     public static int codePointBefore(char[] a, int index) {
   5023         return codePointBeforeImpl(a, index, 0);
   5024     }
   5025 
   5026     /**
   5027      * Returns the code point preceding the given index of the
   5028      * {@code char} array, where only array elements with
   5029      * {@code index} greater than or equal to {@code start}
   5030      * can be used. If the {@code char} value at {@code (index - 1)}
   5031      * in the {@code char} array is in the
   5032      * low-surrogate range, {@code (index - 2)} is not less than
   5033      * {@code start}, and the {@code char} value at
   5034      * {@code (index - 2)} in the {@code char} array is in
   5035      * the high-surrogate range, then the supplementary code point
   5036      * corresponding to this surrogate pair is returned. Otherwise,
   5037      * the {@code char} value at {@code (index - 1)} is
   5038      * returned.
   5039      *
   5040      * @param a the {@code char} array
   5041      * @param index the index following the code point that should be returned
   5042      * @param start the index of the first array element in the
   5043      * {@code char} array
   5044      * @return the Unicode code point value before the given index.
   5045      * @exception NullPointerException if {@code a} is null.
   5046      * @exception IndexOutOfBoundsException if the {@code index}
   5047      * argument is not greater than the {@code start} argument or
   5048      * is greater than the length of the {@code char} array, or
   5049      * if the {@code start} argument is negative or not less than
   5050      * the length of the {@code char} array.
   5051      * @since  1.5
   5052      */
   5053     public static int codePointBefore(char[] a, int index, int start) {
   5054         if (index <= start || start < 0 || start >= a.length) {
   5055             throw new IndexOutOfBoundsException();
   5056         }
   5057         return codePointBeforeImpl(a, index, start);
   5058     }
   5059 
   5060     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
   5061     static int codePointBeforeImpl(char[] a, int index, int start) {
   5062         char c2 = a[--index];
   5063         if (isLowSurrogate(c2) && index > start) {
   5064             char c1 = a[--index];
   5065             if (isHighSurrogate(c1)) {
   5066                 return toCodePoint(c1, c2);
   5067             }
   5068         }
   5069         return c2;
   5070     }
   5071 
   5072     /**
   5073      * Returns the leading surrogate (a
   5074      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   5075      * high surrogate code unit</a>) of the
   5076      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   5077      * surrogate pair</a>
   5078      * representing the specified supplementary character (Unicode
   5079      * code point) in the UTF-16 encoding.  If the specified character
   5080      * is not a
   5081      * <a href="Character.html#supplementary">supplementary character</a>,
   5082      * an unspecified {@code char} is returned.
   5083      *
   5084      * <p>If
   5085      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
   5086      * is {@code true}, then
   5087      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
   5088      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
   5089      * are also always {@code true}.
   5090      *
   5091      * @param   codePoint a supplementary character (Unicode code point)
   5092      * @return  the leading surrogate code unit used to represent the
   5093      *          character in the UTF-16 encoding
   5094      * @since   1.7
   5095      */
   5096     public static char highSurrogate(int codePoint) {
   5097         return (char) ((codePoint >>> 10)
   5098             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
   5099     }
   5100 
   5101     /**
   5102      * Returns the trailing surrogate (a
   5103      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   5104      * low surrogate code unit</a>) of the
   5105      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   5106      * surrogate pair</a>
   5107      * representing the specified supplementary character (Unicode
   5108      * code point) in the UTF-16 encoding.  If the specified character
   5109      * is not a
   5110      * <a href="Character.html#supplementary">supplementary character</a>,
   5111      * an unspecified {@code char} is returned.
   5112      *
   5113      * <p>If
   5114      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
   5115      * is {@code true}, then
   5116      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
   5117      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
   5118      * are also always {@code true}.
   5119      *
   5120      * @param   codePoint a supplementary character (Unicode code point)
   5121      * @return  the trailing surrogate code unit used to represent the
   5122      *          character in the UTF-16 encoding
   5123      * @since   1.7
   5124      */
   5125     public static char lowSurrogate(int codePoint) {
   5126         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
   5127     }
   5128 
   5129     /**
   5130      * Converts the specified character (Unicode code point) to its
   5131      * UTF-16 representation. If the specified code point is a BMP
   5132      * (Basic Multilingual Plane or Plane 0) value, the same value is
   5133      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
   5134      * specified code point is a supplementary character, its
   5135      * surrogate values are stored in {@code dst[dstIndex]}
   5136      * (high-surrogate) and {@code dst[dstIndex+1]}
   5137      * (low-surrogate), and 2 is returned.
   5138      *
   5139      * @param  codePoint the character (Unicode code point) to be converted.
   5140      * @param  dst an array of {@code char} in which the
   5141      * {@code codePoint}'s UTF-16 value is stored.
   5142      * @param dstIndex the start index into the {@code dst}
   5143      * array where the converted value is stored.
   5144      * @return 1 if the code point is a BMP code point, 2 if the
   5145      * code point is a supplementary code point.
   5146      * @exception IllegalArgumentException if the specified
   5147      * {@code codePoint} is not a valid Unicode code point.
   5148      * @exception NullPointerException if the specified {@code dst} is null.
   5149      * @exception IndexOutOfBoundsException if {@code dstIndex}
   5150      * is negative or not less than {@code dst.length}, or if
   5151      * {@code dst} at {@code dstIndex} doesn't have enough
   5152      * array element(s) to store the resulting {@code char}
   5153      * value(s). (If {@code dstIndex} is equal to
   5154      * {@code dst.length-1} and the specified
   5155      * {@code codePoint} is a supplementary character, the
   5156      * high-surrogate value is not stored in
   5157      * {@code dst[dstIndex]}.)
   5158      * @since  1.5
   5159      */
   5160     public static int toChars(int codePoint, char[] dst, int dstIndex) {
   5161         if (isBmpCodePoint(codePoint)) {
   5162             dst[dstIndex] = (char) codePoint;
   5163             return 1;
   5164         } else if (isValidCodePoint(codePoint)) {
   5165             toSurrogates(codePoint, dst, dstIndex);
   5166             return 2;
   5167         } else {
   5168             throw new IllegalArgumentException();
   5169         }
   5170     }
   5171 
   5172     /**
   5173      * Converts the specified character (Unicode code point) to its
   5174      * UTF-16 representation stored in a {@code char} array. If
   5175      * the specified code point is a BMP (Basic Multilingual Plane or
   5176      * Plane 0) value, the resulting {@code char} array has
   5177      * the same value as {@code codePoint}. If the specified code
   5178      * point is a supplementary code point, the resulting
   5179      * {@code char} array has the corresponding surrogate pair.
   5180      *
   5181      * @param  codePoint a Unicode code point
   5182      * @return a {@code char} array having
   5183      *         {@code codePoint}'s UTF-16 representation.
   5184      * @exception IllegalArgumentException if the specified
   5185      * {@code codePoint} is not a valid Unicode code point.
   5186      * @since  1.5
   5187      */
   5188     public static char[] toChars(int codePoint) {
   5189         if (isBmpCodePoint(codePoint)) {
   5190             return new char[] { (char) codePoint };
   5191         } else if (isValidCodePoint(codePoint)) {
   5192             char[] result = new char[2];
   5193             toSurrogates(codePoint, result, 0);
   5194             return result;
   5195         } else {
   5196             throw new IllegalArgumentException();
   5197         }
   5198     }
   5199 
   5200     static void toSurrogates(int codePoint, char[] dst, int index) {
   5201         // We write elements "backwards" to guarantee all-or-nothing
   5202         dst[index+1] = lowSurrogate(codePoint);
   5203         dst[index] = highSurrogate(codePoint);
   5204     }
   5205 
   5206     /**
   5207      * Returns the number of Unicode code points in the text range of
   5208      * the specified char sequence. The text range begins at the
   5209      * specified {@code beginIndex} and extends to the
   5210      * {@code char} at index {@code endIndex - 1}. Thus the
   5211      * length (in {@code char}s) of the text range is
   5212      * {@code endIndex-beginIndex}. Unpaired surrogates within
   5213      * the text range count as one code point each.
   5214      *
   5215      * @param seq the char sequence
   5216      * @param beginIndex the index to the first {@code char} of
   5217      * the text range.
   5218      * @param endIndex the index after the last {@code char} of
   5219      * the text range.
   5220      * @return the number of Unicode code points in the specified text
   5221      * range
   5222      * @exception NullPointerException if {@code seq} is null.
   5223      * @exception IndexOutOfBoundsException if the
   5224      * {@code beginIndex} is negative, or {@code endIndex}
   5225      * is larger than the length of the given sequence, or
   5226      * {@code beginIndex} is larger than {@code endIndex}.
   5227      * @since  1.5
   5228      */
   5229     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
   5230         int length = seq.length();
   5231         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
   5232             throw new IndexOutOfBoundsException();
   5233         }
   5234         int n = endIndex - beginIndex;
   5235         for (int i = beginIndex; i < endIndex; ) {
   5236             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
   5237                 isLowSurrogate(seq.charAt(i))) {
   5238                 n--;
   5239                 i++;
   5240             }
   5241         }
   5242         return n;
   5243     }
   5244 
   5245     /**
   5246      * Returns the number of Unicode code points in a subarray of the
   5247      * {@code char} array argument. The {@code offset}
   5248      * argument is the index of the first {@code char} of the
   5249      * subarray and the {@code count} argument specifies the
   5250      * length of the subarray in {@code char}s. Unpaired
   5251      * surrogates within the subarray count as one code point each.
   5252      *
   5253      * @param a the {@code char} array
   5254      * @param offset the index of the first {@code char} in the
   5255      * given {@code char} array
   5256      * @param count the length of the subarray in {@code char}s
   5257      * @return the number of Unicode code points in the specified subarray
   5258      * @exception NullPointerException if {@code a} is null.
   5259      * @exception IndexOutOfBoundsException if {@code offset} or
   5260      * {@code count} is negative, or if {@code offset +
   5261      * count} is larger than the length of the given array.
   5262      * @since  1.5
   5263      */
   5264     public static int codePointCount(char[] a, int offset, int count) {
   5265         if (count > a.length - offset || offset < 0 || count < 0) {
   5266             throw new IndexOutOfBoundsException();
   5267         }
   5268         return codePointCountImpl(a, offset, count);
   5269     }
   5270 
   5271     static int codePointCountImpl(char[] a, int offset, int count) {
   5272         int endIndex = offset + count;
   5273         int n = count;
   5274         for (int i = offset; i < endIndex; ) {
   5275             if (isHighSurrogate(a[i++]) && i < endIndex &&
   5276                 isLowSurrogate(a[i])) {
   5277                 n--;
   5278                 i++;
   5279             }
   5280         }
   5281         return n;
   5282     }
   5283 
   5284     /**
   5285      * Returns the index within the given char sequence that is offset
   5286      * from the given {@code index} by {@code codePointOffset}
   5287      * code points. Unpaired surrogates within the text range given by
   5288      * {@code index} and {@code codePointOffset} count as
   5289      * one code point each.
   5290      *
   5291      * @param seq the char sequence
   5292      * @param index the index to be offset
   5293      * @param codePointOffset the offset in code points
   5294      * @return the index within the char sequence
   5295      * @exception NullPointerException if {@code seq} is null.
   5296      * @exception IndexOutOfBoundsException if {@code index}
   5297      *   is negative or larger than the length of the char sequence,
   5298      *   or if {@code codePointOffset} is positive and the
   5299      *   subsequence starting with {@code index} has fewer than
   5300      *   {@code codePointOffset} code points, or if
   5301      *   {@code codePointOffset} is negative and the subsequence
   5302      *   before {@code index} has fewer than the absolute value
   5303      *   of {@code codePointOffset} code points.
   5304      * @since 1.5
   5305      */
   5306     public static int offsetByCodePoints(CharSequence seq, int index,
   5307                                          int codePointOffset) {
   5308         int length = seq.length();
   5309         if (index < 0 || index > length) {
   5310             throw new IndexOutOfBoundsException();
   5311         }
   5312 
   5313         int x = index;
   5314         if (codePointOffset >= 0) {
   5315             int i;
   5316             for (i = 0; x < length && i < codePointOffset; i++) {
   5317                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
   5318                     isLowSurrogate(seq.charAt(x))) {
   5319                     x++;
   5320                 }
   5321             }
   5322             if (i < codePointOffset) {
   5323                 throw new IndexOutOfBoundsException();
   5324             }
   5325         } else {
   5326             int i;
   5327             for (i = codePointOffset; x > 0 && i < 0; i++) {
   5328                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
   5329                     isHighSurrogate(seq.charAt(x-1))) {
   5330                     x--;
   5331                 }
   5332             }
   5333             if (i < 0) {
   5334                 throw new IndexOutOfBoundsException();
   5335             }
   5336         }
   5337         return x;
   5338     }
   5339 
   5340     /**
   5341      * Returns the index within the given {@code char} subarray
   5342      * that is offset from the given {@code index} by
   5343      * {@code codePointOffset} code points. The
   5344      * {@code start} and {@code count} arguments specify a
   5345      * subarray of the {@code char} array. Unpaired surrogates
   5346      * within the text range given by {@code index} and
   5347      * {@code codePointOffset} count as one code point each.
   5348      *
   5349      * @param a the {@code char} array
   5350      * @param start the index of the first {@code char} of the
   5351      * subarray
   5352      * @param count the length of the subarray in {@code char}s
   5353      * @param index the index to be offset
   5354      * @param codePointOffset the offset in code points
   5355      * @return the index within the subarray
   5356      * @exception NullPointerException if {@code a} is null.
   5357      * @exception IndexOutOfBoundsException
   5358      *   if {@code start} or {@code count} is negative,
   5359      *   or if {@code start + count} is larger than the length of
   5360      *   the given array,
   5361      *   or if {@code index} is less than {@code start} or
   5362      *   larger than {@code start + count},
   5363      *   or if {@code codePointOffset} is positive and the text range
   5364      *   starting with {@code index} and ending with {@code start + count - 1}
   5365      *   has fewer than {@code codePointOffset} code
   5366      *   points,
   5367      *   or if {@code codePointOffset} is negative and the text range
   5368      *   starting with {@code start} and ending with {@code index - 1}
   5369      *   has fewer than the absolute value of
   5370      *   {@code codePointOffset} code points.
   5371      * @since 1.5
   5372      */
   5373     public static int offsetByCodePoints(char[] a, int start, int count,
   5374                                          int index, int codePointOffset) {
   5375         if (count > a.length-start || start < 0 || count < 0
   5376             || index < start || index > start+count) {
   5377             throw new IndexOutOfBoundsException();
   5378         }
   5379         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
   5380     }
   5381 
   5382     static int offsetByCodePointsImpl(char[]a, int start, int count,
   5383                                       int index, int codePointOffset) {
   5384         int x = index;
   5385         if (codePointOffset >= 0) {
   5386             int limit = start + count;
   5387             int i;
   5388             for (i = 0; x < limit && i < codePointOffset; i++) {
   5389                 if (isHighSurrogate(a[x++]) && x < limit &&
   5390                     isLowSurrogate(a[x])) {
   5391                     x++;
   5392                 }
   5393             }
   5394             if (i < codePointOffset) {
   5395                 throw new IndexOutOfBoundsException();
   5396             }
   5397         } else {
   5398             int i;
   5399             for (i = codePointOffset; x > start && i < 0; i++) {
   5400                 if (isLowSurrogate(a[--x]) && x > start &&
   5401                     isHighSurrogate(a[x-1])) {
   5402                     x--;
   5403                 }
   5404             }
   5405             if (i < 0) {
   5406                 throw new IndexOutOfBoundsException();
   5407             }
   5408         }
   5409         return x;
   5410     }
   5411 
   5412     /**
   5413      * Determines if the specified character is a lowercase character.
   5414      * <p>
   5415      * A character is lowercase if its general category type, provided
   5416      * by {@code Character.getType(ch)}, is
   5417      * {@code LOWERCASE_LETTER}, or it has contributory property
   5418      * Other_Lowercase as defined by the Unicode Standard.
   5419      * <p>
   5420      * The following are examples of lowercase characters:
   5421      * <blockquote><pre>
   5422      * a b c d e f g h i j k l m n o p q r s t u v w x y z
   5423      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   5424      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   5425      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   5426      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   5427      * </pre></blockquote>
   5428      * <p> Many other Unicode characters are lowercase too.
   5429      *
   5430      * <p><b>Note:</b> This method cannot handle <a
   5431      * href="#supplementary"> supplementary characters</a>. To support
   5432      * all Unicode characters, including supplementary characters, use
   5433      * the {@link #isLowerCase(int)} method.
   5434      *
   5435      * @param   ch   the character to be tested.
   5436      * @return  {@code true} if the character is lowercase;
   5437      *          {@code false} otherwise.
   5438      * @see     Character#isUpperCase(char)
   5439      * @see     Character#isTitleCase(char)
   5440      * @see     Character#toLowerCase(char)
   5441      * @see     Character#getType(char)
   5442      */
   5443     public static boolean isLowerCase(char ch) {
   5444         return isLowerCase((int)ch);
   5445     }
   5446 
   5447     /**
   5448      * Determines if the specified character (Unicode code point) is a
   5449      * lowercase character.
   5450      * <p>
   5451      * A character is lowercase if its general category type, provided
   5452      * by {@link Character#getType getType(codePoint)}, is
   5453      * {@code LOWERCASE_LETTER}, or it has contributory property
   5454      * Other_Lowercase as defined by the Unicode Standard.
   5455      * <p>
   5456      * The following are examples of lowercase characters:
   5457      * <blockquote><pre>
   5458      * a b c d e f g h i j k l m n o p q r s t u v w x y z
   5459      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   5460      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   5461      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   5462      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   5463      * </pre></blockquote>
   5464      * <p> Many other Unicode characters are lowercase too.
   5465      *
   5466      * @param   codePoint the character (Unicode code point) to be tested.
   5467      * @return  {@code true} if the character is lowercase;
   5468      *          {@code false} otherwise.
   5469      * @see     Character#isUpperCase(int)
   5470      * @see     Character#isTitleCase(int)
   5471      * @see     Character#toLowerCase(int)
   5472      * @see     Character#getType(int)
   5473      * @since   1.5
   5474      */
   5475     public static boolean isLowerCase(int codePoint) {
   5476         return isLowerCaseImpl(codePoint);
   5477     }
   5478 
   /**
    * Native counterpart of {@link #isLowerCase(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isLowerCaseImpl(int codePoint);
   5481 
   5482     /**
   5483      * Determines if the specified character is an uppercase character.
   5484      * <p>
   5485      * A character is uppercase if its general category type, provided by
   5486      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
   5487      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
   5488      * <p>
   5489      * The following are examples of uppercase characters:
   5490      * <blockquote><pre>
   5491      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   5492      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   5493      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   5494      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   5495      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   5496      * </pre></blockquote>
   5497      * <p> Many other Unicode characters are uppercase too.
   5498      *
   5499      * <p><b>Note:</b> This method cannot handle <a
   5500      * href="#supplementary"> supplementary characters</a>. To support
   5501      * all Unicode characters, including supplementary characters, use
   5502      * the {@link #isUpperCase(int)} method.
   5503      *
   5504      * @param   ch   the character to be tested.
   5505      * @return  {@code true} if the character is uppercase;
   5506      *          {@code false} otherwise.
   5507      * @see     Character#isLowerCase(char)
   5508      * @see     Character#isTitleCase(char)
   5509      * @see     Character#toUpperCase(char)
   5510      * @see     Character#getType(char)
   5511      * @since   1.0
   5512      */
   5513     public static boolean isUpperCase(char ch) {
   5514         return isUpperCase((int)ch);
   5515     }
   5516 
   5517     /**
   5518      * Determines if the specified character (Unicode code point) is an uppercase character.
   5519      * <p>
   5520      * A character is uppercase if its general category type, provided by
   5521      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
   5522      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
   5523      * <p>
   5524      * The following are examples of uppercase characters:
   5525      * <blockquote><pre>
   5526      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   5527      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   5528      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   5529      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   5530      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   5531      * </pre></blockquote>
   5532      * <p> Many other Unicode characters are uppercase too.
   5533      *
   5534      * @param   codePoint the character (Unicode code point) to be tested.
   5535      * @return  {@code true} if the character is uppercase;
   5536      *          {@code false} otherwise.
   5537      * @see     Character#isLowerCase(int)
   5538      * @see     Character#isTitleCase(int)
   5539      * @see     Character#toUpperCase(int)
   5540      * @see     Character#getType(int)
   5541      * @since   1.5
   5542      */
   5543     public static boolean isUpperCase(int codePoint) {
   5544         return isUpperCaseImpl(codePoint);
   5545     }
   5546 
   /**
    * Native counterpart of {@link #isUpperCase(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isUpperCaseImpl(int codePoint);
   5549 
   5550 
   5551     /**
   5552      * Determines if the specified character is a titlecase character.
   5553      * <p>
   5554      * A character is a titlecase character if its general
   5555      * category type, provided by {@code Character.getType(ch)},
   5556      * is {@code TITLECASE_LETTER}.
   5557      * <p>
   5558      * Some characters look like pairs of Latin letters. For example, there
   5559      * is an uppercase letter that looks like "LJ" and has a corresponding
   5560      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   5561      * is the appropriate form to use when rendering a word in lowercase
   5562      * with initial capitals, as for a book title.
   5563      * <p>
   5564      * These are some of the Unicode characters for which this method returns
   5565      * {@code true}:
   5566      * <ul>
   5567      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
   5568      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
   5569      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
   5570      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
   5571      * </ul>
   5572      * <p> Many other Unicode characters are titlecase too.
   5573      *
   5574      * <p><b>Note:</b> This method cannot handle <a
   5575      * href="#supplementary"> supplementary characters</a>. To support
   5576      * all Unicode characters, including supplementary characters, use
   5577      * the {@link #isTitleCase(int)} method.
   5578      *
   5579      * @param   ch   the character to be tested.
   5580      * @return  {@code true} if the character is titlecase;
   5581      *          {@code false} otherwise.
   5582      * @see     Character#isLowerCase(char)
   5583      * @see     Character#isUpperCase(char)
   5584      * @see     Character#toTitleCase(char)
   5585      * @see     Character#getType(char)
   5586      * @since   1.0.2
   5587      */
   5588     public static boolean isTitleCase(char ch) {
   5589         return isTitleCase((int)ch);
   5590     }
   5591 
   5592     /**
   5593      * Determines if the specified character (Unicode code point) is a titlecase character.
   5594      * <p>
   5595      * A character is a titlecase character if its general
   5596      * category type, provided by {@link Character#getType(int) getType(codePoint)},
   5597      * is {@code TITLECASE_LETTER}.
   5598      * <p>
   5599      * Some characters look like pairs of Latin letters. For example, there
   5600      * is an uppercase letter that looks like "LJ" and has a corresponding
   5601      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   5602      * is the appropriate form to use when rendering a word in lowercase
   5603      * with initial capitals, as for a book title.
   5604      * <p>
   5605      * These are some of the Unicode characters for which this method returns
   5606      * {@code true}:
   5607      * <ul>
   5608      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
   5609      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
   5610      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
   5611      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
   5612      * </ul>
   5613      * <p> Many other Unicode characters are titlecase too.
   5614      *
   5615      * @param   codePoint the character (Unicode code point) to be tested.
   5616      * @return  {@code true} if the character is titlecase;
   5617      *          {@code false} otherwise.
   5618      * @see     Character#isLowerCase(int)
   5619      * @see     Character#isUpperCase(int)
   5620      * @see     Character#toTitleCase(int)
   5621      * @see     Character#getType(int)
   5622      * @since   1.5
   5623      */
   5624     public static boolean isTitleCase(int codePoint) {
   5625         return isTitleCaseImpl(codePoint);
   5626     }
   5627 
   /**
    * Native counterpart of {@link #isTitleCase(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isTitleCaseImpl(int codePoint);
   5630 
   5631     /**
   5632      * Determines if the specified character is a digit.
   5633      * <p>
   5634      * A character is a digit if its general category type, provided
   5635      * by {@code Character.getType(ch)}, is
   5636      * {@code DECIMAL_DIGIT_NUMBER}.
   5637      * <p>
   5638      * Some Unicode character ranges that contain digits:
   5639      * <ul>
   5640      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
   5641      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
   5642      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
   5643      *     Arabic-Indic digits
   5644      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
   5645      *     Extended Arabic-Indic digits
   5646      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
   5647      *     Devanagari digits
   5648      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
   5649      *     Fullwidth digits
   5650      * </ul>
   5651      *
   5652      * Many other character ranges contain digits as well.
   5653      *
   5654      * <p><b>Note:</b> This method cannot handle <a
   5655      * href="#supplementary"> supplementary characters</a>. To support
   5656      * all Unicode characters, including supplementary characters, use
   5657      * the {@link #isDigit(int)} method.
   5658      *
   5659      * @param   ch   the character to be tested.
   5660      * @return  {@code true} if the character is a digit;
   5661      *          {@code false} otherwise.
   5662      * @see     Character#digit(char, int)
   5663      * @see     Character#forDigit(int, int)
   5664      * @see     Character#getType(char)
   5665      */
   5666     public static boolean isDigit(char ch) {
   5667         return isDigit((int)ch);
   5668     }
   5669 
   5670     /**
   5671      * Determines if the specified character (Unicode code point) is a digit.
   5672      * <p>
   5673      * A character is a digit if its general category type, provided
   5674      * by {@link Character#getType(int) getType(codePoint)}, is
   5675      * {@code DECIMAL_DIGIT_NUMBER}.
   5676      * <p>
   5677      * Some Unicode character ranges that contain digits:
   5678      * <ul>
   5679      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
   5680      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
   5681      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
   5682      *     Arabic-Indic digits
   5683      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
   5684      *     Extended Arabic-Indic digits
   5685      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
   5686      *     Devanagari digits
   5687      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
   5688      *     Fullwidth digits
   5689      * </ul>
   5690      *
   5691      * Many other character ranges contain digits as well.
   5692      *
   5693      * @param   codePoint the character (Unicode code point) to be tested.
   5694      * @return  {@code true} if the character is a digit;
   5695      *          {@code false} otherwise.
   5696      * @see     Character#forDigit(int, int)
   5697      * @see     Character#getType(int)
   5698      * @since   1.5
   5699      */
   5700     public static boolean isDigit(int codePoint) {
   5701         return isDigitImpl(codePoint);
   5702     }
   5703 
   /**
    * Native counterpart of {@link #isDigit(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isDigitImpl(int codePoint);
   5706 
   5707     /**
   5708      * Determines if a character is defined in Unicode.
   5709      * <p>
   5710      * A character is defined if at least one of the following is true:
   5711      * <ul>
   5712      * <li>It has an entry in the UnicodeData file.
   5713      * <li>It has a value in a range defined by the UnicodeData file.
   5714      * </ul>
   5715      *
   5716      * <p><b>Note:</b> This method cannot handle <a
   5717      * href="#supplementary"> supplementary characters</a>. To support
   5718      * all Unicode characters, including supplementary characters, use
   5719      * the {@link #isDefined(int)} method.
   5720      *
   5721      * @param   ch   the character to be tested
   5722      * @return  {@code true} if the character has a defined meaning
   5723      *          in Unicode; {@code false} otherwise.
   5724      * @see     Character#isDigit(char)
   5725      * @see     Character#isLetter(char)
   5726      * @see     Character#isLetterOrDigit(char)
   5727      * @see     Character#isLowerCase(char)
   5728      * @see     Character#isTitleCase(char)
   5729      * @see     Character#isUpperCase(char)
   5730      * @since   1.0.2
   5731      */
   5732     public static boolean isDefined(char ch) {
   5733         return isDefined((int)ch);
   5734     }
   5735 
   5736     /**
   5737      * Determines if a character (Unicode code point) is defined in Unicode.
   5738      * <p>
   5739      * A character is defined if at least one of the following is true:
   5740      * <ul>
   5741      * <li>It has an entry in the UnicodeData file.
   5742      * <li>It has a value in a range defined by the UnicodeData file.
   5743      * </ul>
   5744      *
   5745      * @param   codePoint the character (Unicode code point) to be tested.
   5746      * @return  {@code true} if the character has a defined meaning
   5747      *          in Unicode; {@code false} otherwise.
   5748      * @see     Character#isDigit(int)
   5749      * @see     Character#isLetter(int)
   5750      * @see     Character#isLetterOrDigit(int)
   5751      * @see     Character#isLowerCase(int)
   5752      * @see     Character#isTitleCase(int)
   5753      * @see     Character#isUpperCase(int)
   5754      * @since   1.5
   5755      */
   5756     public static boolean isDefined(int codePoint) {
   5757         return isDefinedImpl(codePoint);
   5758     }
   5759 
   /**
    * Native counterpart of {@link #isDefined(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isDefinedImpl(int codePoint);
   5762 
   5763     /**
   5764      * Determines if the specified character is a letter.
   5765      * <p>
   5766      * A character is considered to be a letter if its general
   5767      * category type, provided by {@code Character.getType(ch)},
   5768      * is any of the following:
   5769      * <ul>
   5770      * <li> {@code UPPERCASE_LETTER}
   5771      * <li> {@code LOWERCASE_LETTER}
   5772      * <li> {@code TITLECASE_LETTER}
   5773      * <li> {@code MODIFIER_LETTER}
   5774      * <li> {@code OTHER_LETTER}
   5775      * </ul>
   5776      *
   5777      * Not all letters have case. Many characters are
   5778      * letters but are neither uppercase nor lowercase nor titlecase.
   5779      *
   5780      * <p><b>Note:</b> This method cannot handle <a
   5781      * href="#supplementary"> supplementary characters</a>. To support
   5782      * all Unicode characters, including supplementary characters, use
   5783      * the {@link #isLetter(int)} method.
   5784      *
   5785      * @param   ch   the character to be tested.
   5786      * @return  {@code true} if the character is a letter;
   5787      *          {@code false} otherwise.
   5788      * @see     Character#isDigit(char)
   5789      * @see     Character#isJavaIdentifierStart(char)
   5790      * @see     Character#isJavaLetter(char)
   5791      * @see     Character#isJavaLetterOrDigit(char)
   5792      * @see     Character#isLetterOrDigit(char)
   5793      * @see     Character#isLowerCase(char)
   5794      * @see     Character#isTitleCase(char)
   5795      * @see     Character#isUnicodeIdentifierStart(char)
   5796      * @see     Character#isUpperCase(char)
   5797      */
   5798     public static boolean isLetter(char ch) {
   5799         return isLetter((int)ch);
   5800     }
   5801 
   5802     /**
   5803      * Determines if the specified character (Unicode code point) is a letter.
   5804      * <p>
   5805      * A character is considered to be a letter if its general
   5806      * category type, provided by {@link Character#getType(int) getType(codePoint)},
   5807      * is any of the following:
   5808      * <ul>
   5809      * <li> {@code UPPERCASE_LETTER}
   5810      * <li> {@code LOWERCASE_LETTER}
   5811      * <li> {@code TITLECASE_LETTER}
   5812      * <li> {@code MODIFIER_LETTER}
   5813      * <li> {@code OTHER_LETTER}
   5814      * </ul>
   5815      *
   5816      * Not all letters have case. Many characters are
   5817      * letters but are neither uppercase nor lowercase nor titlecase.
   5818      *
   5819      * @param   codePoint the character (Unicode code point) to be tested.
   5820      * @return  {@code true} if the character is a letter;
   5821      *          {@code false} otherwise.
   5822      * @see     Character#isDigit(int)
   5823      * @see     Character#isJavaIdentifierStart(int)
   5824      * @see     Character#isLetterOrDigit(int)
   5825      * @see     Character#isLowerCase(int)
   5826      * @see     Character#isTitleCase(int)
   5827      * @see     Character#isUnicodeIdentifierStart(int)
   5828      * @see     Character#isUpperCase(int)
   5829      * @since   1.5
   5830      */
   5831     public static boolean isLetter(int codePoint) {
   5832         return isLetterImpl(codePoint);
   5833     }
   5834 
   /**
    * Native counterpart of {@link #isLetter(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isLetterImpl(int codePoint);
   5837 
   5838     /**
   5839      * Determines if the specified character is a letter or digit.
   5840      * <p>
   5841      * A character is considered to be a letter or digit if either
   5842      * {@code Character.isLetter(char ch)} or
   5843      * {@code Character.isDigit(char ch)} returns
   5844      * {@code true} for the character.
   5845      *
   5846      * <p><b>Note:</b> This method cannot handle <a
   5847      * href="#supplementary"> supplementary characters</a>. To support
   5848      * all Unicode characters, including supplementary characters, use
   5849      * the {@link #isLetterOrDigit(int)} method.
   5850      *
   5851      * @param   ch   the character to be tested.
   5852      * @return  {@code true} if the character is a letter or digit;
   5853      *          {@code false} otherwise.
   5854      * @see     Character#isDigit(char)
   5855      * @see     Character#isJavaIdentifierPart(char)
   5856      * @see     Character#isJavaLetter(char)
   5857      * @see     Character#isJavaLetterOrDigit(char)
   5858      * @see     Character#isLetter(char)
   5859      * @see     Character#isUnicodeIdentifierPart(char)
   5860      * @since   1.0.2
   5861      */
   5862     public static boolean isLetterOrDigit(char ch) {
   5863         return isLetterOrDigit((int)ch);
   5864     }
   5865 
   5866     /**
   5867      * Determines if the specified character (Unicode code point) is a letter or digit.
   5868      * <p>
   5869      * A character is considered to be a letter or digit if either
   5870      * {@link #isLetter(int) isLetter(codePoint)} or
   5871      * {@link #isDigit(int) isDigit(codePoint)} returns
   5872      * {@code true} for the character.
   5873      *
   5874      * @param   codePoint the character (Unicode code point) to be tested.
   5875      * @return  {@code true} if the character is a letter or digit;
   5876      *          {@code false} otherwise.
   5877      * @see     Character#isDigit(int)
   5878      * @see     Character#isJavaIdentifierPart(int)
   5879      * @see     Character#isLetter(int)
   5880      * @see     Character#isUnicodeIdentifierPart(int)
   5881      * @since   1.5
   5882      */
   5883     public static boolean isLetterOrDigit(int codePoint) {
   5884         return isLetterOrDigitImpl(codePoint);
   5885     }
   5886 
   /**
    * Native counterpart of {@link #isLetterOrDigit(int)}, which forwards its
    * argument here unchanged and returns the result as-is. The body is
    * implemented in native code, not in this file.
    * NOTE(review): {@code @FastNative} presumably selects a faster native
    * call path on the Android runtime -- confirm against the annotation's docs.
    *
    * @param codePoint the character (Unicode code point) to be tested.
    * @return the value reported by the native implementation.
    */
   @FastNative
   static native boolean isLetterOrDigitImpl(int codePoint);
   5889 
   5890     /**
   5891      * Determines if the specified character is permissible as the first
   5892      * character in a Java identifier.
   5893      * <p>
   5894      * A character may start a Java identifier if and only if
   5895      * one of the following is true:
   5896      * <ul>
   5897      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   5898      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
   5899      * <li> {@code ch} is a currency symbol (such as {@code '$'})
   5900      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
   5901      * </ul>
   5902      *
   5903      * @param   ch the character to be tested.
   5904      * @return  {@code true} if the character may start a Java
   5905      *          identifier; {@code false} otherwise.
   5906      * @see     Character#isJavaLetterOrDigit(char)
   5907      * @see     Character#isJavaIdentifierStart(char)
   5908      * @see     Character#isJavaIdentifierPart(char)
   5909      * @see     Character#isLetter(char)
   5910      * @see     Character#isLetterOrDigit(char)
   5911      * @see     Character#isUnicodeIdentifierStart(char)
   5912      * @since   1.02
   5913      * @deprecated Replaced by isJavaIdentifierStart(char).
   5914      */
   5915     @Deprecated
   5916     public static boolean isJavaLetter(char ch) {
   5917         return isJavaIdentifierStart(ch);
   5918     }
   5919 
   5920     /**
   5921      * Determines if the specified character may be part of a Java
   5922      * identifier as other than the first character.
   5923      * <p>
   5924      * A character may be part of a Java identifier if and only if any
   5925      * of the following are true:
   5926      * <ul>
   5927      * <li>  it is a letter
   5928      * <li>  it is a currency symbol (such as {@code '$'})
   5929      * <li>  it is a connecting punctuation character (such as {@code '_'})
   5930      * <li>  it is a digit
   5931      * <li>  it is a numeric letter (such as a Roman numeral character)
   5932      * <li>  it is a combining mark
   5933      * <li>  it is a non-spacing mark
   5934      * <li> {@code isIdentifierIgnorable} returns
   5935      * {@code true} for the character.
   5936      * </ul>
   5937      *
   5938      * @param   ch the character to be tested.
   5939      * @return  {@code true} if the character may be part of a
   5940      *          Java identifier; {@code false} otherwise.
   5941      * @see     Character#isJavaLetter(char)
   5942      * @see     Character#isJavaIdentifierStart(char)
   5943      * @see     Character#isJavaIdentifierPart(char)
   5944      * @see     Character#isLetter(char)
   5945      * @see     Character#isLetterOrDigit(char)
   5946      * @see     Character#isUnicodeIdentifierPart(char)
   5947      * @see     Character#isIdentifierIgnorable(char)
   5948      * @since   1.02
   5949      * @deprecated Replaced by isJavaIdentifierPart(char).
   5950      */
   5951     @Deprecated
   5952     public static boolean isJavaLetterOrDigit(char ch) {
   5953         return isJavaIdentifierPart(ch);
   5954     }
   5955 
   5956     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
   5958      * <p>
   5959      * A character is considered to be alphabetic if its general category type,
   5960      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
   5961      * the following:
   5962      * <ul>
   5963      * <li> <code>UPPERCASE_LETTER</code>
   5964      * <li> <code>LOWERCASE_LETTER</code>
   5965      * <li> <code>TITLECASE_LETTER</code>
   5966      * <li> <code>MODIFIER_LETTER</code>
   5967      * <li> <code>OTHER_LETTER</code>
   5968      * <li> <code>LETTER_NUMBER</code>
   5969      * </ul>
   5970      * or it has contributory property Other_Alphabetic as defined by the
   5971      * Unicode Standard.
   5972      *
   5973      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
   5976      * @since   1.7
   5977      */
   5978     public static boolean isAlphabetic(int codePoint) {
   5979         return isAlphabeticImpl(codePoint);
   5980     }
   5981 
   5982     @FastNative
   5983     static native boolean isAlphabeticImpl(int codePoint);
   5984 
   5985 
   5986     /**
   5987      * Determines if the specified character (Unicode code point) is a CJKV
   5988      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
   5989      * the Unicode Standard.
   5990      *
   5991      * @param   codePoint the character (Unicode code point) to be tested.
   5992      * @return  <code>true</code> if the character is a Unicode ideograph
   5993      *          character, <code>false</code> otherwise.
   5994      * @since   1.7
   5995      */
   5996     public static boolean isIdeographic(int codePoint) {
   5997         return isIdeographicImpl(codePoint);
   5998     }
   5999     @FastNative
   6000     static native boolean isIdeographicImpl(int codePoint);
   6001 
   6002     /**
   6003      * Determines if the specified character is
   6004      * permissible as the first character in a Java identifier.
   6005      * <p>
   6006      * A character may start a Java identifier if and only if
   6007      * one of the following conditions is true:
   6008      * <ul>
   6009      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   6010      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
   6011      * <li> {@code ch} is a currency symbol (such as {@code '$'})
   6012      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
   6013      * </ul>
   6014      *
   6015      * <p><b>Note:</b> This method cannot handle <a
   6016      * href="#supplementary"> supplementary characters</a>. To support
   6017      * all Unicode characters, including supplementary characters, use
   6018      * the {@link #isJavaIdentifierStart(int)} method.
   6019      *
   6020      * @param   ch the character to be tested.
   6021      * @return  {@code true} if the character may start a Java identifier;
   6022      *          {@code false} otherwise.
   6023      * @see     Character#isJavaIdentifierPart(char)
   6024      * @see     Character#isLetter(char)
   6025      * @see     Character#isUnicodeIdentifierStart(char)
   6026      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6027      * @since   1.1
   6028      */
   6029     public static boolean isJavaIdentifierStart(char ch) {
   6030         return isJavaIdentifierStart((int)ch);
   6031     }
   6032 
   6033     /**
   6034      * Determines if the character (Unicode code point) is
   6035      * permissible as the first character in a Java identifier.
   6036      * <p>
   6037      * A character may start a Java identifier if and only if
   6038      * one of the following conditions is true:
   6039      * <ul>
   6040      * <li> {@link #isLetter(int) isLetter(codePoint)}
   6041      *      returns {@code true}
   6042      * <li> {@link #getType(int) getType(codePoint)}
   6043      *      returns {@code LETTER_NUMBER}
   6044      * <li> the referenced character is a currency symbol (such as {@code '$'})
   6045      * <li> the referenced character is a connecting punctuation character
   6046      *      (such as {@code '_'}).
   6047      * </ul>
   6048      *
   6049      * @param   codePoint the character (Unicode code point) to be tested.
   6050      * @return  {@code true} if the character may start a Java identifier;
   6051      *          {@code false} otherwise.
   6052      * @see     Character#isJavaIdentifierPart(int)
   6053      * @see     Character#isLetter(int)
   6054      * @see     Character#isUnicodeIdentifierStart(int)
   6055      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6056      * @since   1.5
   6057      */
   6058     public static boolean isJavaIdentifierStart(int codePoint) {
   6059         // Use precomputed bitmasks to optimize the ASCII range.
   6060         if (codePoint < 64) {
   6061             return (codePoint == '$'); // There's only one character in this range.
   6062         } else if (codePoint < 128) {
   6063             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   6064         }
   6065         return ((1 << getType(codePoint))
   6066                 & ((1 << UPPERCASE_LETTER)
   6067                    | (1 << LOWERCASE_LETTER)
   6068                    | (1  << TITLECASE_LETTER)
   6069                    | (1  << MODIFIER_LETTER)
   6070                    | (1  << OTHER_LETTER)
   6071                    | (1  << CURRENCY_SYMBOL)
   6072                    | (1  << CONNECTOR_PUNCTUATION)
   6073                    | (1  << LETTER_NUMBER))) != 0;
   6074     }
   6075 
   6076     /**
   6077      * Determines if the specified character may be part of a Java
   6078      * identifier as other than the first character.
   6079      * <p>
   6080      * A character may be part of a Java identifier if any of the following
   6081      * are true:
   6082      * <ul>
   6083      * <li>  it is a letter
   6084      * <li>  it is a currency symbol (such as {@code '$'})
   6085      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6086      * <li>  it is a digit
   6087      * <li>  it is a numeric letter (such as a Roman numeral character)
   6088      * <li>  it is a combining mark
   6089      * <li>  it is a non-spacing mark
   6090      * <li> {@code isIdentifierIgnorable} returns
   6091      * {@code true} for the character
   6092      * </ul>
   6093      *
   6094      * <p><b>Note:</b> This method cannot handle <a
   6095      * href="#supplementary"> supplementary characters</a>. To support
   6096      * all Unicode characters, including supplementary characters, use
   6097      * the {@link #isJavaIdentifierPart(int)} method.
   6098      *
   6099      * @param   ch      the character to be tested.
   6100      * @return {@code true} if the character may be part of a
   6101      *          Java identifier; {@code false} otherwise.
   6102      * @see     Character#isIdentifierIgnorable(char)
   6103      * @see     Character#isJavaIdentifierStart(char)
   6104      * @see     Character#isLetterOrDigit(char)
   6105      * @see     Character#isUnicodeIdentifierPart(char)
   6106      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6107      * @since   1.1
   6108      */
   6109     public static boolean isJavaIdentifierPart(char ch) {
   6110         return isJavaIdentifierPart((int)ch);
   6111     }
   6112 
   6113     /**
   6114      * Determines if the character (Unicode code point) may be part of a Java
   6115      * identifier as other than the first character.
   6116      * <p>
   6117      * A character may be part of a Java identifier if any of the following
   6118      * are true:
   6119      * <ul>
   6120      * <li>  it is a letter
   6121      * <li>  it is a currency symbol (such as {@code '$'})
   6122      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6123      * <li>  it is a digit
   6124      * <li>  it is a numeric letter (such as a Roman numeral character)
   6125      * <li>  it is a combining mark
   6126      * <li>  it is a non-spacing mark
   6127      * <li> {@link #isIdentifierIgnorable(int)
   6128      * isIdentifierIgnorable(codePoint)} returns {@code true} for
   6129      * the character
   6130      * </ul>
   6131      *
   6132      * @param   codePoint the character (Unicode code point) to be tested.
   6133      * @return {@code true} if the character may be part of a
   6134      *          Java identifier; {@code false} otherwise.
   6135      * @see     Character#isIdentifierIgnorable(int)
   6136      * @see     Character#isJavaIdentifierStart(int)
   6137      * @see     Character#isLetterOrDigit(int)
   6138      * @see     Character#isUnicodeIdentifierPart(int)
   6139      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6140      * @since   1.5
   6141      */
   6142     public static boolean isJavaIdentifierPart(int codePoint) {
   6143         // Use precomputed bitmasks to optimize the ASCII range.
   6144         if (codePoint < 64) {
   6145             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
   6146         } else if (codePoint < 128) {
   6147             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   6148         }
   6149         return ((1 << getType(codePoint))
   6150                 & ((1 << UPPERCASE_LETTER)
   6151                    | (1 << LOWERCASE_LETTER)
   6152                    | (1 << TITLECASE_LETTER)
   6153                    | (1 << MODIFIER_LETTER)
   6154                    | (1 << OTHER_LETTER)
   6155                    | (1 << CURRENCY_SYMBOL)
   6156                    | (1 << CONNECTOR_PUNCTUATION)
   6157                    | (1 << DECIMAL_DIGIT_NUMBER)
   6158                    | (1 << LETTER_NUMBER)
   6159                    | (1 << FORMAT)
   6160                    | (1 << COMBINING_SPACING_MARK)
   6161                    | (1 << NON_SPACING_MARK))) != 0
   6162                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
   6163                 || (codePoint >= 0x7f && codePoint <= 0x9f);
   6164     }
   6165 
   6166     /**
   6167      * Determines if the specified character is permissible as the
   6168      * first character in a Unicode identifier.
   6169      * <p>
   6170      * A character may start a Unicode identifier if and only if
   6171      * one of the following conditions is true:
   6172      * <ul>
   6173      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   6174      * <li> {@link #getType(char) getType(ch)} returns
   6175      *      {@code LETTER_NUMBER}.
   6176      * </ul>
   6177      *
   6178      * <p><b>Note:</b> This method cannot handle <a
   6179      * href="#supplementary"> supplementary characters</a>. To support
   6180      * all Unicode characters, including supplementary characters, use
   6181      * the {@link #isUnicodeIdentifierStart(int)} method.
   6182      *
   6183      * @param   ch      the character to be tested.
   6184      * @return  {@code true} if the character may start a Unicode
   6185      *          identifier; {@code false} otherwise.
   6186      * @see     Character#isJavaIdentifierStart(char)
   6187      * @see     Character#isLetter(char)
   6188      * @see     Character#isUnicodeIdentifierPart(char)
   6189      * @since   1.1
   6190      */
   6191     public static boolean isUnicodeIdentifierStart(char ch) {
   6192         return isUnicodeIdentifierStart((int)ch);
   6193     }
   6194 
   6195     /**
   6196      * Determines if the specified character (Unicode code point) is permissible as the
   6197      * first character in a Unicode identifier.
   6198      * <p>
   6199      * A character may start a Unicode identifier if and only if
   6200      * one of the following conditions is true:
   6201      * <ul>
   6202      * <li> {@link #isLetter(int) isLetter(codePoint)}
   6203      *      returns {@code true}
   6204      * <li> {@link #getType(int) getType(codePoint)}
   6205      *      returns {@code LETTER_NUMBER}.
   6206      * </ul>
   6207      * @param   codePoint the character (Unicode code point) to be tested.
   6208      * @return  {@code true} if the character may start a Unicode
   6209      *          identifier; {@code false} otherwise.
   6210      * @see     Character#isJavaIdentifierStart(int)
   6211      * @see     Character#isLetter(int)
   6212      * @see     Character#isUnicodeIdentifierPart(int)
   6213      * @since   1.5
   6214      */
   6215     public static boolean isUnicodeIdentifierStart(int codePoint) {
   6216         return isUnicodeIdentifierStartImpl(codePoint);
   6217     }
   6218 
   6219     @FastNative
   6220     static native boolean isUnicodeIdentifierStartImpl(int codePoint);
   6221 
   6222     /**
   6223      * Determines if the specified character may be part of a Unicode
   6224      * identifier as other than the first character.
   6225      * <p>
   6226      * A character may be part of a Unicode identifier if and only if
   6227      * one of the following statements is true:
   6228      * <ul>
   6229      * <li>  it is a letter
   6230      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6231      * <li>  it is a digit
   6232      * <li>  it is a numeric letter (such as a Roman numeral character)
   6233      * <li>  it is a combining mark
   6234      * <li>  it is a non-spacing mark
   6235      * <li> {@code isIdentifierIgnorable} returns
   6236      * {@code true} for this character.
   6237      * </ul>
   6238      *
   6239      * <p><b>Note:</b> This method cannot handle <a
   6240      * href="#supplementary"> supplementary characters</a>. To support
   6241      * all Unicode characters, including supplementary characters, use
   6242      * the {@link #isUnicodeIdentifierPart(int)} method.
   6243      *
   6244      * @param   ch      the character to be tested.
   6245      * @return  {@code true} if the character may be part of a
   6246      *          Unicode identifier; {@code false} otherwise.
   6247      * @see     Character#isIdentifierIgnorable(char)
   6248      * @see     Character#isJavaIdentifierPart(char)
   6249      * @see     Character#isLetterOrDigit(char)
   6250      * @see     Character#isUnicodeIdentifierStart(char)
   6251      * @since   1.1
   6252      */
   6253     public static boolean isUnicodeIdentifierPart(char ch) {
   6254         return isUnicodeIdentifierPart((int)ch);
   6255     }
   6256 
   6257     /**
   6258      * Determines if the specified character (Unicode code point) may be part of a Unicode
   6259      * identifier as other than the first character.
   6260      * <p>
   6261      * A character may be part of a Unicode identifier if and only if
   6262      * one of the following statements is true:
   6263      * <ul>
   6264      * <li>  it is a letter
   6265      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6266      * <li>  it is a digit
   6267      * <li>  it is a numeric letter (such as a Roman numeral character)
   6268      * <li>  it is a combining mark
   6269      * <li>  it is a non-spacing mark
   6270      * <li> {@code isIdentifierIgnorable} returns
   6271      * {@code true} for this character.
   6272      * </ul>
   6273      * @param   codePoint the character (Unicode code point) to be tested.
   6274      * @return  {@code true} if the character may be part of a
   6275      *          Unicode identifier; {@code false} otherwise.
   6276      * @see     Character#isIdentifierIgnorable(int)
   6277      * @see     Character#isJavaIdentifierPart(int)
   6278      * @see     Character#isLetterOrDigit(int)
   6279      * @see     Character#isUnicodeIdentifierStart(int)
   6280      * @since   1.5
   6281      */
   6282     public static boolean isUnicodeIdentifierPart(int codePoint) {
   6283         return isUnicodeIdentifierPartImpl(codePoint);
   6284     }
   6285 
   6286     @FastNative
   6287     static native boolean isUnicodeIdentifierPartImpl(int codePoint);
   6288 
   6289     /**
   6290      * Determines if the specified character should be regarded as
   6291      * an ignorable character in a Java identifier or a Unicode identifier.
   6292      * <p>
   6293      * The following Unicode characters are ignorable in a Java identifier
   6294      * or a Unicode identifier:
   6295      * <ul>
   6296      * <li>ISO control characters that are not whitespace
   6297      * <ul>
   6298      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
   6299      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
   6300      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
   6301      * </ul>
   6302      *
   6303      * <li>all characters that have the {@code FORMAT} general
   6304      * category value
   6305      * </ul>
   6306      *
   6307      * <p><b>Note:</b> This method cannot handle <a
   6308      * href="#supplementary"> supplementary characters</a>. To support
   6309      * all Unicode characters, including supplementary characters, use
   6310      * the {@link #isIdentifierIgnorable(int)} method.
   6311      *
   6312      * @param   ch      the character to be tested.
   6313      * @return  {@code true} if the character is an ignorable control
   6314      *          character that may be part of a Java or Unicode identifier;
   6315      *           {@code false} otherwise.
   6316      * @see     Character#isJavaIdentifierPart(char)
   6317      * @see     Character#isUnicodeIdentifierPart(char)
   6318      * @since   1.1
   6319      */
   6320     public static boolean isIdentifierIgnorable(char ch) {
   6321         return isIdentifierIgnorable((int)ch);
   6322     }
   6323 
   6324     /**
   6325      * Determines if the specified character (Unicode code point) should be regarded as
   6326      * an ignorable character in a Java identifier or a Unicode identifier.
   6327      * <p>
   6328      * The following Unicode characters are ignorable in a Java identifier
   6329      * or a Unicode identifier:
   6330      * <ul>
   6331      * <li>ISO control characters that are not whitespace
   6332      * <ul>
   6333      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
   6334      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
   6335      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
   6336      * </ul>
   6337      *
   6338      * <li>all characters that have the {@code FORMAT} general
   6339      * category value
   6340      * </ul>
   6341      *
   6342      * @param   codePoint the character (Unicode code point) to be tested.
   6343      * @return  {@code true} if the character is an ignorable control
   6344      *          character that may be part of a Java or Unicode identifier;
   6345      *          {@code false} otherwise.
   6346      * @see     Character#isJavaIdentifierPart(int)
   6347      * @see     Character#isUnicodeIdentifierPart(int)
   6348      * @since   1.5
   6349      */
   6350     public static boolean isIdentifierIgnorable(int codePoint) {
   6351         return isIdentifierIgnorableImpl(codePoint);
   6352     }
   6353 
   6354     @FastNative
   6355     static native boolean isIdentifierIgnorableImpl(int codePoint);
   6356 
   6357     /**
   6358      * Converts the character argument to lowercase using case
   6359      * mapping information from the UnicodeData file.
   6360      * <p>
   6361      * Note that
   6362      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
   6363      * does not always return {@code true} for some ranges of
   6364      * characters, particularly those that are symbols or ideographs.
   6365      *
   6366      * <p>In general, {@link String#toLowerCase()} should be used to map
   6367      * characters to lowercase. {@code String} case mapping methods
   6368      * have several benefits over {@code Character} case mapping methods.
   6369      * {@code String} case mapping methods can perform locale-sensitive
   6370      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6371      * the {@code Character} case mapping methods cannot.
   6372      *
   6373      * <p><b>Note:</b> This method cannot handle <a
   6374      * href="#supplementary"> supplementary characters</a>. To support
   6375      * all Unicode characters, including supplementary characters, use
   6376      * the {@link #toLowerCase(int)} method.
   6377      *
   6378      * @param   ch   the character to be converted.
   6379      * @return  the lowercase equivalent of the character, if any;
   6380      *          otherwise, the character itself.
   6381      * @see     Character#isLowerCase(char)
   6382      * @see     String#toLowerCase()
   6383      */
   6384     public static char toLowerCase(char ch) {
   6385         return (char)toLowerCase((int)ch);
   6386     }
   6387 
   6388     /**
   6389      * Converts the character (Unicode code point) argument to
   6390      * lowercase using case mapping information from the UnicodeData
   6391      * file.
   6392      *
   6393      * <p> Note that
   6394      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
   6395      * does not always return {@code true} for some ranges of
   6396      * characters, particularly those that are symbols or ideographs.
   6397      *
   6398      * <p>In general, {@link String#toLowerCase()} should be used to map
   6399      * characters to lowercase. {@code String} case mapping methods
   6400      * have several benefits over {@code Character} case mapping methods.
   6401      * {@code String} case mapping methods can perform locale-sensitive
   6402      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6403      * the {@code Character} case mapping methods cannot.
   6404      *
   6405      * @param   codePoint   the character (Unicode code point) to be converted.
   6406      * @return  the lowercase equivalent of the character (Unicode code
   6407      *          point), if any; otherwise, the character itself.
   6408      * @see     Character#isLowerCase(int)
   6409      * @see     String#toLowerCase()
   6410      *
   6411      * @since   1.5
   6412      */
   6413     public static int toLowerCase(int codePoint) {
   6414         if (codePoint >= 'A' && codePoint <= 'Z') {
   6415             return codePoint + ('a' - 'A');
   6416         }
   6417 
   6418         // All ASCII codepoints except the ones above remain unchanged.
   6419         if (codePoint < 0x80) {
   6420             return codePoint;
   6421         }
   6422 
   6423         return toLowerCaseImpl(codePoint);
   6424     }
   6425 
   6426     @FastNative
   6427     static native int toLowerCaseImpl(int codePoint);
   6428 
   6429     /**
   6430      * Converts the character argument to uppercase using case mapping
   6431      * information from the UnicodeData file.
   6432      * <p>
   6433      * Note that
   6434      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
   6435      * does not always return {@code true} for some ranges of
   6436      * characters, particularly those that are symbols or ideographs.
   6437      *
   6438      * <p>In general, {@link String#toUpperCase()} should be used to map
   6439      * characters to uppercase. {@code String} case mapping methods
   6440      * have several benefits over {@code Character} case mapping methods.
   6441      * {@code String} case mapping methods can perform locale-sensitive
   6442      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6443      * the {@code Character} case mapping methods cannot.
   6444      *
   6445      * <p><b>Note:</b> This method cannot handle <a
   6446      * href="#supplementary"> supplementary characters</a>. To support
   6447      * all Unicode characters, including supplementary characters, use
   6448      * the {@link #toUpperCase(int)} method.
   6449      *
   6450      * @param   ch   the character to be converted.
   6451      * @return  the uppercase equivalent of the character, if any;
   6452      *          otherwise, the character itself.
   6453      * @see     Character#isUpperCase(char)
   6454      * @see     String#toUpperCase()
   6455      */
   6456     public static char toUpperCase(char ch) {
   6457         return (char)toUpperCase((int)ch);
   6458     }
   6459 
   6460     /**
   6461      * Converts the character (Unicode code point) argument to
   6462      * uppercase using case mapping information from the UnicodeData
   6463      * file.
   6464      *
   6465      * <p>Note that
   6466      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
   6467      * does not always return {@code true} for some ranges of
   6468      * characters, particularly those that are symbols or ideographs.
   6469      *
   6470      * <p>In general, {@link String#toUpperCase()} should be used to map
   6471      * characters to uppercase. {@code String} case mapping methods
   6472      * have several benefits over {@code Character} case mapping methods.
   6473      * {@code String} case mapping methods can perform locale-sensitive
   6474      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6475      * the {@code Character} case mapping methods cannot.
   6476      *
   6477      * @param   codePoint   the character (Unicode code point) to be converted.
   6478      * @return  the uppercase equivalent of the character, if any;
   6479      *          otherwise, the character itself.
   6480      * @see     Character#isUpperCase(int)
   6481      * @see     String#toUpperCase()
   6482      *
   6483      * @since   1.5
   6484      */
   6485     public static int toUpperCase(int codePoint) {
   6486         if (codePoint >= 'a' && codePoint <= 'z') {
   6487             return codePoint - ('a' - 'A');
   6488         }
   6489 
   6490         // All ASCII codepoints except the ones above remain unchanged.
   6491         if (codePoint < 0x80) {
   6492             return codePoint;
   6493         }
   6494 
   6495         return toUpperCaseImpl(codePoint);
   6496     }
   6497 
   6498     @FastNative
   6499     static native int toUpperCaseImpl(int codePoint);
   6500 
   6501     /**
   6502      * Converts the character argument to titlecase using case mapping
   6503      * information from the UnicodeData file. If a character has no
   6504      * explicit titlecase mapping and is not itself a titlecase char
   6505      * according to UnicodeData, then the uppercase mapping is
   6506      * returned as an equivalent titlecase mapping. If the
   6507      * {@code char} argument is already a titlecase
   6508      * {@code char}, the same {@code char} value will be
   6509      * returned.
   6510      * <p>
   6511      * Note that
   6512      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
   6513      * does not always return {@code true} for some ranges of
   6514      * characters.
   6515      *
   6516      * <p><b>Note:</b> This method cannot handle <a
   6517      * href="#supplementary"> supplementary characters</a>. To support
   6518      * all Unicode characters, including supplementary characters, use
   6519      * the {@link #toTitleCase(int)} method.
   6520      *
   6521      * @param   ch   the character to be converted.
   6522      * @return  the titlecase equivalent of the character, if any;
   6523      *          otherwise, the character itself.
   6524      * @see     Character#isTitleCase(char)
   6525      * @see     Character#toLowerCase(char)
   6526      * @see     Character#toUpperCase(char)
   6527      * @since   1.0.2
   6528      */
   6529     public static char toTitleCase(char ch) {
   6530         return (char)toTitleCase((int)ch);
   6531     }
   6532 
   6533     /**
   6534      * Converts the character (Unicode code point) argument to titlecase using case mapping
   6535      * information from the UnicodeData file. If a character has no
   6536      * explicit titlecase mapping and is not itself a titlecase char
   6537      * according to UnicodeData, then the uppercase mapping is
   6538      * returned as an equivalent titlecase mapping. If the
   6539      * character argument is already a titlecase
   6540      * character, the same character value will be
   6541      * returned.
   6542      *
   6543      * <p>Note that
   6544      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
   6545      * does not always return {@code true} for some ranges of
   6546      * characters.
   6547      *
   6548      * @param   codePoint   the character (Unicode code point) to be converted.
   6549      * @return  the titlecase equivalent of the character, if any;
   6550      *          otherwise, the character itself.
   6551      * @see     Character#isTitleCase(int)
   6552      * @see     Character#toLowerCase(int)
   6553      * @see     Character#toUpperCase(int)
   6554      * @since   1.5
   6555      */
   6556     public static int toTitleCase(int codePoint) {
   6557         return toTitleCaseImpl(codePoint);
   6558     }
   6559 
   6560     @FastNative
   6561     static native int toTitleCaseImpl(int codePoint);
   6562 
   6563     /**
   6564      * Returns the numeric value of the character {@code ch} in the
   6565      * specified radix.
   6566      * <p>
   6567      * If the radix is not in the range {@code MIN_RADIX} &le;
   6568      * {@code radix} &le; {@code MAX_RADIX} or if the
   6569      * value of {@code ch} is not a valid digit in the specified
   6570      * radix, {@code -1} is returned. A character is a valid digit
   6571      * if at least one of the following is true:
   6572      * <ul>
   6573      * <li>The method {@code isDigit} is {@code true} of the character
   6574      *     and the Unicode decimal digit value of the character (or its
   6575      *     single-character decomposition) is less than the specified radix.
   6576      *     In this case the decimal digit value is returned.
   6577      * <li>The character is one of the uppercase Latin letters
   6578      *     {@code 'A'} through {@code 'Z'} and its code is less than
   6579      *     {@code radix + 'A' - 10}.
   6580      *     In this case, {@code ch - 'A' + 10}
   6581      *     is returned.
   6582      * <li>The character is one of the lowercase Latin letters
   6583      *     {@code 'a'} through {@code 'z'} and its code is less than
   6584      *     {@code radix + 'a' - 10}.
   6585      *     In this case, {@code ch - 'a' + 10}
   6586      *     is returned.
   6587      * <li>The character is one of the fullwidth uppercase Latin letters A
   6588      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
   6589      *     and its code is less than
   6590      *     {@code radix + '\u005CuFF21' - 10}.
   6591      *     In this case, {@code ch - '\u005CuFF21' + 10}
   6592      *     is returned.
   6593      * <li>The character is one of the fullwidth lowercase Latin letters a
   6594      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
   6595      *     and its code is less than
   6596      *     {@code radix + '\u005CuFF41' - 10}.
   6597      *     In this case, {@code ch - '\u005CuFF41' + 10}
   6598      *     is returned.
   6599      * </ul>
   6600      *
   6601      * <p><b>Note:</b> This method cannot handle <a
   6602      * href="#supplementary"> supplementary characters</a>. To support
   6603      * all Unicode characters, including supplementary characters, use
   6604      * the {@link #digit(int, int)} method.
   6605      *
   6606      * @param   ch      the character to be converted.
   6607      * @param   radix   the radix.
   6608      * @return  the numeric value represented by the character in the
   6609      *          specified radix.
   6610      * @see     Character#forDigit(int, int)
   6611      * @see     Character#isDigit(char)
   6612      */
   6613     public static int digit(char ch, int radix) {
   6614         return digit((int)ch, radix);
   6615     }
   6616 
   6617     /**
   6618      * Returns the numeric value of the specified character (Unicode
   6619      * code point) in the specified radix.
   6620      *
   6621      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
   6622      * {@code radix} &le; {@code MAX_RADIX} or if the
   6623      * character is not a valid digit in the specified
   6624      * radix, {@code -1} is returned. A character is a valid digit
   6625      * if at least one of the following is true:
   6626      * <ul>
   6627      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
   6628      *     and the Unicode decimal digit value of the character (or its
   6629      *     single-character decomposition) is less than the specified radix.
   6630      *     In this case the decimal digit value is returned.
   6631      * <li>The character is one of the uppercase Latin letters
   6632      *     {@code 'A'} through {@code 'Z'} and its code is less than
   6633      *     {@code radix + 'A' - 10}.
   6634      *     In this case, {@code codePoint - 'A' + 10}
   6635      *     is returned.
   6636      * <li>The character is one of the lowercase Latin letters
   6637      *     {@code 'a'} through {@code 'z'} and its code is less than
   6638      *     {@code radix + 'a' - 10}.
   6639      *     In this case, {@code codePoint - 'a' + 10}
   6640      *     is returned.
   6641      * <li>The character is one of the fullwidth uppercase Latin letters A
   6642      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
   6643      *     and its code is less than
   6644      *     {@code radix + '\u005CuFF21' - 10}.
   6645      *     In this case,
   6646      *     {@code codePoint - '\u005CuFF21' + 10}
   6647      *     is returned.
   6648      * <li>The character is one of the fullwidth lowercase Latin letters a
   6649      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
   6650      *     and its code is less than
   6651      *     {@code radix + '\u005CuFF41'- 10}.
   6652      *     In this case,
   6653      *     {@code codePoint - '\u005CuFF41' + 10}
   6654      *     is returned.
   6655      * </ul>
   6656      *
   6657      * @param   codePoint the character (Unicode code point) to be converted.
   6658      * @param   radix   the radix.
   6659      * @return  the numeric value represented by the character in the
   6660      *          specified radix.
   6661      * @see     Character#forDigit(int, int)
   6662      * @see     Character#isDigit(int)
   6663      * @since   1.5
   6664      */
   6665     public static int digit(int codePoint, int radix) {
   6666         if (radix < MIN_RADIX || radix > MAX_RADIX) {
   6667             return -1;
   6668         }
   6669         if (codePoint < 128) {
   6670             // Optimized for ASCII
   6671             int result = -1;
   6672             if ('0' <= codePoint && codePoint <= '9') {
   6673                 result = codePoint - '0';
   6674             } else if ('a' <= codePoint && codePoint <= 'z') {
   6675                 result = 10 + (codePoint - 'a');
   6676             } else if ('A' <= codePoint && codePoint <= 'Z') {
   6677                 result = 10 + (codePoint - 'A');
   6678             }
   6679             return result < radix ? result : -1;
   6680         }
   6681         return digitImpl(codePoint, radix);
   6682     }
   6683 
   6684     @FastNative
   6685     native static int digitImpl(int codePoint, int radix);
   6686 
   6687     /**
   6688      * Returns the {@code int} value that the specified Unicode
   6689      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
   6692      * <p>
   6693      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
   6694      * {@code '\u005Cu005A'}), lowercase
   6695      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
   6696      * full width variant ({@code '\u005CuFF21'} through
   6697      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
   6698      * {@code '\u005CuFF5A'}) forms have numeric values from 10
   6699      * through 35. This is independent of the Unicode specification,
   6700      * which does not assign numeric values to these {@code char}
   6701      * values.
   6702      * <p>
   6703      * If the character does not have a numeric value, then -1 is returned.
   6704      * If the character has a numeric value that cannot be represented as a
   6705      * nonnegative integer (for example, a fractional value), then -2
   6706      * is returned.
   6707      *
   6708      * <p><b>Note:</b> This method cannot handle <a
   6709      * href="#supplementary"> supplementary characters</a>. To support
   6710      * all Unicode characters, including supplementary characters, use
   6711      * the {@link #getNumericValue(int)} method.
   6712      *
   6713      * @param   ch      the character to be converted.
   6714      * @return  the numeric value of the character, as a nonnegative {@code int}
     *          value; -2 if the character has a numeric value that is not a
   6716      *          nonnegative integer; -1 if the character has no numeric value.
   6717      * @see     Character#forDigit(int, int)
   6718      * @see     Character#isDigit(char)
   6719      * @since   1.1
   6720      */
   6721     public static int getNumericValue(char ch) {
   6722         return getNumericValue((int)ch);
   6723     }
   6724 
   6725     /**
   6726      * Returns the {@code int} value that the specified
   6727      * character (Unicode code point) represents. For example, the character
   6728      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
   6729      * an {@code int} with a value of 50.
   6730      * <p>
   6731      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
   6732      * {@code '\u005Cu005A'}), lowercase
   6733      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
   6734      * full width variant ({@code '\u005CuFF21'} through
   6735      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
   6736      * {@code '\u005CuFF5A'}) forms have numeric values from 10
   6737      * through 35. This is independent of the Unicode specification,
   6738      * which does not assign numeric values to these {@code char}
   6739      * values.
   6740      * <p>
   6741      * If the character does not have a numeric value, then -1 is returned.
   6742      * If the character has a numeric value that cannot be represented as a
   6743      * nonnegative integer (for example, a fractional value), then -2
   6744      * is returned.
   6745      *
   6746      * @param   codePoint the character (Unicode code point) to be converted.
   6747      * @return  the numeric value of the character, as a nonnegative {@code int}
   6748      *          value; -2 if the character has a numeric value that is not a
   6749      *          nonnegative integer; -1 if the character has no numeric value.
   6750      * @see     Character#forDigit(int, int)
   6751      * @see     Character#isDigit(int)
   6752      * @since   1.5
   6753      */
   6754     public static int getNumericValue(int codePoint) {
   6755         // This is both an optimization and papers over differences between Java and ICU.
   6756         if (codePoint < 128) {
   6757             if (codePoint >= '0' && codePoint <= '9') {
   6758                 return codePoint - '0';
   6759             }
   6760             if (codePoint >= 'a' && codePoint <= 'z') {
   6761                 return codePoint - ('a' - 10);
   6762             }
   6763             if (codePoint >= 'A' && codePoint <= 'Z') {
   6764                 return codePoint - ('A' - 10);
   6765             }
   6766             return -1;
   6767         }
   6768         // Full-width uppercase A-Z.
   6769         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
   6770             return codePoint - 0xff17;
   6771         }
   6772         // Full-width lowercase a-z.
   6773         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
   6774             return codePoint - 0xff37;
   6775         }
   6776         return getNumericValueImpl(codePoint);
   6777     }
   6778 
   6779     @FastNative
   6780     native static int getNumericValueImpl(int codePoint);
   6781 
   6782     /**
   6783      * Determines if the specified character is ISO-LATIN-1 white space.
   6784      * This method returns {@code true} for the following five
   6785      * characters only:
   6786      * <table summary="truechars">
   6787      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
   6788      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
   6789      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
   6790      *     <td>{@code NEW LINE}</td></tr>
   6791      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
   6792      *     <td>{@code FORM FEED}</td></tr>
   6793      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
   6794      *     <td>{@code CARRIAGE RETURN}</td></tr>
   6795      * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
   6796      *     <td>{@code SPACE}</td></tr>
   6797      * </table>
   6798      *
   6799      * @param      ch   the character to be tested.
   6800      * @return     {@code true} if the character is ISO-LATIN-1 white
   6801      *             space; {@code false} otherwise.
   6802      * @see        Character#isSpaceChar(char)
   6803      * @see        Character#isWhitespace(char)
   6804      * @deprecated Replaced by isWhitespace(char).
   6805      */
   6806     @Deprecated
   6807     public static boolean isSpace(char ch) {
   6808         return (ch <= 0x0020) &&
   6809             (((((1L << 0x0009) |
   6810             (1L << 0x000A) |
   6811             (1L << 0x000C) |
   6812             (1L << 0x000D) |
   6813             (1L << 0x0020)) >> ch) & 1L) != 0);
   6814     }
   6815 
   6816 
   6817     /**
   6818      * Determines if the specified character is a Unicode space character.
   6819      * A character is considered to be a space character if and only if
   6820      * it is specified to be a space character by the Unicode Standard. This
   6821      * method returns true if the character's general category type is any of
   6822      * the following:
   6823      * <ul>
   6824      * <li> {@code SPACE_SEPARATOR}
   6825      * <li> {@code LINE_SEPARATOR}
   6826      * <li> {@code PARAGRAPH_SEPARATOR}
   6827      * </ul>
   6828      *
   6829      * <p><b>Note:</b> This method cannot handle <a
   6830      * href="#supplementary"> supplementary characters</a>. To support
   6831      * all Unicode characters, including supplementary characters, use
   6832      * the {@link #isSpaceChar(int)} method.
   6833      *
   6834      * @param   ch      the character to be tested.
   6835      * @return  {@code true} if the character is a space character;
   6836      *          {@code false} otherwise.
   6837      * @see     Character#isWhitespace(char)
   6838      * @since   1.1
   6839      */
   6840     public static boolean isSpaceChar(char ch) {
   6841         return isSpaceChar((int)ch);
   6842     }
   6843 
   6844     /**
   6845      * Determines if the specified character (Unicode code point) is a
   6846      * Unicode space character.  A character is considered to be a
   6847      * space character if and only if it is specified to be a space
   6848      * character by the Unicode Standard. This method returns true if
   6849      * the character's general category type is any of the following:
   6850      *
   6851      * <ul>
   6852      * <li> {@link #SPACE_SEPARATOR}
   6853      * <li> {@link #LINE_SEPARATOR}
   6854      * <li> {@link #PARAGRAPH_SEPARATOR}
   6855      * </ul>
   6856      *
   6857      * @param   codePoint the character (Unicode code point) to be tested.
   6858      * @return  {@code true} if the character is a space character;
   6859      *          {@code false} otherwise.
   6860      * @see     Character#isWhitespace(int)
   6861      * @since   1.5
   6862      */
   6863     public static boolean isSpaceChar(int codePoint) {
   6864         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   6865         // SPACE or NO-BREAK SPACE?
   6866         if (codePoint == 0x20 || codePoint == 0xa0) {
   6867             return true;
   6868         }
   6869         if (codePoint < 0x1000) {
   6870             return false;
   6871         }
   6872         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   6873         if (codePoint == 0x1680 || codePoint == 0x180e) {
   6874             return true;
   6875         }
   6876         if (codePoint < 0x2000) {
   6877             return false;
   6878         }
   6879         if (codePoint <= 0xffff) {
   6880             // Other whitespace from General Punctuation...
   6881             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
   6882                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   6883         }
   6884         // Let icu4c worry about non-BMP code points.
   6885         return isSpaceCharImpl(codePoint);
   6886     }
   6887 
   6888     @FastNative
   6889     static native boolean isSpaceCharImpl(int codePoint);
   6890 
   6891     /**
   6892      * Determines if the specified character is white space according to Java.
   6893      * A character is a Java whitespace character if and only if it satisfies
   6894      * one of the following criteria:
   6895      * <ul>
   6896      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
   6897      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
   6898      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
   6899      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
   6900      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
   6901      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
   6902      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
   6903      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
   6904      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
   6905      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
   6906      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
   6907      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
   6908      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
   6909      * </ul>
   6910      *
   6911      * <p><b>Note:</b> This method cannot handle <a
   6912      * href="#supplementary"> supplementary characters</a>. To support
   6913      * all Unicode characters, including supplementary characters, use
   6914      * the {@link #isWhitespace(int)} method.
   6915      *
   6916      * @param   ch the character to be tested.
   6917      * @return  {@code true} if the character is a Java whitespace
   6918      *          character; {@code false} otherwise.
   6919      * @see     Character#isSpaceChar(char)
   6920      * @since   1.1
   6921      */
   6922     public static boolean isWhitespace(char ch) {
   6923         return isWhitespace((int)ch);
   6924     }
   6925 
   6926     /**
   6927      * Determines if the specified character (Unicode code point) is
   6928      * white space according to Java.  A character is a Java
   6929      * whitespace character if and only if it satisfies one of the
   6930      * following criteria:
   6931      * <ul>
   6932      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
   6933      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
   6934      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
   6935      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
   6936      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
   6937      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
   6938      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
   6939      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
   6940      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
   6941      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
   6942      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
   6943      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
   6944      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
     * </ul>
     *
   6948      * @param   codePoint the character (Unicode code point) to be tested.
   6949      * @return  {@code true} if the character is a Java whitespace
   6950      *          character; {@code false} otherwise.
   6951      * @see     Character#isSpaceChar(int)
   6952      * @since   1.5
   6953      */
   6954     public static boolean isWhitespace(int codePoint) {
   6955         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   6956         // Any ASCII whitespace character?
   6957         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
   6958             return true;
   6959         }
   6960         if (codePoint < 0x1000) {
   6961             return false;
   6962         }
   6963         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   6964         if (codePoint == 0x1680 || codePoint == 0x180e) {
   6965             return true;
   6966         }
   6967         if (codePoint < 0x2000) {
   6968             return false;
   6969         }
   6970         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
   6971         if (codePoint == 0x2007 || codePoint == 0x202f) {
   6972             return false;
   6973         }
   6974         if (codePoint <= 0xffff) {
   6975             // Other whitespace from General Punctuation...
   6976             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
   6977                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   6978         }
   6979         // Let icu4c worry about non-BMP code points.
   6980         return isWhitespaceImpl(codePoint);
   6981     }
   6982 
   6983     @FastNative
   6984     native static boolean isWhitespaceImpl(int codePoint);
   6985 
   6986     /**
   6987      * Determines if the specified character is an ISO control
   6988      * character.  A character is considered to be an ISO control
   6989      * character if its code is in the range {@code '\u005Cu0000'}
   6990      * through {@code '\u005Cu001F'} or in the range
   6991      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
   6992      *
   6993      * <p><b>Note:</b> This method cannot handle <a
   6994      * href="#supplementary"> supplementary characters</a>. To support
   6995      * all Unicode characters, including supplementary characters, use
   6996      * the {@link #isISOControl(int)} method.
   6997      *
   6998      * @param   ch      the character to be tested.
   6999      * @return  {@code true} if the character is an ISO control character;
   7000      *          {@code false} otherwise.
   7001      *
   7002      * @see     Character#isSpaceChar(char)
   7003      * @see     Character#isWhitespace(char)
   7004      * @since   1.1
   7005      */
   7006     public static boolean isISOControl(char ch) {
   7007         return isISOControl((int)ch);
   7008     }
   7009 
   7010     /**
   7011      * Determines if the referenced character (Unicode code point) is an ISO control
   7012      * character.  A character is considered to be an ISO control
   7013      * character if its code is in the range {@code '\u005Cu0000'}
   7014      * through {@code '\u005Cu001F'} or in the range
   7015      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
   7016      *
   7017      * @param   codePoint the character (Unicode code point) to be tested.
   7018      * @return  {@code true} if the character is an ISO control character;
   7019      *          {@code false} otherwise.
   7020      * @see     Character#isSpaceChar(int)
   7021      * @see     Character#isWhitespace(int)
   7022      * @since   1.5
   7023      */
   7024     public static boolean isISOControl(int codePoint) {
   7025         // Optimized form of:
   7026         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
   7027         //     (codePoint >= 0x7F && codePoint <= 0x9F);
   7028         return codePoint <= 0x9F &&
   7029             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
   7030     }
   7031 
   7032     /**
   7033      * Returns a value indicating a character's general category.
   7034      *
   7035      * <p><b>Note:</b> This method cannot handle <a
   7036      * href="#supplementary"> supplementary characters</a>. To support
   7037      * all Unicode characters, including supplementary characters, use
   7038      * the {@link #getType(int)} method.
   7039      *
   7040      * @param   ch      the character to be tested.
   7041      * @return  a value of type {@code int} representing the
   7042      *          character's general category.
   7043      * @see     Character#COMBINING_SPACING_MARK
   7044      * @see     Character#CONNECTOR_PUNCTUATION
   7045      * @see     Character#CONTROL
   7046      * @see     Character#CURRENCY_SYMBOL
   7047      * @see     Character#DASH_PUNCTUATION
   7048      * @see     Character#DECIMAL_DIGIT_NUMBER
   7049      * @see     Character#ENCLOSING_MARK
   7050      * @see     Character#END_PUNCTUATION
   7051      * @see     Character#FINAL_QUOTE_PUNCTUATION
   7052      * @see     Character#FORMAT
   7053      * @see     Character#INITIAL_QUOTE_PUNCTUATION
   7054      * @see     Character#LETTER_NUMBER
   7055      * @see     Character#LINE_SEPARATOR
   7056      * @see     Character#LOWERCASE_LETTER
   7057      * @see     Character#MATH_SYMBOL
   7058      * @see     Character#MODIFIER_LETTER
   7059      * @see     Character#MODIFIER_SYMBOL
   7060      * @see     Character#NON_SPACING_MARK
   7061      * @see     Character#OTHER_LETTER
   7062      * @see     Character#OTHER_NUMBER
   7063      * @see     Character#OTHER_PUNCTUATION
   7064      * @see     Character#OTHER_SYMBOL
   7065      * @see     Character#PARAGRAPH_SEPARATOR
   7066      * @see     Character#PRIVATE_USE
   7067      * @see     Character#SPACE_SEPARATOR
   7068      * @see     Character#START_PUNCTUATION
   7069      * @see     Character#SURROGATE
   7070      * @see     Character#TITLECASE_LETTER
   7071      * @see     Character#UNASSIGNED
   7072      * @see     Character#UPPERCASE_LETTER
   7073      * @since   1.1
   7074      */
   7075     public static int getType(char ch) {
   7076         return getType((int)ch);
   7077     }
   7078 
   7079     /**
   7080      * Returns a value indicating a character's general category.
   7081      *
   7082      * @param   codePoint the character (Unicode code point) to be tested.
   7083      * @return  a value of type {@code int} representing the
   7084      *          character's general category.
   7085      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
   7086      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
   7087      * @see     Character#CONTROL CONTROL
   7088      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
   7089      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
   7090      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
   7091      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
   7092      * @see     Character#END_PUNCTUATION END_PUNCTUATION
   7093      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
   7094      * @see     Character#FORMAT FORMAT
   7095      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
   7096      * @see     Character#LETTER_NUMBER LETTER_NUMBER
   7097      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
   7098      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
   7099      * @see     Character#MATH_SYMBOL MATH_SYMBOL
   7100      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
   7101      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
   7102      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
   7103      * @see     Character#OTHER_LETTER OTHER_LETTER
   7104      * @see     Character#OTHER_NUMBER OTHER_NUMBER
   7105      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
   7106      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
   7107      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
   7108      * @see     Character#PRIVATE_USE PRIVATE_USE
   7109      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
   7110      * @see     Character#START_PUNCTUATION START_PUNCTUATION
   7111      * @see     Character#SURROGATE SURROGATE
   7112      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
   7113      * @see     Character#UNASSIGNED UNASSIGNED
   7114      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
   7115      * @since   1.5
   7116      */
   7117     public static int getType(int codePoint) {
   7118         int type = getTypeImpl(codePoint);
   7119         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
   7120         if (type <= Character.FORMAT) {
   7121             return type;
   7122         }
   7123         return (type + 1);
   7124     }
   7125 
   7126     @FastNative
   7127     static native int getTypeImpl(int codePoint);
   7128 
   7129     /**
   7130      * Determines the character representation for a specific digit in
   7131      * the specified radix. If the value of {@code radix} is not a
   7132      * valid radix, or the value of {@code digit} is not a valid
   7133      * digit in the specified radix, the null character
   7134      * ({@code '\u005Cu0000'}) is returned.
   7135      * <p>
   7136      * The {@code radix} argument is valid if it is greater than or
   7137      * equal to {@code MIN_RADIX} and less than or equal to
   7138      * {@code MAX_RADIX}. The {@code digit} argument is valid if
   7139      * {@code 0 <= digit < radix}.
   7140      * <p>
   7141      * If the digit is less than 10, then
   7142      * {@code '0' + digit} is returned. Otherwise, the value
   7143      * {@code 'a' + digit - 10} is returned.
   7144      *
   7145      * @param   digit   the number to convert to a character.
   7146      * @param   radix   the radix.
   7147      * @return  the {@code char} representation of the specified digit
   7148      *          in the specified radix.
   7149      * @see     Character#MIN_RADIX
   7150      * @see     Character#MAX_RADIX
   7151      * @see     Character#digit(char, int)
   7152      */
   7153     public static char forDigit(int digit, int radix) {
   7154         if ((digit >= radix) || (digit < 0)) {
   7155             return '\0';
   7156         }
   7157         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
   7158             return '\0';
   7159         }
   7160         if (digit < 10) {
   7161             return (char)('0' + digit);
   7162         }
   7163         return (char)('a' - 10 + digit);
   7164     }
   7165 
   7166     /**
   7167      * Returns the Unicode directionality property for the given
   7168      * character.  Character directionality is used to calculate the
   7169      * visual ordering of text. The directionality value of undefined
   7170      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
   7171      *
   7172      * <p><b>Note:</b> This method cannot handle <a
   7173      * href="#supplementary"> supplementary characters</a>. To support
   7174      * all Unicode characters, including supplementary characters, use
   7175      * the {@link #getDirectionality(int)} method.
   7176      *
   7177      * @param  ch {@code char} for which the directionality property
   7178      *            is requested.
   7179      * @return the directionality property of the {@code char} value.
   7180      *
   7181      * @see Character#DIRECTIONALITY_UNDEFINED
   7182      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
   7183      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
   7184      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   7185      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
   7186      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   7187      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   7188      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
   7189      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   7190      * @see Character#DIRECTIONALITY_NONSPACING_MARK
   7191      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
   7192      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
   7193      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
   7194      * @see Character#DIRECTIONALITY_WHITESPACE
   7195      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
   7196      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   7197      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   7198      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   7199      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   7200      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   7201      * @since 1.4
   7202      */
   7203     public static byte getDirectionality(char ch) {
   7204         return getDirectionality((int)ch);
   7205     }
   7206 
   7207     /**
   7208      * Returns the Unicode directionality property for the given
   7209      * character (Unicode code point).  Character directionality is
   7210      * used to calculate the visual ordering of text. The
   7211      * directionality value of undefined character is {@link
   7212      * #DIRECTIONALITY_UNDEFINED}.
   7213      *
   7214      * @param   codePoint the character (Unicode code point) for which
   7215      *          the directionality property is requested.
   7216      * @return the directionality property of the character.
   7217      *
   7218      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
   7219      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
   7220      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
   7221      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   7222      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
   7223      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   7224      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   7225      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
   7226      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   7227      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
   7228      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
   7229      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
   7230      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
   7231      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
   7232      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
   7233      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   7234      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   7235      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   7236      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   7237      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   7238      * @since    1.5
   7239      */
   7240     public static byte getDirectionality(int codePoint) {
   7241         if (getType(codePoint) == Character.UNASSIGNED) {
   7242             return Character.DIRECTIONALITY_UNDEFINED;
   7243         }
   7244 
   7245         byte directionality = getDirectionalityImpl(codePoint);
   7246         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
   7247             return DIRECTIONALITY[directionality];
   7248         }
   7249         return Character.DIRECTIONALITY_UNDEFINED;
   7250     }
   7251 
    /**
     * Native lookup backing {@link #getDirectionality(int)}.  The caller
     * treats the returned value as an index into the {@code DIRECTIONALITY}
     * table and bounds-checks it before use, so this method's result is not
     * itself one of the public {@code DIRECTIONALITY_*} constants.
     *
     * @param codePoint the character (Unicode code point) to look up.
     * @return the raw directionality value reported by the native
     *         implementation.
     */
    @FastNative
    native static byte getDirectionalityImpl(int codePoint);
   7254     /**
   7255      * Determines whether the character is mirrored according to the
   7256      * Unicode specification.  Mirrored characters should have their
   7257      * glyphs horizontally mirrored when displayed in text that is
   7258      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
   7259      * PARENTHESIS is semantically defined to be an <i>opening
   7260      * parenthesis</i>.  This will appear as a "(" in text that is
   7261      * left-to-right but as a ")" in text that is right-to-left.
   7262      *
   7263      * <p><b>Note:</b> This method cannot handle <a
   7264      * href="#supplementary"> supplementary characters</a>. To support
   7265      * all Unicode characters, including supplementary characters, use
   7266      * the {@link #isMirrored(int)} method.
   7267      *
   7268      * @param  ch {@code char} for which the mirrored property is requested
   7269      * @return {@code true} if the char is mirrored, {@code false}
   7270      *         if the {@code char} is not mirrored or is not defined.
   7271      * @since 1.4
   7272      */
   7273     public static boolean isMirrored(char ch) {
   7274         return isMirrored((int)ch);
   7275     }
   7276 
   7277     /**
   7278      * Determines whether the specified character (Unicode code point)
   7279      * is mirrored according to the Unicode specification.  Mirrored
   7280      * characters should have their glyphs horizontally mirrored when
   7281      * displayed in text that is right-to-left.  For example,
   7282      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
   7283      * defined to be an <i>opening parenthesis</i>.  This will appear
   7284      * as a "(" in text that is left-to-right but as a ")" in text
   7285      * that is right-to-left.
   7286      *
   7287      * @param   codePoint the character (Unicode code point) to be tested.
   7288      * @return  {@code true} if the character is mirrored, {@code false}
   7289      *          if the character is not mirrored or is not defined.
   7290      * @since   1.5
   7291      */
   7292     public static boolean isMirrored(int codePoint) {
   7293         return isMirroredImpl(codePoint);
   7294     }
   7295 
    /**
     * Native lookup backing {@link #isMirrored(int)}, which returns this
     * method's result unchanged.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return whether the native implementation reports the code point as
     *         mirrored.
     */
    @FastNative
    native static boolean isMirroredImpl(int codePoint);
   7298     /**
   7299      * Compares two {@code Character} objects numerically.
   7300      *
   7301      * @param   anotherCharacter   the {@code Character} to be compared.
   7302 
   7303      * @return  the value {@code 0} if the argument {@code Character}
   7304      *          is equal to this {@code Character}; a value less than
   7305      *          {@code 0} if this {@code Character} is numerically less
   7306      *          than the {@code Character} argument; and a value greater than
   7307      *          {@code 0} if this {@code Character} is numerically greater
   7308      *          than the {@code Character} argument (unsigned comparison).
   7309      *          Note that this is strictly a numerical comparison; it is not
   7310      *          locale-dependent.
   7311      * @since   1.2
   7312      */
   7313     public int compareTo(Character anotherCharacter) {
   7314         return compare(this.value, anotherCharacter.value);
   7315     }
   7316 
   7317     /**
   7318      * Compares two {@code char} values numerically.
   7319      * The value returned is identical to what would be returned by:
   7320      * <pre>
   7321      *    Character.valueOf(x).compareTo(Character.valueOf(y))
   7322      * </pre>
   7323      *
   7324      * @param  x the first {@code char} to compare
   7325      * @param  y the second {@code char} to compare
   7326      * @return the value {@code 0} if {@code x == y};
   7327      *         a value less than {@code 0} if {@code x < y}; and
   7328      *         a value greater than {@code 0} if {@code x > y}
   7329      * @since 1.7
   7330      */
   7331     public static int compare(char x, char y) {
   7332         return x - y;
   7333     }
   7334 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
   7342 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. Derived from {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
   7350 
   7351     /**
   7352      * Returns the value obtained by reversing the order of the bytes in the
   7353      * specified <tt>char</tt> value.
   7354      *
   7355      * @param ch The {@code char} of which to reverse the byte order.
   7356      * @return the value obtained by reversing (or, equivalently, swapping)
   7357      *     the bytes in the specified <tt>char</tt> value.
   7358      * @since 1.5
   7359      */
   7360     public static char reverseBytes(char ch) {
   7361         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
   7362     }
   7363 
   7364     /**
   7365      * Returns the Unicode name of the specified character
   7366      * {@code codePoint}, or null if the code point is
   7367      * {@link #UNASSIGNED unassigned}.
   7368      * <p>
   7369      * Note: if the specified character is not assigned a name by
   7370      * the <i>UnicodeData</i> file (part of the Unicode Character
   7371      * Database maintained by the Unicode Consortium), the returned
   7372      * name is the same as the result of expression.
   7373      *
   7374      * <blockquote>{@code
   7375      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
   7376      *     + " "
   7377      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7378      *
   7379      * }</blockquote>
   7380      *
   7381      * @param  codePoint the character (Unicode code point)
   7382      *
   7383      * @return the Unicode name of the specified character, or null if
   7384      *         the code point is unassigned.
   7385      *
   7386      * @exception IllegalArgumentException if the specified
   7387      *            {@code codePoint} is not a valid Unicode
   7388      *            code point.
   7389      *
   7390      * @since 1.7
   7391      */
   7392     public static String getName(int codePoint) {
   7393         if (!isValidCodePoint(codePoint)) {
   7394             throw new IllegalArgumentException();
   7395         }
   7396         String name = getNameImpl(codePoint);
   7397         if (name != null)
   7398             return name;
   7399         if (getType(codePoint) == UNASSIGNED)
   7400             return null;
   7401         UnicodeBlock block = UnicodeBlock.of(codePoint);
   7402         if (block != null)
   7403             return block.toString().replace('_', ' ') + " "
   7404                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7405         // should never come here
   7406         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7407     }
   7408 
    /**
     * Native lookup backing {@link #getName(int)}.  The caller treats a
     * {@code null} result as "no name available" and falls back to other
     * means of producing a name.
     *
     * @param codePoint the character (Unicode code point) to look up.
     * @return the name reported by the native implementation, or
     *         {@code null}.
     */
    private static native String getNameImpl(int codePoint);
   7410 }
   7411