Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
      3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      4  *
      5  * This code is free software; you can redistribute it and/or modify it
      6  * under the terms of the GNU General Public License version 2 only, as
      7  * published by the Free Software Foundation.  Oracle designates this
      8  * particular file as subject to the "Classpath" exception as provided
      9  * by Oracle in the LICENSE file that accompanied this code.
     10  *
     11  * This code is distributed in the hope that it will be useful, but WITHOUT
     12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  * version 2 for more details (a copy is included in the LICENSE file that
     15  * accompanied this code).
     16  *
     17  * You should have received a copy of the GNU General Public License version
     18  * 2 along with this work; if not, write to the Free Software Foundation,
     19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     20  *
     21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     22  * or visit www.oracle.com if you need additional information or have any
     23  * questions.
     24  */
     25 
     26 package java.lang;
     27 
     28 import dalvik.annotation.optimization.FastNative;
     29 import java.util.Arrays;
     30 import java.util.HashMap;
     31 import java.util.Locale;
     32 import java.util.Map;
     33 
     34 // Android-changed: Remove reference to a specific unicode standard version
     35 /**
     36  * The {@code Character} class wraps a value of the primitive
     37  * type {@code char} in an object. An object of type
     38  * {@code Character} contains a single field whose type is
     39  * {@code char}.
     40  * <p>
     41  * In addition, this class provides several methods for determining
     42  * a character's category (lowercase letter, digit, etc.) and for converting
     43  * characters from uppercase to lowercase and vice versa.
     44  * <p>
     45  * Character information is based on the Unicode Standard.
     46  * <p>
     47  * The methods and data of class {@code Character} are defined by
     48  * the information in the <i>UnicodeData</i> file that is part of the
     49  * Unicode Character Database maintained by the Unicode
     50  * Consortium. This file specifies various properties including name
     51  * and general category for every defined Unicode code point or
     52  * character range.
     53  * <p>
     54  * The file and its description are available from the Unicode Consortium at:
     55  * <ul>
     56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
     57  * </ul>
     58  *
     59  * <h3><a name="unicode">Unicode Character Representations</a></h3>
     60  *
     61  * <p>The {@code char} data type (and therefore the value that a
     62  * {@code Character} object encapsulates) are based on the
     63  * original Unicode specification, which defined characters as
     64  * fixed-width 16-bit entities. The Unicode Standard has since been
     65  * changed to allow for characters whose representation requires more
     66  * than 16 bits.  The range of legal <em>code point</em>s is now
     67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
     68  * (Refer to the <a
     69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
     70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
     71  * Standard.)
     72  *
     73  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
     74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
     75  * <a name="supplementary">Characters</a> whose code points are greater
     76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
     77  * platform uses the UTF-16 representation in {@code char} arrays and
     78  * in the {@code String} and {@code StringBuffer} classes. In
     79  * this representation, supplementary characters are represented as a pair
     80  * of {@code char} values, the first from the <em>high-surrogates</em>
     81  * range, (&#92;uD800-&#92;uDBFF), the second from the
     82  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
     83  *
     84  * <p>A {@code char} value, therefore, represents Basic
     85  * Multilingual Plane (BMP) code points, including the surrogate
     86  * code points, or code units of the UTF-16 encoding. An
     87  * {@code int} value represents all Unicode code points,
     88  * including supplementary code points. The lower (least significant)
     89  * 21 bits of {@code int} are used to represent Unicode code
     90  * points and the upper (most significant) 11 bits must be zero.
     91  * Unless otherwise specified, the behavior with respect to
     92  * supplementary characters and surrogate {@code char} values is
     93  * as follows:
     94  *
     95  * <ul>
     96  * <li>The methods that only accept a {@code char} value cannot support
     97  * supplementary characters. They treat {@code char} values from the
     98  * surrogate ranges as undefined characters. For example,
     99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
    100  * this specific value if followed by any low-surrogate value in a string
    101  * would represent a letter.
    102  *
    103  * <li>The methods that accept an {@code int} value support all
    104  * Unicode characters, including supplementary characters. For
    105  * example, {@code Character.isLetter(0x2F81A)} returns
    106  * {@code true} because the code point value represents a letter
    107  * (a CJK ideograph).
    108  * </ul>
    109  *
    110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
    111  * used for character values in the range between U+0000 and U+10FFFF,
    112  * and <em>Unicode code unit</em> is used for 16-bit
    113  * {@code char} values that are code units of the <em>UTF-16</em>
    114  * encoding. For more information on Unicode terminology, refer to the
    115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
    116  *
    117  * @author  Lee Boynton
    118  * @author  Guy Steele
    119  * @author  Akira Tanaka
    120  * @author  Martin Buchholz
    121  * @author  Ulf Zibis
    122  * @since   1.0
    123  */
    124 public final
    125 class Character implements java.io.Serializable, Comparable<Character> {
    126     /**
    127      * The minimum radix available for conversion to and from strings.
    128      * The constant value of this field is the smallest value permitted
    129      * for the radix argument in radix-conversion methods such as the
    130      * {@code digit} method, the {@code forDigit} method, and the
    131      * {@code toString} method of class {@code Integer}.
    132      *
    133      * @see     Character#digit(char, int)
    134      * @see     Character#forDigit(int, int)
    135      * @see     Integer#toString(int, int)
    136      * @see     Integer#valueOf(String)
    137      */
    138     public static final int MIN_RADIX = 2;
    139 
    140     /**
    141      * The maximum radix available for conversion to and from strings.
    142      * The constant value of this field is the largest value permitted
    143      * for the radix argument in radix-conversion methods such as the
    144      * {@code digit} method, the {@code forDigit} method, and the
    145      * {@code toString} method of class {@code Integer}.
    146      *
    147      * @see     Character#digit(char, int)
    148      * @see     Character#forDigit(int, int)
    149      * @see     Integer#toString(int, int)
    150      * @see     Integer#valueOf(String)
    151      */
    152     public static final int MAX_RADIX = 36;
    153 
    154     /**
    155      * The constant value of this field is the smallest value of type
    156      * {@code char}, {@code '\u005Cu0000'}.
    157      *
    158      * @since   1.0.2
    159      */
    160     public static final char MIN_VALUE = '\u0000';
    161 
    162     /**
    163      * The constant value of this field is the largest value of type
    164      * {@code char}, {@code '\u005CuFFFF'}.
    165      *
    166      * @since   1.0.2
    167      */
    168     public static final char MAX_VALUE = '\uFFFF';
    169 
    170     /**
    171      * The {@code Class} instance representing the primitive type
    172      * {@code char}.
    173      *
    174      * @since   1.1
    175      */
    176     @SuppressWarnings("unchecked")
    177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
    178 
    179     /*
    180      * Normative general types
    181      */
    182 
    183     /*
    184      * General character types
    185      */
    186 
    187     /**
    188      * General category "Cn" in the Unicode specification.
    189      * @since   1.1
    190      */
    191     public static final byte UNASSIGNED = 0;
    192 
    193     /**
    194      * General category "Lu" in the Unicode specification.
    195      * @since   1.1
    196      */
    197     public static final byte UPPERCASE_LETTER = 1;
    198 
    199     /**
    200      * General category "Ll" in the Unicode specification.
    201      * @since   1.1
    202      */
    203     public static final byte LOWERCASE_LETTER = 2;
    204 
    205     /**
    206      * General category "Lt" in the Unicode specification.
    207      * @since   1.1
    208      */
    209     public static final byte TITLECASE_LETTER = 3;
    210 
    211     /**
    212      * General category "Lm" in the Unicode specification.
    213      * @since   1.1
    214      */
    215     public static final byte MODIFIER_LETTER = 4;
    216 
    217     /**
    218      * General category "Lo" in the Unicode specification.
    219      * @since   1.1
    220      */
    221     public static final byte OTHER_LETTER = 5;
    222 
    223     /**
    224      * General category "Mn" in the Unicode specification.
    225      * @since   1.1
    226      */
    227     public static final byte NON_SPACING_MARK = 6;
    228 
    229     /**
    230      * General category "Me" in the Unicode specification.
    231      * @since   1.1
    232      */
    233     public static final byte ENCLOSING_MARK = 7;
    234 
    235     /**
    236      * General category "Mc" in the Unicode specification.
    237      * @since   1.1
    238      */
    239     public static final byte COMBINING_SPACING_MARK = 8;
    240 
    241     /**
    242      * General category "Nd" in the Unicode specification.
    243      * @since   1.1
    244      */
    245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
    246 
    247     /**
    248      * General category "Nl" in the Unicode specification.
    249      * @since   1.1
    250      */
    251     public static final byte LETTER_NUMBER = 10;
    252 
    253     /**
    254      * General category "No" in the Unicode specification.
    255      * @since   1.1
    256      */
    257     public static final byte OTHER_NUMBER = 11;
    258 
    259     /**
    260      * General category "Zs" in the Unicode specification.
    261      * @since   1.1
    262      */
    263     public static final byte SPACE_SEPARATOR = 12;
    264 
    265     /**
    266      * General category "Zl" in the Unicode specification.
    267      * @since   1.1
    268      */
    269     public static final byte LINE_SEPARATOR = 13;
    270 
    271     /**
    272      * General category "Zp" in the Unicode specification.
    273      * @since   1.1
    274      */
    275     public static final byte PARAGRAPH_SEPARATOR = 14;
    276 
    277     /**
    278      * General category "Cc" in the Unicode specification.
    279      * @since   1.1
    280      */
    281     public static final byte CONTROL = 15;
    282 
    283     /**
    284      * General category "Cf" in the Unicode specification.
    285      * @since   1.1
    286      */
    287     public static final byte FORMAT = 16;
    288 
    289     /**
    290      * General category "Co" in the Unicode specification.
    291      * @since   1.1
    292      */
    293     public static final byte PRIVATE_USE = 18;
    294 
    295     /**
    296      * General category "Cs" in the Unicode specification.
    297      * @since   1.1
    298      */
    299     public static final byte SURROGATE = 19;
    300 
    301     /**
    302      * General category "Pd" in the Unicode specification.
    303      * @since   1.1
    304      */
    305     public static final byte DASH_PUNCTUATION = 20;
    306 
    307     /**
    308      * General category "Ps" in the Unicode specification.
    309      * @since   1.1
    310      */
    311     public static final byte START_PUNCTUATION = 21;
    312 
    313     /**
    314      * General category "Pe" in the Unicode specification.
    315      * @since   1.1
    316      */
    317     public static final byte END_PUNCTUATION = 22;
    318 
    319     /**
    320      * General category "Pc" in the Unicode specification.
    321      * @since   1.1
    322      */
    323     public static final byte CONNECTOR_PUNCTUATION = 23;
    324 
    325     /**
    326      * General category "Po" in the Unicode specification.
    327      * @since   1.1
    328      */
    329     public static final byte OTHER_PUNCTUATION = 24;
    330 
    331     /**
    332      * General category "Sm" in the Unicode specification.
    333      * @since   1.1
    334      */
    335     public static final byte MATH_SYMBOL = 25;
    336 
    337     /**
    338      * General category "Sc" in the Unicode specification.
    339      * @since   1.1
    340      */
    341     public static final byte CURRENCY_SYMBOL = 26;
    342 
    343     /**
    344      * General category "Sk" in the Unicode specification.
    345      * @since   1.1
    346      */
    347     public static final byte MODIFIER_SYMBOL = 27;
    348 
    349     /**
    350      * General category "So" in the Unicode specification.
    351      * @since   1.1
    352      */
    353     public static final byte OTHER_SYMBOL = 28;
    354 
    355     /**
    356      * General category "Pi" in the Unicode specification.
    357      * @since   1.4
    358      */
    359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
    360 
    361     /**
    362      * General category "Pf" in the Unicode specification.
    363      * @since   1.4
    364      */
    365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
    366 
    367     /**
    368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
    369      */
    370     static final int ERROR = 0xFFFFFFFF;
    371 
    372 
    373     /**
    374      * Undefined bidirectional character type. Undefined {@code char}
    375      * values have undefined directionality in the Unicode specification.
    376      * @since 1.4
    377      */
    378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
    379 
    380     /**
    381      * Strong bidirectional character type "L" in the Unicode specification.
    382      * @since 1.4
    383      */
    384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
    385 
    386     /**
    387      * Strong bidirectional character type "R" in the Unicode specification.
    388      * @since 1.4
    389      */
    390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
    391 
    392     /**
    393      * Strong bidirectional character type "AL" in the Unicode specification.
    394      * @since 1.4
    395      */
    396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
    397 
    398     /**
    399      * Weak bidirectional character type "EN" in the Unicode specification.
    400      * @since 1.4
    401      */
    402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
    403 
    404     /**
    405      * Weak bidirectional character type "ES" in the Unicode specification.
    406      * @since 1.4
    407      */
    408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
    409 
    410     /**
    411      * Weak bidirectional character type "ET" in the Unicode specification.
    412      * @since 1.4
    413      */
    414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
    415 
    416     /**
    417      * Weak bidirectional character type "AN" in the Unicode specification.
    418      * @since 1.4
    419      */
    420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
    421 
    422     /**
    423      * Weak bidirectional character type "CS" in the Unicode specification.
    424      * @since 1.4
    425      */
    426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
    427 
    428     /**
    429      * Weak bidirectional character type "NSM" in the Unicode specification.
    430      * @since 1.4
    431      */
    432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
    433 
    434     /**
    435      * Weak bidirectional character type "BN" in the Unicode specification.
    436      * @since 1.4
    437      */
    438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
    439 
    440     /**
    441      * Neutral bidirectional character type "B" in the Unicode specification.
    442      * @since 1.4
    443      */
    444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
    445 
    446     /**
    447      * Neutral bidirectional character type "S" in the Unicode specification.
    448      * @since 1.4
    449      */
    450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
    451 
    452     /**
    453      * Neutral bidirectional character type "WS" in the Unicode specification.
    454      * @since 1.4
    455      */
    456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
    457 
    458     /**
    459      * Neutral bidirectional character type "ON" in the Unicode specification.
    460      * @since 1.4
    461      */
    462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
    463 
    464     /**
    465      * Strong bidirectional character type "LRE" in the Unicode specification.
    466      * @since 1.4
    467      */
    468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
    469 
    470     /**
    471      * Strong bidirectional character type "LRO" in the Unicode specification.
    472      * @since 1.4
    473      */
    474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
    475 
    476     /**
    477      * Strong bidirectional character type "RLE" in the Unicode specification.
    478      * @since 1.4
    479      */
    480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
    481 
    482     /**
    483      * Strong bidirectional character type "RLO" in the Unicode specification.
    484      * @since 1.4
    485      */
    486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
    487 
    488     /**
    489      * Weak bidirectional character type "PDF" in the Unicode specification.
    490      * @since 1.4
    491      */
    492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
    493 
    494     /**
    495      * The minimum value of a
    496      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
    497      * Unicode high-surrogate code unit</a>
    498      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
    499      * A high-surrogate is also known as a <i>leading-surrogate</i>.
    500      *
    501      * @since 1.5
    502      */
    503     public static final char MIN_HIGH_SURROGATE = '\uD800';
    504 
    505     /**
    506      * The maximum value of a
    507      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
    508      * Unicode high-surrogate code unit</a>
    509      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
    510      * A high-surrogate is also known as a <i>leading-surrogate</i>.
    511      *
    512      * @since 1.5
    513      */
    514     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
    515 
    516     /**
    517      * The minimum value of a
    518      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
    519      * Unicode low-surrogate code unit</a>
    520      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
    521      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
    522      *
    523      * @since 1.5
    524      */
    525     public static final char MIN_LOW_SURROGATE  = '\uDC00';
    526 
    527     /**
    528      * The maximum value of a
    529      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
    530      * Unicode low-surrogate code unit</a>
    531      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
    532      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
    533      *
    534      * @since 1.5
    535      */
    536     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
    537 
    538     /**
    539      * The minimum value of a Unicode surrogate code unit in the
    540      * UTF-16 encoding, constant {@code '\u005CuD800'}.
    541      *
    542      * @since 1.5
    543      */
    544     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
    545 
    546     /**
    547      * The maximum value of a Unicode surrogate code unit in the
    548      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
    549      *
    550      * @since 1.5
    551      */
    552     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
    553 
    554     /**
    555      * The minimum value of a
    556      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
    557      * Unicode supplementary code point</a>, constant {@code U+10000}.
    558      *
    559      * @since 1.5
    560      */
    561     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
    562 
    563     /**
    564      * The minimum value of a
    565      * <a href="http://www.unicode.org/glossary/#code_point">
    566      * Unicode code point</a>, constant {@code U+0000}.
    567      *
    568      * @since 1.5
    569      */
    570     public static final int MIN_CODE_POINT = 0x000000;
    571 
    572     /**
    573      * The maximum value of a
    574      * <a href="http://www.unicode.org/glossary/#code_point">
    575      * Unicode code point</a>, constant {@code U+10FFFF}.
    576      *
    577      * @since 1.5
    578      */
    579     public static final int MAX_CODE_POINT = 0X10FFFF;
    580 
    581     private static final byte[] DIRECTIONALITY = new byte[] {
    582             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
    583             DIRECTIONALITY_EUROPEAN_NUMBER,
    584             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
    585             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
    586             DIRECTIONALITY_ARABIC_NUMBER,
    587             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
    588             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
    589             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
    590             DIRECTIONALITY_OTHER_NEUTRALS,
    591             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
    592             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
    593             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
    594             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
    595             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
    596             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
    597             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    598 
    599     /**
    600      * Instances of this class represent particular subsets of the Unicode
    601      * character set.  The only family of subsets defined in the
    602      * {@code Character} class is {@link Character.UnicodeBlock}.
    603      * Other portions of the Java API may define other subsets for their
    604      * own purposes.
    605      *
    606      * @since 1.2
    607      */
    608     public static class Subset  {
    609 
    610         private String name;
    611 
    612         /**
    613          * Constructs a new {@code Subset} instance.
    614          *
    615          * @param  name  The name of this subset
    616          * @exception NullPointerException if name is {@code null}
    617          */
    618         protected Subset(String name) {
    619             if (name == null) {
    620                 throw new NullPointerException("name");
    621             }
    622             this.name = name;
    623         }
    624 
    625         /**
    626          * Compares two {@code Subset} objects for equality.
    627          * This method returns {@code true} if and only if
    628          * {@code this} and the argument refer to the same
    629          * object; since this method is {@code final}, this
    630          * guarantee holds for all subclasses.
    631          */
    632         public final boolean equals(Object obj) {
    633             return (this == obj);
    634         }
    635 
    636         /**
    637          * Returns the standard hash code as defined by the
    638          * {@link Object#hashCode} method.  This method
    639          * is {@code final} in order to ensure that the
    640          * {@code equals} and {@code hashCode} methods will
    641          * be consistent in all subclasses.
    642          */
    643         public final int hashCode() {
    644             return super.hashCode();
    645         }
    646 
    647         /**
    648          * Returns the name of this subset.
    649          */
    650         public final String toString() {
    651             return name;
    652         }
    653     }
    654 
    655     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
    656     // for the latest specification of Unicode Blocks.
    657 
    658     /**
    659      * A family of character subsets representing the character blocks in the
    660      * Unicode specification. Character blocks generally define characters
    661      * used for a specific script or purpose. A character is contained by
    662      * at most one Unicode block.
    663      *
    664      * @since 1.2
    665      */
    666     public static final class UnicodeBlock extends Subset {
    667 
    668         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
    669 
    670         /**
    671          * Creates a UnicodeBlock with the given identifier name.
    672          * This name must be the same as the block identifier.
    673          */
    674         private UnicodeBlock(String idName) {
    675             this(idName, true);
    676         }
    677 
    678         private UnicodeBlock(String idName, boolean isMap) {
    679             super(idName);
    680             if (isMap) {
    681                 map.put(idName, this);
    682             }
    683         }
    684 
    685         /**
    686          * Creates a UnicodeBlock with the given identifier name and
    687          * alias name.
    688          */
    689         private UnicodeBlock(String idName, String alias) {
    690             this(idName);
    691             map.put(alias, this);
    692         }
    693 
    694         /**
    695          * Creates a UnicodeBlock with the given identifier name and
    696          * alias names.
    697          */
    698         private UnicodeBlock(String idName, String... aliases) {
    699             this(idName);
    700             for (String alias : aliases)
    701                 map.put(alias, this);
    702         }
    703 
    704         /**
    705          * Constant for the "Basic Latin" Unicode character block.
    706          * @since 1.2
    707          */
    708         public static final UnicodeBlock  BASIC_LATIN =
    709             new UnicodeBlock("BASIC_LATIN",
    710                              "BASIC LATIN",
    711                              "BASICLATIN");
    712 
    713         /**
    714          * Constant for the "Latin-1 Supplement" Unicode character block.
    715          * @since 1.2
    716          */
    717         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
    718             new UnicodeBlock("LATIN_1_SUPPLEMENT",
    719                              "LATIN-1 SUPPLEMENT",
    720                              "LATIN-1SUPPLEMENT");
    721 
    722         /**
    723          * Constant for the "Latin Extended-A" Unicode character block.
    724          * @since 1.2
    725          */
    726         public static final UnicodeBlock LATIN_EXTENDED_A =
    727             new UnicodeBlock("LATIN_EXTENDED_A",
    728                              "LATIN EXTENDED-A",
    729                              "LATINEXTENDED-A");
    730 
    731         /**
    732          * Constant for the "Latin Extended-B" Unicode character block.
    733          * @since 1.2
    734          */
    735         public static final UnicodeBlock LATIN_EXTENDED_B =
    736             new UnicodeBlock("LATIN_EXTENDED_B",
    737                              "LATIN EXTENDED-B",
    738                              "LATINEXTENDED-B");
    739 
    740         /**
    741          * Constant for the "IPA Extensions" Unicode character block.
    742          * @since 1.2
    743          */
    744         public static final UnicodeBlock IPA_EXTENSIONS =
    745             new UnicodeBlock("IPA_EXTENSIONS",
    746                              "IPA EXTENSIONS",
    747                              "IPAEXTENSIONS");
    748 
    749         /**
    750          * Constant for the "Spacing Modifier Letters" Unicode character block.
    751          * @since 1.2
    752          */
    753         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
    754             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
    755                              "SPACING MODIFIER LETTERS",
    756                              "SPACINGMODIFIERLETTERS");
    757 
    758         /**
    759          * Constant for the "Combining Diacritical Marks" Unicode character block.
    760          * @since 1.2
    761          */
    762         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
    763             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
    764                              "COMBINING DIACRITICAL MARKS",
    765                              "COMBININGDIACRITICALMARKS");
    766 
    767         /**
    768          * Constant for the "Greek and Coptic" Unicode character block.
    769          * <p>
    770          * This block was previously known as the "Greek" block.
    771          *
    772          * @since 1.2
    773          */
    774         public static final UnicodeBlock GREEK =
    775             new UnicodeBlock("GREEK",
    776                              "GREEK AND COPTIC",
    777                              "GREEKANDCOPTIC");
    778 
    779         /**
    780          * Constant for the "Cyrillic" Unicode character block.
    781          * @since 1.2
    782          */
    783         public static final UnicodeBlock CYRILLIC =
    784             new UnicodeBlock("CYRILLIC");
    785 
    786         /**
    787          * Constant for the "Armenian" Unicode character block.
    788          * @since 1.2
    789          */
    790         public static final UnicodeBlock ARMENIAN =
    791             new UnicodeBlock("ARMENIAN");
    792 
    793         /**
    794          * Constant for the "Hebrew" Unicode character block.
    795          * @since 1.2
    796          */
    797         public static final UnicodeBlock HEBREW =
    798             new UnicodeBlock("HEBREW");
    799 
    800         /**
    801          * Constant for the "Arabic" Unicode character block.
    802          * @since 1.2
    803          */
    804         public static final UnicodeBlock ARABIC =
    805             new UnicodeBlock("ARABIC");
    806 
    807         /**
    808          * Constant for the "Devanagari" Unicode character block.
    809          * @since 1.2
    810          */
    811         public static final UnicodeBlock DEVANAGARI =
    812             new UnicodeBlock("DEVANAGARI");
    813 
    814         /**
    815          * Constant for the "Bengali" Unicode character block.
    816          * @since 1.2
    817          */
    818         public static final UnicodeBlock BENGALI =
    819             new UnicodeBlock("BENGALI");
    820 
    821         /**
    822          * Constant for the "Gurmukhi" Unicode character block.
    823          * @since 1.2
    824          */
    825         public static final UnicodeBlock GURMUKHI =
    826             new UnicodeBlock("GURMUKHI");
    827 
    828         /**
    829          * Constant for the "Gujarati" Unicode character block.
    830          * @since 1.2
    831          */
    832         public static final UnicodeBlock GUJARATI =
    833             new UnicodeBlock("GUJARATI");
    834 
    835         /**
    836          * Constant for the "Oriya" Unicode character block.
    837          * @since 1.2
    838          */
    839         public static final UnicodeBlock ORIYA =
    840             new UnicodeBlock("ORIYA");
    841 
    842         /**
    843          * Constant for the "Tamil" Unicode character block.
    844          * @since 1.2
    845          */
    846         public static final UnicodeBlock TAMIL =
    847             new UnicodeBlock("TAMIL");
    848 
    849         /**
    850          * Constant for the "Telugu" Unicode character block.
    851          * @since 1.2
    852          */
    853         public static final UnicodeBlock TELUGU =
    854             new UnicodeBlock("TELUGU");
    855 
    856         /**
    857          * Constant for the "Kannada" Unicode character block.
    858          * @since 1.2
    859          */
    860         public static final UnicodeBlock KANNADA =
    861             new UnicodeBlock("KANNADA");
    862 
    863         /**
    864          * Constant for the "Malayalam" Unicode character block.
    865          * @since 1.2
    866          */
    867         public static final UnicodeBlock MALAYALAM =
    868             new UnicodeBlock("MALAYALAM");
    869 
    870         /**
    871          * Constant for the "Thai" Unicode character block.
    872          * @since 1.2
    873          */
    874         public static final UnicodeBlock THAI =
    875             new UnicodeBlock("THAI");
    876 
    877         /**
    878          * Constant for the "Lao" Unicode character block.
    879          * @since 1.2
    880          */
    881         public static final UnicodeBlock LAO =
    882             new UnicodeBlock("LAO");
    883 
    884         /**
    885          * Constant for the "Tibetan" Unicode character block.
    886          * @since 1.2
    887          */
    888         public static final UnicodeBlock TIBETAN =
    889             new UnicodeBlock("TIBETAN");
    890 
    891         /**
    892          * Constant for the "Georgian" Unicode character block.
    893          * @since 1.2
    894          */
    895         public static final UnicodeBlock GEORGIAN =
    896             new UnicodeBlock("GEORGIAN");
    897 
    898         /**
    899          * Constant for the "Hangul Jamo" Unicode character block.
    900          * @since 1.2
    901          */
    902         public static final UnicodeBlock HANGUL_JAMO =
    903             new UnicodeBlock("HANGUL_JAMO",
    904                              "HANGUL JAMO",
    905                              "HANGULJAMO");
    906 
    907         /**
    908          * Constant for the "Latin Extended Additional" Unicode character block.
    909          * @since 1.2
    910          */
    911         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
    912             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
    913                              "LATIN EXTENDED ADDITIONAL",
    914                              "LATINEXTENDEDADDITIONAL");
    915 
    916         /**
    917          * Constant for the "Greek Extended" Unicode character block.
    918          * @since 1.2
    919          */
    920         public static final UnicodeBlock GREEK_EXTENDED =
    921             new UnicodeBlock("GREEK_EXTENDED",
    922                              "GREEK EXTENDED",
    923                              "GREEKEXTENDED");
    924 
    925         /**
    926          * Constant for the "General Punctuation" Unicode character block.
    927          * @since 1.2
    928          */
    929         public static final UnicodeBlock GENERAL_PUNCTUATION =
    930             new UnicodeBlock("GENERAL_PUNCTUATION",
    931                              "GENERAL PUNCTUATION",
    932                              "GENERALPUNCTUATION");
    933 
    934         /**
    935          * Constant for the "Superscripts and Subscripts" Unicode character
    936          * block.
    937          * @since 1.2
    938          */
    939         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
    940             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
    941                              "SUPERSCRIPTS AND SUBSCRIPTS",
    942                              "SUPERSCRIPTSANDSUBSCRIPTS");
    943 
    944         /**
    945          * Constant for the "Currency Symbols" Unicode character block.
    946          * @since 1.2
    947          */
    948         public static final UnicodeBlock CURRENCY_SYMBOLS =
    949             new UnicodeBlock("CURRENCY_SYMBOLS",
    950                              "CURRENCY SYMBOLS",
    951                              "CURRENCYSYMBOLS");
    952 
    953         /**
    954          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
    955          * character block.
    956          * <p>
    957          * This block was previously known as "Combining Marks for Symbols".
                 * The constant keeps that older name; both the current and the older
                 * block name are passed to the constructor, each written with and
                 * without spaces.
                 *
    958          * @since 1.2
    959          */
    960         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
    961             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
    962                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
    963                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
    964                              "COMBINING MARKS FOR SYMBOLS",
    965                              "COMBININGMARKSFORSYMBOLS");
    966 
    967         /**
    968          * Constant for the "Letterlike Symbols" Unicode character block.
    969          * @since 1.2
    970          */
    971         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
    972             new UnicodeBlock("LETTERLIKE_SYMBOLS",
    973                              "LETTERLIKE SYMBOLS",
    974                              "LETTERLIKESYMBOLS");
    975 
    976         /**
    977          * Constant for the "Number Forms" Unicode character block.
    978          * @since 1.2
    979          */
    980         public static final UnicodeBlock NUMBER_FORMS =
    981             new UnicodeBlock("NUMBER_FORMS",
    982                              "NUMBER FORMS",
    983                              "NUMBERFORMS");
    984 
    985         /**
    986          * Constant for the "Arrows" Unicode character block.
    987          * @since 1.2
    988          */
    989         public static final UnicodeBlock ARROWS =
    990             new UnicodeBlock("ARROWS");
    991 
    992         /**
    993          * Constant for the "Mathematical Operators" Unicode character block.
    994          * @since 1.2
    995          */
    996         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
    997             new UnicodeBlock("MATHEMATICAL_OPERATORS",
    998                              "MATHEMATICAL OPERATORS",
    999                              "MATHEMATICALOPERATORS");
   1000 
   1001         /**
   1002          * Constant for the "Miscellaneous Technical" Unicode character block.
   1003          * @since 1.2
   1004          */
   1005         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
   1006             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
   1007                              "MISCELLANEOUS TECHNICAL",
   1008                              "MISCELLANEOUSTECHNICAL");
   1009 
   1010         /**
   1011          * Constant for the "Control Pictures" Unicode character block.
   1012          * @since 1.2
   1013          */
   1014         public static final UnicodeBlock CONTROL_PICTURES =
   1015             new UnicodeBlock("CONTROL_PICTURES",
   1016                              "CONTROL PICTURES",
   1017                              "CONTROLPICTURES");
   1018 
   1019         /**
   1020          * Constant for the "Optical Character Recognition" Unicode character block.
   1021          * @since 1.2
   1022          */
   1023         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
   1024             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
   1025                              "OPTICAL CHARACTER RECOGNITION",
   1026                              "OPTICALCHARACTERRECOGNITION");
   1027 
   1028         /**
   1029          * Constant for the "Enclosed Alphanumerics" Unicode character block.
   1030          * @since 1.2
   1031          */
   1032         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
   1033             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
   1034                              "ENCLOSED ALPHANUMERICS",
   1035                              "ENCLOSEDALPHANUMERICS");
   1036 
   1037         /**
   1038          * Constant for the "Box Drawing" Unicode character block.
   1039          * @since 1.2
   1040          */
   1041         public static final UnicodeBlock BOX_DRAWING =
   1042             new UnicodeBlock("BOX_DRAWING",
   1043                              "BOX DRAWING",
   1044                              "BOXDRAWING");
   1045 
   1046         /**
   1047          * Constant for the "Block Elements" Unicode character block.
   1048          * @since 1.2
   1049          */
   1050         public static final UnicodeBlock BLOCK_ELEMENTS =
   1051             new UnicodeBlock("BLOCK_ELEMENTS",
   1052                              "BLOCK ELEMENTS",
   1053                              "BLOCKELEMENTS");
   1054 
   1055         /**
   1056          * Constant for the "Geometric Shapes" Unicode character block.
   1057          * @since 1.2
   1058          */
   1059         public static final UnicodeBlock GEOMETRIC_SHAPES =
   1060             new UnicodeBlock("GEOMETRIC_SHAPES",
   1061                              "GEOMETRIC SHAPES",
   1062                              "GEOMETRICSHAPES");
   1063 
   1064         /**
   1065          * Constant for the "Miscellaneous Symbols" Unicode character block.
   1066          * @since 1.2
   1067          */
   1068         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
   1069             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
   1070                              "MISCELLANEOUS SYMBOLS",
   1071                              "MISCELLANEOUSSYMBOLS");
   1072 
   1073         /**
   1074          * Constant for the "Dingbats" Unicode character block.
   1075          * @since 1.2
   1076          */
   1077         public static final UnicodeBlock DINGBATS =
   1078             new UnicodeBlock("DINGBATS");
   1079 
   1080         /**
   1081          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
   1082          * @since 1.2
   1083          */
   1084         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
   1085             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
   1086                              "CJK SYMBOLS AND PUNCTUATION",
   1087                              "CJKSYMBOLSANDPUNCTUATION");
   1088 
   1089         /**
   1090          * Constant for the "Hiragana" Unicode character block.
   1091          * @since 1.2
   1092          */
   1093         public static final UnicodeBlock HIRAGANA =
   1094             new UnicodeBlock("HIRAGANA");
   1095 
   1096         /**
   1097          * Constant for the "Katakana" Unicode character block.
   1098          * @since 1.2
   1099          */
   1100         public static final UnicodeBlock KATAKANA =
   1101             new UnicodeBlock("KATAKANA");
   1102 
   1103         /**
   1104          * Constant for the "Bopomofo" Unicode character block.
   1105          * @since 1.2
   1106          */
   1107         public static final UnicodeBlock BOPOMOFO =
   1108             new UnicodeBlock("BOPOMOFO");
   1109 
   1110         /**
   1111          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
   1112          * @since 1.2
   1113          */
   1114         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
   1115             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
   1116                              "HANGUL COMPATIBILITY JAMO",
   1117                              "HANGULCOMPATIBILITYJAMO");
   1118 
   1119         /**
   1120          * Constant for the "Kanbun" Unicode character block.
   1121          * @since 1.2
   1122          */
   1123         public static final UnicodeBlock KANBUN =
   1124             new UnicodeBlock("KANBUN");
   1125 
   1126         /**
   1127          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
   1128          * @since 1.2
   1129          */
   1130         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
   1131             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
   1132                              "ENCLOSED CJK LETTERS AND MONTHS",
   1133                              "ENCLOSEDCJKLETTERSANDMONTHS");
   1134 
   1135         /**
   1136          * Constant for the "CJK Compatibility" Unicode character block.
   1137          * @since 1.2
   1138          */
   1139         public static final UnicodeBlock CJK_COMPATIBILITY =
   1140             new UnicodeBlock("CJK_COMPATIBILITY",
   1141                              "CJK COMPATIBILITY",
   1142                              "CJKCOMPATIBILITY");
   1143 
   1144         /**
   1145          * Constant for the "CJK Unified Ideographs" Unicode character block.
   1146          * @since 1.2
   1147          */
   1148         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
   1149             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
   1150                              "CJK UNIFIED IDEOGRAPHS",
   1151                              "CJKUNIFIEDIDEOGRAPHS");
   1152 
   1153         /**
   1154          * Constant for the "Hangul Syllables" Unicode character block.
   1155          * @since 1.2
   1156          */
   1157         public static final UnicodeBlock HANGUL_SYLLABLES =
   1158             new UnicodeBlock("HANGUL_SYLLABLES",
   1159                              "HANGUL SYLLABLES",
   1160                              "HANGULSYLLABLES");
   1161 
   1162         /**
   1163          * Constant for the "Private Use Area" Unicode character block.
   1164          * @since 1.2
   1165          */
   1166         public static final UnicodeBlock PRIVATE_USE_AREA =
   1167             new UnicodeBlock("PRIVATE_USE_AREA",
   1168                              "PRIVATE USE AREA",
   1169                              "PRIVATEUSEAREA");
   1170 
   1171         /**
   1172          * Constant for the "CJK Compatibility Ideographs" Unicode character
   1173          * block.
   1174          * @since 1.2
   1175          */
   1176         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
   1177             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
   1178                              "CJK COMPATIBILITY IDEOGRAPHS",
   1179                              "CJKCOMPATIBILITYIDEOGRAPHS");
   1180 
   1181         /**
   1182          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
   1183          * @since 1.2
   1184          */
   1185         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
   1186             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
   1187                              "ALPHABETIC PRESENTATION FORMS",
   1188                              "ALPHABETICPRESENTATIONFORMS");
   1189 
   1190         /**
   1191          * Constant for the "Arabic Presentation Forms-A" Unicode character
   1192          * block.
   1193          * @since 1.2
   1194          */
   1195         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
   1196             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
   1197                              "ARABIC PRESENTATION FORMS-A",
   1198                              "ARABICPRESENTATIONFORMS-A");
   1199 
   1200         /**
   1201          * Constant for the "Combining Half Marks" Unicode character block.
   1202          * @since 1.2
   1203          */
   1204         public static final UnicodeBlock COMBINING_HALF_MARKS =
   1205             new UnicodeBlock("COMBINING_HALF_MARKS",
   1206                              "COMBINING HALF MARKS",
   1207                              "COMBININGHALFMARKS");
   1208 
   1209         /**
   1210          * Constant for the "CJK Compatibility Forms" Unicode character block.
   1211          * @since 1.2
   1212          */
   1213         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
   1214             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
   1215                              "CJK COMPATIBILITY FORMS",
   1216                              "CJKCOMPATIBILITYFORMS");
   1217 
   1218         /**
   1219          * Constant for the "Small Form Variants" Unicode character block.
   1220          * @since 1.2
   1221          */
   1222         public static final UnicodeBlock SMALL_FORM_VARIANTS =
   1223             new UnicodeBlock("SMALL_FORM_VARIANTS",
   1224                              "SMALL FORM VARIANTS",
   1225                              "SMALLFORMVARIANTS");
   1226 
   1227         /**
   1228          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
   1229          * @since 1.2
   1230          */
   1231         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
   1232             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
   1233                              "ARABIC PRESENTATION FORMS-B",
   1234                              "ARABICPRESENTATIONFORMS-B");
   1235 
   1236         /**
   1237          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
   1238          * block.
   1239          * @since 1.2
   1240          */
   1241         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
   1242             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
   1243                              "HALFWIDTH AND FULLWIDTH FORMS",
   1244                              "HALFWIDTHANDFULLWIDTHFORMS");
   1245 
   1246         /**
   1247          * Constant for the "Specials" Unicode character block.
   1248          * @since 1.2
   1249          */
   1250         public static final UnicodeBlock SPECIALS =
   1251             new UnicodeBlock("SPECIALS");
   1252 
   1253         /**
                 * Legacy constant for the surrogates area, superseded by the three
                 * surrogate block constants named below.
                 *
   1254          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
   1255          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
   1256          *             {@link #LOW_SURROGATES}. These new constants match
   1257          *             the block definitions of the Unicode Standard.
   1258          *             The {@link #of(char)} and {@link #of(int)} methods
   1259          *             return the new constants, not SURROGATES_AREA.
   1260          */
   1261         @Deprecated
   1262         public static final UnicodeBlock SURROGATES_AREA =
                    // NOTE(review): this is the only declaration in this stretch that
                    // passes a boolean; presumably false keeps the deprecated constant
                    // out of the by-name lookup -- confirm against the constructor.
   1263             new UnicodeBlock("SURROGATES_AREA", false);
   1264 
   1265         /**
   1266          * Constant for the "Syriac" Unicode character block.
   1267          * @since 1.4
   1268          */
   1269         public static final UnicodeBlock SYRIAC =
   1270             new UnicodeBlock("SYRIAC");
   1271 
   1272         /**
   1273          * Constant for the "Thaana" Unicode character block.
   1274          * @since 1.4
   1275          */
   1276         public static final UnicodeBlock THAANA =
   1277             new UnicodeBlock("THAANA");
   1278 
   1279         /**
   1280          * Constant for the "Sinhala" Unicode character block.
   1281          * @since 1.4
   1282          */
   1283         public static final UnicodeBlock SINHALA =
   1284             new UnicodeBlock("SINHALA");
   1285 
   1286         /**
   1287          * Constant for the "Myanmar" Unicode character block.
   1288          * @since 1.4
   1289          */
   1290         public static final UnicodeBlock MYANMAR =
   1291             new UnicodeBlock("MYANMAR");
   1292 
   1293         /**
   1294          * Constant for the "Ethiopic" Unicode character block.
   1295          * @since 1.4
   1296          */
   1297         public static final UnicodeBlock ETHIOPIC =
   1298             new UnicodeBlock("ETHIOPIC");
   1299 
   1300         /**
   1301          * Constant for the "Cherokee" Unicode character block.
   1302          * @since 1.4
   1303          */
   1304         public static final UnicodeBlock CHEROKEE =
   1305             new UnicodeBlock("CHEROKEE");
   1306 
   1307         /**
   1308          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
   1309          * @since 1.4
   1310          */
   1311         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
   1312             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
   1313                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
   1314                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
   1315 
   1316         /**
   1317          * Constant for the "Ogham" Unicode character block.
   1318          * @since 1.4
   1319          */
   1320         public static final UnicodeBlock OGHAM =
   1321             new UnicodeBlock("OGHAM");
   1322 
   1323         /**
   1324          * Constant for the "Runic" Unicode character block.
   1325          * @since 1.4
   1326          */
   1327         public static final UnicodeBlock RUNIC =
   1328             new UnicodeBlock("RUNIC");
   1329 
   1330         /**
   1331          * Constant for the "Khmer" Unicode character block.
   1332          * @since 1.4
   1333          */
   1334         public static final UnicodeBlock KHMER =
   1335             new UnicodeBlock("KHMER");
   1336 
   1337         /**
   1338          * Constant for the "Mongolian" Unicode character block.
   1339          * @since 1.4
   1340          */
   1341         public static final UnicodeBlock MONGOLIAN =
   1342             new UnicodeBlock("MONGOLIAN");
   1343 
   1344         /**
   1345          * Constant for the "Braille Patterns" Unicode character block.
   1346          * @since 1.4
   1347          */
   1348         public static final UnicodeBlock BRAILLE_PATTERNS =
   1349             new UnicodeBlock("BRAILLE_PATTERNS",
   1350                              "BRAILLE PATTERNS",
   1351                              "BRAILLEPATTERNS");
   1352 
   1353         /**
   1354          * Constant for the "CJK Radicals Supplement" Unicode character block.
   1355          * @since 1.4
   1356          */
   1357         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
   1358             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
   1359                              "CJK RADICALS SUPPLEMENT",
   1360                              "CJKRADICALSSUPPLEMENT");
   1361 
   1362         /**
   1363          * Constant for the "Kangxi Radicals" Unicode character block.
   1364          * @since 1.4
   1365          */
   1366         public static final UnicodeBlock KANGXI_RADICALS =
   1367             new UnicodeBlock("KANGXI_RADICALS",
   1368                              "KANGXI RADICALS",
   1369                              "KANGXIRADICALS");
   1370 
   1371         /**
   1372          * Constant for the "Ideographic Description Characters" Unicode character block.
   1373          * @since 1.4
   1374          */
   1375         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
   1376             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
   1377                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
   1378                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
   1379 
   1380         /**
   1381          * Constant for the "Bopomofo Extended" Unicode character block.
   1382          * @since 1.4
   1383          */
   1384         public static final UnicodeBlock BOPOMOFO_EXTENDED =
   1385             new UnicodeBlock("BOPOMOFO_EXTENDED",
   1386                              "BOPOMOFO EXTENDED",
   1387                              "BOPOMOFOEXTENDED");
   1388 
   1389         /**
   1390          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
   1391          * @since 1.4
   1392          */
   1393         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
   1394             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
   1395                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
   1396                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
   1397 
   1398         /**
   1399          * Constant for the "Yi Syllables" Unicode character block.
   1400          * @since 1.4
   1401          */
   1402         public static final UnicodeBlock YI_SYLLABLES =
   1403             new UnicodeBlock("YI_SYLLABLES",
   1404                              "YI SYLLABLES",
   1405                              "YISYLLABLES");
   1406 
   1407         /**
   1408          * Constant for the "Yi Radicals" Unicode character block.
   1409          * @since 1.4
   1410          */
   1411         public static final UnicodeBlock YI_RADICALS =
   1412             new UnicodeBlock("YI_RADICALS",
   1413                              "YI RADICALS",
   1414                              "YIRADICALS");
   1415 
   1416         /**
   1417          * Constant for the "Cyrillic Supplementary" Unicode character block.
                 * <p>
                 * The constructor is also given the shorter "Cyrillic Supplement"
                 * spelling of the name, with and without a space.
                 *
   1418          * @since 1.5
   1419          */
   1420         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
                    // NOTE(review): "Cyrillic Supplement" looks like the block's current
                    // Unicode name, with "Supplementary" the older one -- confirm against
                    // Blocks.txt and, if so, say that in the Javadoc above.
   1421             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
   1422                              "CYRILLIC SUPPLEMENTARY",
   1423                              "CYRILLICSUPPLEMENTARY",
   1424                              "CYRILLIC SUPPLEMENT",
   1425                              "CYRILLICSUPPLEMENT");
   1426 
   1427         /**
   1428          * Constant for the "Tagalog" Unicode character block.
   1429          * @since 1.5
   1430          */
   1431         public static final UnicodeBlock TAGALOG =
   1432             new UnicodeBlock("TAGALOG");
   1433 
   1434         /**
   1435          * Constant for the "Hanunoo" Unicode character block.
   1436          * @since 1.5
   1437          */
   1438         public static final UnicodeBlock HANUNOO =
   1439             new UnicodeBlock("HANUNOO");
   1440 
   1441         /**
   1442          * Constant for the "Buhid" Unicode character block.
   1443          * @since 1.5
   1444          */
   1445         public static final UnicodeBlock BUHID =
   1446             new UnicodeBlock("BUHID");
   1447 
   1448         /**
   1449          * Constant for the "Tagbanwa" Unicode character block.
   1450          * @since 1.5
   1451          */
   1452         public static final UnicodeBlock TAGBANWA =
   1453             new UnicodeBlock("TAGBANWA");
   1454 
   1455         /**
   1456          * Constant for the "Limbu" Unicode character block.
   1457          * @since 1.5
   1458          */
   1459         public static final UnicodeBlock LIMBU =
   1460             new UnicodeBlock("LIMBU");
   1461 
   1462         /**
   1463          * Constant for the "Tai Le" Unicode character block.
   1464          * @since 1.5
   1465          */
   1466         public static final UnicodeBlock TAI_LE =
   1467             new UnicodeBlock("TAI_LE",
   1468                              "TAI LE",
   1469                              "TAILE");
   1470 
   1471         /**
   1472          * Constant for the "Khmer Symbols" Unicode character block.
   1473          * @since 1.5
   1474          */
   1475         public static final UnicodeBlock KHMER_SYMBOLS =
   1476             new UnicodeBlock("KHMER_SYMBOLS",
   1477                              "KHMER SYMBOLS",
   1478                              "KHMERSYMBOLS");
   1479 
   1480         /**
   1481          * Constant for the "Phonetic Extensions" Unicode character block.
   1482          * @since 1.5
   1483          */
   1484         public static final UnicodeBlock PHONETIC_EXTENSIONS =
   1485             new UnicodeBlock("PHONETIC_EXTENSIONS",
   1486                              "PHONETIC EXTENSIONS",
   1487                              "PHONETICEXTENSIONS");
   1488 
   1489         /**
   1490          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
   1491          * @since 1.5
   1492          */
   1493         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
   1494             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
   1495                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
   1496                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
   1497 
   1498         /**
   1499          * Constant for the "Supplemental Arrows-A" Unicode character block.
   1500          * @since 1.5
   1501          */
   1502         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
   1503             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", // identifier; same text as the field name
   1504                              "SUPPLEMENTAL ARROWS-A", // block name in upper case, with spaces
   1505                              "SUPPLEMENTALARROWS-A"); // spaces removed; the '-' is kept
   1506 
   1507         /**
   1508          * Constant for the "Supplemental Arrows-B" Unicode character block.
   1509          * @since 1.5
   1510          */
   1511         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
   1512             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
   1513                              "SUPPLEMENTAL ARROWS-B",
   1514                              "SUPPLEMENTALARROWS-B");
   1515 
   1516         /**
   1517          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
   1518          * character block.
   1519          * @since 1.5
   1520          */
   1521         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
   1522             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
   1523                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
   1524                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
   1525 
   1526         /**
   1527          * Constant for the "Supplemental Mathematical Operators" Unicode
   1528          * character block.
   1529          * @since 1.5
   1530          */
   1531         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
   1532             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
   1533                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
   1534                              "SUPPLEMENTALMATHEMATICALOPERATORS");
   1535 
   1536         /**
   1537          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
   1538          * block.
   1539          * @since 1.5
   1540          */
   1541         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
   1542             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
   1543                              "MISCELLANEOUS SYMBOLS AND ARROWS",
   1544                              "MISCELLANEOUSSYMBOLSANDARROWS");
   1545 
   1546         /**
   1547          * Constant for the "Katakana Phonetic Extensions" Unicode character
   1548          * block.
   1549          * @since 1.5
   1550          */
   1551         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
   1552             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
   1553                              "KATAKANA PHONETIC EXTENSIONS",
   1554                              "KATAKANAPHONETICEXTENSIONS");
   1555 
   1556         /**
   1557          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
   1558          * @since 1.5
   1559          */
   1560         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
   1561             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
   1562                              "YIJING HEXAGRAM SYMBOLS",
   1563                              "YIJINGHEXAGRAMSYMBOLS");
   1564 
   1565         /**
   1566          * Constant for the "Variation Selectors" Unicode character block.
   1567          * @since 1.5
   1568          */
   1569         public static final UnicodeBlock VARIATION_SELECTORS =
   1570             new UnicodeBlock("VARIATION_SELECTORS",
   1571                              "VARIATION SELECTORS",
   1572                              "VARIATIONSELECTORS");
   1573 
   1574         /**
   1575          * Constant for the "Linear B Syllabary" Unicode character block.
   1576          * @since 1.5
   1577          */
   1578         public static final UnicodeBlock LINEAR_B_SYLLABARY =
   1579             new UnicodeBlock("LINEAR_B_SYLLABARY",
   1580                              "LINEAR B SYLLABARY",
   1581                              "LINEARBSYLLABARY");
   1582 
   1583         /**
   1584          * Constant for the "Linear B Ideograms" Unicode character block.
   1585          * @since 1.5
   1586          */
   1587         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
   1588             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
   1589                              "LINEAR B IDEOGRAMS",
   1590                              "LINEARBIDEOGRAMS");
   1591 
   1592         /**
   1593          * Constant for the "Aegean Numbers" Unicode character block.
   1594          * @since 1.5
   1595          */
   1596         public static final UnicodeBlock AEGEAN_NUMBERS =
   1597             new UnicodeBlock("AEGEAN_NUMBERS", // identifier; same text as the field name
   1598                              "AEGEAN NUMBERS", // block name in upper case, with spaces
   1599                              "AEGEANNUMBERS"); // same name with the spaces removed
   1600 
   1601         /**
   1602          * Constant for the "Old Italic" Unicode character block.
   1603          * @since 1.5
   1604          */
   1605         public static final UnicodeBlock OLD_ITALIC =
   1606             new UnicodeBlock("OLD_ITALIC",
   1607                              "OLD ITALIC",
   1608                              "OLDITALIC");
   1609 
   1610         /**
   1611          * Constant for the "Gothic" Unicode character block.
   1612          * @since 1.5
   1613          */
   1614         public static final UnicodeBlock GOTHIC =
   1615             new UnicodeBlock("GOTHIC"); // one-word name: only the identifier is passed
   1616 
   1617         /**
   1618          * Constant for the "Ugaritic" Unicode character block.
   1619          * @since 1.5
   1620          */
   1621         public static final UnicodeBlock UGARITIC =
   1622             new UnicodeBlock("UGARITIC");
   1623 
   1624         /**
   1625          * Constant for the "Deseret" Unicode character block.
   1626          * @since 1.5
   1627          */
   1628         public static final UnicodeBlock DESERET =
   1629             new UnicodeBlock("DESERET");
   1630 
   1631         /**
   1632          * Constant for the "Shavian" Unicode character block.
   1633          * @since 1.5
   1634          */
   1635         public static final UnicodeBlock SHAVIAN =
   1636             new UnicodeBlock("SHAVIAN");
   1637 
   1638         /**
   1639          * Constant for the "Osmanya" Unicode character block.
   1640          * @since 1.5
   1641          */
   1642         public static final UnicodeBlock OSMANYA =
   1643             new UnicodeBlock("OSMANYA");
   1644 
   1645         /**
   1646          * Constant for the "Cypriot Syllabary" Unicode character block.
   1647          * @since 1.5
   1648          */
   1649         public static final UnicodeBlock CYPRIOT_SYLLABARY =
   1650             new UnicodeBlock("CYPRIOT_SYLLABARY", // identifier; same text as the field name
   1651                              "CYPRIOT SYLLABARY", // block name in upper case, with spaces
   1652                              "CYPRIOTSYLLABARY"); // same name with the spaces removed
   1653 
   1654         /**
   1655          * Constant for the "Byzantine Musical Symbols" Unicode character block.
   1656          * @since 1.5
   1657          */
   1658         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
   1659             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
   1660                              "BYZANTINE MUSICAL SYMBOLS",
   1661                              "BYZANTINEMUSICALSYMBOLS");
   1662 
   1663         /**
   1664          * Constant for the "Musical Symbols" Unicode character block.
   1665          * @since 1.5
   1666          */
   1667         public static final UnicodeBlock MUSICAL_SYMBOLS =
   1668             new UnicodeBlock("MUSICAL_SYMBOLS", // identifier; same text as the field name
   1669                              "MUSICAL SYMBOLS", // block name in upper case, with spaces
   1670                              "MUSICALSYMBOLS"); // same name with the spaces removed
   1671 
   1672         /**
   1673          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
   1674          * @since 1.5
   1675          */
   1676         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
   1677             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
   1678                              "TAI XUAN JING SYMBOLS",
   1679                              "TAIXUANJINGSYMBOLS");
   1680 
   1681         /**
   1682          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
   1683          * character block.
   1684          * @since 1.5
   1685          */
   1686         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
   1687             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
   1688                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
   1689                              "MATHEMATICALALPHANUMERICSYMBOLS");
   1690 
   1691         /**
   1692          * Constant for the "CJK Unified Ideographs Extension B" Unicode
   1693          * character block.
   1694          * @since 1.5
   1695          */
   1696         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
   1697             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
   1698                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
   1699                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
   1700 
   1701         /**
   1702          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
   1703          * @since 1.5
   1704          */
   1705         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
   1706             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
   1707                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
   1708                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
   1709 
   1710         /**
   1711          * Constant for the "Tags" Unicode character block.
   1712          * @since 1.5
   1713          */
   1714         public static final UnicodeBlock TAGS =
   1715             new UnicodeBlock("TAGS"); // one-word name: only the identifier is passed
   1716 
   1717         /**
   1718          * Constant for the "Variation Selectors Supplement" Unicode character
   1719          * block.
   1720          * @since 1.5
   1721          */
   1722         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
   1723             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
   1724                              "VARIATION SELECTORS SUPPLEMENT",
   1725                              "VARIATIONSELECTORSSUPPLEMENT");
   1726 
   1727         /**
   1728          * Constant for the "Supplementary Private Use Area-A" Unicode character
   1729          * block.
   1730          * @since 1.5
   1731          */
   1732         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
   1733             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", // identifier; same as the field name
   1734                              "SUPPLEMENTARY PRIVATE USE AREA-A", // block name in upper case
   1735                              "SUPPLEMENTARYPRIVATEUSEAREA-A");   // spaces removed; the '-' is kept
   1736 
   1737         /**
   1738          * Constant for the "Supplementary Private Use Area-B" Unicode character
   1739          * block.
   1740          * @since 1.5
   1741          */
   1742         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
   1743             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
   1744                              "SUPPLEMENTARY PRIVATE USE AREA-B",
   1745                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
   1746 
   1747         /**
   1748          * Constant for the "High Surrogates" Unicode character block.
   1749          * This block represents code point values in the high surrogate
   1750          * range: U+D800 through U+DB7F.
   1751          *
   1752          * @since 1.5
   1753          */
   1754         public static final UnicodeBlock HIGH_SURROGATES =
   1755             new UnicodeBlock("HIGH_SURROGATES",
   1756                              "HIGH SURROGATES",
   1757                              "HIGHSURROGATES");
   1758 
   1759         /**
   1760          * Constant for the "High Private Use Surrogates" Unicode character
   1761          * block.
   1762          * This block represents code point values in the private use high
   1763          * surrogate range: U+DB80 through U+DBFF.
   1764          *
   1765          * @since 1.5
   1766          */
   1767         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
   1768             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
   1769                              "HIGH PRIVATE USE SURROGATES",
   1770                              "HIGHPRIVATEUSESURROGATES");
   1771 
   1772         /**
   1773          * Constant for the "Low Surrogates" Unicode character block.
   1774          * This block represents code point values in the low surrogate
   1775          * range: U+DC00 through U+DFFF.
   1776          *
   1777          * @since 1.5
   1778          */
   1779         public static final UnicodeBlock LOW_SURROGATES =
   1780             new UnicodeBlock("LOW_SURROGATES",
   1781                              "LOW SURROGATES",
   1782                              "LOWSURROGATES");
   1783 
   1784         /**
   1785          * Constant for the "Arabic Supplement" Unicode character block.
   1786          * @since 1.7
   1787          */
   1788         public static final UnicodeBlock ARABIC_SUPPLEMENT =
   1789             new UnicodeBlock("ARABIC_SUPPLEMENT",
   1790                              "ARABIC SUPPLEMENT",
   1791                              "ARABICSUPPLEMENT");
   1792 
   1793         /**
   1794          * Constant for the "NKo" Unicode character block.
   1795          * @since 1.7
   1796          */
   1797         public static final UnicodeBlock NKO =
   1798             new UnicodeBlock("NKO"); // one-word name: only the identifier is passed
   1799 
   1800         /**
   1801          * Constant for the "Samaritan" Unicode character block.
   1802          * @since 1.7
   1803          */
   1804         public static final UnicodeBlock SAMARITAN =
   1805             new UnicodeBlock("SAMARITAN");
   1806 
   1807         /**
   1808          * Constant for the "Mandaic" Unicode character block.
   1809          * @since 1.7
   1810          */
   1811         public static final UnicodeBlock MANDAIC =
   1812             new UnicodeBlock("MANDAIC");
   1813 
   1814         /**
   1815          * Constant for the "Ethiopic Supplement" Unicode character block.
   1816          * @since 1.7
   1817          */
   1818         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
   1819             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
   1820                              "ETHIOPIC SUPPLEMENT",
   1821                              "ETHIOPICSUPPLEMENT");
   1822 
   1823         /**
   1824          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
   1825          * Unicode character block.
   1826          * @since 1.7
   1827          */
   1828         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
   1829             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
   1830                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
   1831                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
   1832 
   1833         /**
   1834          * Constant for the "New Tai Lue" Unicode character block.
   1835          * @since 1.7
   1836          */
   1837         public static final UnicodeBlock NEW_TAI_LUE =
   1838             new UnicodeBlock("NEW_TAI_LUE",
   1839                              "NEW TAI LUE",
   1840                              "NEWTAILUE");
   1841 
   1842         /**
   1843          * Constant for the "Buginese" Unicode character block.
   1844          * @since 1.7
   1845          */
   1846         public static final UnicodeBlock BUGINESE =
   1847             new UnicodeBlock("BUGINESE");
   1848 
   1849         /**
   1850          * Constant for the "Tai Tham" Unicode character block.
   1851          * @since 1.7
   1852          */
   1853         public static final UnicodeBlock TAI_THAM =
   1854             new UnicodeBlock("TAI_THAM", // identifier; same text as the field name
   1855                              "TAI THAM", // block name in upper case, with spaces
   1856                              "TAITHAM"); // same name with the spaces removed
   1857 
   1858         /**
   1859          * Constant for the "Balinese" Unicode character block.
   1860          * @since 1.7
   1861          */
   1862         public static final UnicodeBlock BALINESE =
   1863             new UnicodeBlock("BALINESE"); // one-word name: only the identifier is passed
   1864 
   1865         /**
   1866          * Constant for the "Sundanese" Unicode character block.
   1867          * @since 1.7
   1868          */
   1869         public static final UnicodeBlock SUNDANESE =
   1870             new UnicodeBlock("SUNDANESE");
   1871 
   1872         /**
   1873          * Constant for the "Batak" Unicode character block.
   1874          * @since 1.7
   1875          */
   1876         public static final UnicodeBlock BATAK =
   1877             new UnicodeBlock("BATAK");
   1878 
   1879         /**
   1880          * Constant for the "Lepcha" Unicode character block.
   1881          * @since 1.7
   1882          */
   1883         public static final UnicodeBlock LEPCHA =
   1884             new UnicodeBlock("LEPCHA");
   1885 
   1886         /**
   1887          * Constant for the "Ol Chiki" Unicode character block.
   1888          * @since 1.7
   1889          */
   1890         public static final UnicodeBlock OL_CHIKI =
   1891             new UnicodeBlock("OL_CHIKI", // identifier; same text as the field name
   1892                              "OL CHIKI", // block name in upper case, with spaces
   1893                              "OLCHIKI"); // same name with the spaces removed
   1894 
   1895         /**
   1896          * Constant for the "Vedic Extensions" Unicode character block.
   1897          * @since 1.7
   1898          */
   1899         public static final UnicodeBlock VEDIC_EXTENSIONS =
   1900             new UnicodeBlock("VEDIC_EXTENSIONS",
   1901                              "VEDIC EXTENSIONS",
   1902                              "VEDICEXTENSIONS");
   1903 
   1904         /**
   1905          * Constant for the "Phonetic Extensions Supplement" Unicode character
   1906          * block.
   1907          * @since 1.7
   1908          */
   1909         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
   1910             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
   1911                              "PHONETIC EXTENSIONS SUPPLEMENT",
   1912                              "PHONETICEXTENSIONSSUPPLEMENT");
   1913 
   1914         /**
   1915          * Constant for the "Combining Diacritical Marks Supplement" Unicode
   1916          * character block.
   1917          * @since 1.7
   1918          */
   1919         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
   1920             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
   1921                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
   1922                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
   1923 
   1924         /**
   1925          * Constant for the "Glagolitic" Unicode character block.
   1926          * @since 1.7
   1927          */
   1928         public static final UnicodeBlock GLAGOLITIC =
   1929             new UnicodeBlock("GLAGOLITIC"); // one-word name: only the identifier is passed
   1930 
   1931         /**
   1932          * Constant for the "Latin Extended-C" Unicode character block.
   1933          * @since 1.7
   1934          */
   1935         public static final UnicodeBlock LATIN_EXTENDED_C =
   1936             new UnicodeBlock("LATIN_EXTENDED_C", // identifier; same text as the field name
   1937                              "LATIN EXTENDED-C", // block name in upper case, with spaces
   1938                              "LATINEXTENDED-C"); // spaces removed; the '-' is kept
   1939 
   1940         /**
   1941          * Constant for the "Coptic" Unicode character block.
   1942          * @since 1.7
   1943          */
   1944         public static final UnicodeBlock COPTIC =
   1945             new UnicodeBlock("COPTIC");
   1946 
   1947         /**
   1948          * Constant for the "Georgian Supplement" Unicode character block.
   1949          * @since 1.7
   1950          */
   1951         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
   1952             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
   1953                              "GEORGIAN SUPPLEMENT",
   1954                              "GEORGIANSUPPLEMENT");
   1955 
   1956         /**
   1957          * Constant for the "Tifinagh" Unicode character block.
   1958          * @since 1.7
   1959          */
   1960         public static final UnicodeBlock TIFINAGH =
   1961             new UnicodeBlock("TIFINAGH");
   1962 
   1963         /**
   1964          * Constant for the "Ethiopic Extended" Unicode character block.
   1965          * @since 1.7
   1966          */
   1967         public static final UnicodeBlock ETHIOPIC_EXTENDED =
   1968             new UnicodeBlock("ETHIOPIC_EXTENDED",
   1969                              "ETHIOPIC EXTENDED",
   1970                              "ETHIOPICEXTENDED");
   1971 
   1972         /**
   1973          * Constant for the "Cyrillic Extended-A" Unicode character block.
   1974          * @since 1.7
   1975          */
   1976         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
   1977             new UnicodeBlock("CYRILLIC_EXTENDED_A",
   1978                              "CYRILLIC EXTENDED-A",
   1979                              "CYRILLICEXTENDED-A");
   1980 
   1981         /**
   1982          * Constant for the "Supplemental Punctuation" Unicode character block.
   1983          * @since 1.7
   1984          */
   1985         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
   1986             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
   1987                              "SUPPLEMENTAL PUNCTUATION",
   1988                              "SUPPLEMENTALPUNCTUATION");
   1989 
   1990         /**
   1991          * Constant for the "CJK Strokes" Unicode character block.
   1992          * @since 1.7
   1993          */
   1994         public static final UnicodeBlock CJK_STROKES =
   1995             new UnicodeBlock("CJK_STROKES",
   1996                              "CJK STROKES",
   1997                              "CJKSTROKES");
   1998 
   1999         /**
   2000          * Constant for the "Lisu" Unicode character block.
   2001          * @since 1.7
   2002          */
   2003         public static final UnicodeBlock LISU =
   2004             new UnicodeBlock("LISU"); // one-word name: only the identifier is passed
   2005 
   2006         /**
   2007          * Constant for the "Vai" Unicode character block.
   2008          * @since 1.7
   2009          */
   2010         public static final UnicodeBlock VAI =
   2011             new UnicodeBlock("VAI");
   2012 
   2013         /**
   2014          * Constant for the "Cyrillic Extended-B" Unicode character block.
   2015          * @since 1.7
   2016          */
   2017         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
   2018             new UnicodeBlock("CYRILLIC_EXTENDED_B",
   2019                              "CYRILLIC EXTENDED-B",
   2020                              "CYRILLICEXTENDED-B");
   2021 
   2022         /**
   2023          * Constant for the "Bamum" Unicode character block.
   2024          * @since 1.7
   2025          */
   2026         public static final UnicodeBlock BAMUM =
   2027             new UnicodeBlock("BAMUM");
   2028 
   2029         /**
   2030          * Constant for the "Modifier Tone Letters" Unicode character block.
   2031          * @since 1.7
   2032          */
   2033         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
   2034             new UnicodeBlock("MODIFIER_TONE_LETTERS",
   2035                              "MODIFIER TONE LETTERS",
   2036                              "MODIFIERTONELETTERS");
   2037 
   2038         /**
   2039          * Constant for the "Latin Extended-D" Unicode character block.
   2040          * @since 1.7
   2041          */
   2042         public static final UnicodeBlock LATIN_EXTENDED_D =
   2043             new UnicodeBlock("LATIN_EXTENDED_D",
   2044                              "LATIN EXTENDED-D",
   2045                              "LATINEXTENDED-D");
   2046 
   2047         /**
   2048          * Constant for the "Syloti Nagri" Unicode character block.
   2049          * @since 1.7
   2050          */
   2051         public static final UnicodeBlock SYLOTI_NAGRI =
   2052             new UnicodeBlock("SYLOTI_NAGRI",
   2053                              "SYLOTI NAGRI",
   2054                              "SYLOTINAGRI");
   2055 
   2056         /**
   2057          * Constant for the "Common Indic Number Forms" Unicode character block.
   2058          * @since 1.7
   2059          */
   2060         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
   2061             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
   2062                              "COMMON INDIC NUMBER FORMS",
   2063                              "COMMONINDICNUMBERFORMS");
   2064 
   2065         /**
   2066          * Constant for the "Phags-pa" Unicode character block.
   2067          * @since 1.7
   2068          */
   2069         public static final UnicodeBlock PHAGS_PA =
   2070             new UnicodeBlock("PHAGS_PA", // identifier; same text as the field name
   2071                              "PHAGS-PA"); // name has no spaces; no third, space-stripped form is passed
   2072 
   2073         /**
   2074          * Constant for the "Saurashtra" Unicode character block.
   2075          * @since 1.7
   2076          */
   2077         public static final UnicodeBlock SAURASHTRA =
   2078             new UnicodeBlock("SAURASHTRA"); // one-word name: only the identifier is passed
   2079 
   2080         /**
   2081          * Constant for the "Devanagari Extended" Unicode character block.
   2082          * @since 1.7
   2083          */
   2084         public static final UnicodeBlock DEVANAGARI_EXTENDED =
   2085             new UnicodeBlock("DEVANAGARI_EXTENDED",
   2086                              "DEVANAGARI EXTENDED",
   2087                              "DEVANAGARIEXTENDED");
   2088 
   2089         /**
   2090          * Constant for the "Kayah Li" Unicode character block.
   2091          * @since 1.7
   2092          */
   2093         public static final UnicodeBlock KAYAH_LI =
   2094             new UnicodeBlock("KAYAH_LI", // identifier; same text as the field name
   2095                              "KAYAH LI", // block name in upper case, with spaces
   2096                              "KAYAHLI"); // same name with the spaces removed
   2097 
   2098         /**
   2099          * Constant for the "Rejang" Unicode character block.
   2100          * @since 1.7
   2101          */
   2102         public static final UnicodeBlock REJANG =
   2103             new UnicodeBlock("REJANG");
   2104 
   2105         /**
   2106          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
   2107          * @since 1.7
   2108          */
   2109         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
   2110             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
   2111                              "HANGUL JAMO EXTENDED-A",
   2112                              "HANGULJAMOEXTENDED-A");
   2113 
   2114         /**
   2115          * Constant for the "Javanese" Unicode character block.
   2116          * @since 1.7
   2117          */
   2118         public static final UnicodeBlock JAVANESE =
   2119             new UnicodeBlock("JAVANESE");
   2120 
   2121         /**
   2122          * Constant for the "Cham" Unicode character block.
   2123          * @since 1.7
   2124          */
   2125         public static final UnicodeBlock CHAM =
   2126             new UnicodeBlock("CHAM");
   2127 
   2128         /**
   2129          * Constant for the "Myanmar Extended-A" Unicode character block.
   2130          * @since 1.7
   2131          */
   2132         public static final UnicodeBlock MYANMAR_EXTENDED_A =
   2133             new UnicodeBlock("MYANMAR_EXTENDED_A",
   2134                              "MYANMAR EXTENDED-A",
   2135                              "MYANMAREXTENDED-A");
   2136 
   2137         /**
   2138          * Constant for the "Tai Viet" Unicode character block.
   2139          * @since 1.7
   2140          */
   2141         public static final UnicodeBlock TAI_VIET =
   2142             new UnicodeBlock("TAI_VIET",
   2143                              "TAI VIET",
   2144                              "TAIVIET");
   2145 
   2146         /**
   2147          * Constant for the "Ethiopic Extended-A" Unicode character block.
   2148          * @since 1.7
   2149          */
   2150         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
   2151             new UnicodeBlock("ETHIOPIC_EXTENDED_A", // identifier; same text as the field name
   2152                              "ETHIOPIC EXTENDED-A", // block name in upper case, with spaces
   2153                              "ETHIOPICEXTENDED-A"); // spaces removed; the '-' is kept
   2154 
   2155         /**
   2156          * Constant for the "Meetei Mayek" Unicode character block.
   2157          * @since 1.7
   2158          */
   2159         public static final UnicodeBlock MEETEI_MAYEK =
   2160             new UnicodeBlock("MEETEI_MAYEK",
   2161                              "MEETEI MAYEK",
   2162                              "MEETEIMAYEK");
   2163 
   2164         /**
   2165          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
   2166          * @since 1.7
   2167          */
   2168         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
   2169             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
   2170                              "HANGUL JAMO EXTENDED-B",
   2171                              "HANGULJAMOEXTENDED-B");
   2172 
   2173         /**
   2174          * Constant for the "Vertical Forms" Unicode character block.
   2175          * @since 1.7
   2176          */
   2177         public static final UnicodeBlock VERTICAL_FORMS =
   2178             new UnicodeBlock("VERTICAL_FORMS",
   2179                              "VERTICAL FORMS",
   2180                              "VERTICALFORMS");
   2181 
   2182         /**
   2183          * Constant for the "Ancient Greek Numbers" Unicode character block.
   2184          * @since 1.7
   2185          */
   2186         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
   2187             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
   2188                              "ANCIENT GREEK NUMBERS",
   2189                              "ANCIENTGREEKNUMBERS");
   2190 
   2191         /**
   2192          * Constant for the "Ancient Symbols" Unicode character block.
   2193          * @since 1.7
   2194          */
   2195         public static final UnicodeBlock ANCIENT_SYMBOLS =
   2196             new UnicodeBlock("ANCIENT_SYMBOLS",
   2197                              "ANCIENT SYMBOLS",
   2198                              "ANCIENTSYMBOLS");
   2199 
   2200         /**
   2201          * Constant for the "Phaistos Disc" Unicode character block.
   2202          * @since 1.7
   2203          */
   2204         public static final UnicodeBlock PHAISTOS_DISC =
   2205             new UnicodeBlock("PHAISTOS_DISC",
   2206                              "PHAISTOS DISC",
   2207                              "PHAISTOSDISC");
   2208 
   2209         /**
   2210          * Constant for the "Lycian" Unicode character block.
   2211          * @since 1.7
   2212          */
   2213         public static final UnicodeBlock LYCIAN =
   2214             new UnicodeBlock("LYCIAN"); // one-word name: only the identifier is passed
   2215 
   2216         /**
   2217          * Constant for the "Carian" Unicode character block.
   2218          * @since 1.7
   2219          */
   2220         public static final UnicodeBlock CARIAN =
   2221             new UnicodeBlock("CARIAN");
   2222 
   2223         /**
   2224          * Constant for the "Old Persian" Unicode character block.
   2225          * @since 1.7
   2226          */
   2227         public static final UnicodeBlock OLD_PERSIAN =
   2228             new UnicodeBlock("OLD_PERSIAN",
   2229                              "OLD PERSIAN",
   2230                              "OLDPERSIAN");
   2231 
   2232         /**
   2233          * Constant for the "Imperial Aramaic" Unicode character block.
   2234          * @since 1.7
   2235          */
   2236         public static final UnicodeBlock IMPERIAL_ARAMAIC =
   2237             new UnicodeBlock("IMPERIAL_ARAMAIC",
   2238                              "IMPERIAL ARAMAIC",
   2239                              "IMPERIALARAMAIC");
   2240 
   2241         /**
   2242          * Constant for the "Phoenician" Unicode character block.
   2243          * @since 1.7
   2244          */
   2245         public static final UnicodeBlock PHOENICIAN =
   2246             new UnicodeBlock("PHOENICIAN");
   2247 
   2248         /**
   2249          * Constant for the "Lydian" Unicode character block.
   2250          * @since 1.7
   2251          */
   2252         public static final UnicodeBlock LYDIAN =
   2253             new UnicodeBlock("LYDIAN");
   2254 
   2255         /**
   2256          * Constant for the "Kharoshthi" Unicode character block (U+10A00..U+10A5F).
   2257          * @since 1.7
   2258          */
   2259         public static final UnicodeBlock KHAROSHTHI =
   2260             new UnicodeBlock("KHAROSHTHI");
   2261 
   2262         /**
   2263          * Constant for the "Old South Arabian" Unicode character block (U+10A60..U+10A7F).
   2264          * @since 1.7
   2265          */
   2266         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
   2267             new UnicodeBlock("OLD_SOUTH_ARABIAN",
   2268                              "OLD SOUTH ARABIAN",
   2269                              "OLDSOUTHARABIAN");
   2270 
   2271         /**
   2272          * Constant for the "Avestan" Unicode character block (U+10B00..U+10B3F).
   2273          * @since 1.7
   2274          */
   2275         public static final UnicodeBlock AVESTAN =
   2276             new UnicodeBlock("AVESTAN");
   2277 
   2278         /**
   2279          * Constant for the "Inscriptional Parthian" Unicode character block (U+10B40..U+10B5F).
   2280          * @since 1.7
   2281          */
   2282         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
   2283             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
   2284                              "INSCRIPTIONAL PARTHIAN",
   2285                              "INSCRIPTIONALPARTHIAN");
   2286 
   2287         /**
   2288          * Constant for the "Inscriptional Pahlavi" Unicode character block (U+10B60..U+10B7F).
   2289          * @since 1.7
   2290          */
   2291         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
   2292             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
   2293                              "INSCRIPTIONAL PAHLAVI",
   2294                              "INSCRIPTIONALPAHLAVI");
   2295 
   2296         /**
   2297          * Constant for the "Old Turkic" Unicode character block (U+10C00..U+10C4F).
   2298          * @since 1.7
   2299          */
   2300         public static final UnicodeBlock OLD_TURKIC =
   2301             new UnicodeBlock("OLD_TURKIC",
   2302                              "OLD TURKIC",
   2303                              "OLDTURKIC");
   2304 
   2305         /**
   2306          * Constant for the "Rumi Numeral Symbols" Unicode character block (U+10E60..U+10E7F).
   2307          * @since 1.7
   2308          */
   2309         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
   2310             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
   2311                              "RUMI NUMERAL SYMBOLS",
   2312                              "RUMINUMERALSYMBOLS");
   2313 
   2314         /**
   2315          * Constant for the "Brahmi" Unicode character block (U+11000..U+1107F).
   2316          * @since 1.7
   2317          */
   2318         public static final UnicodeBlock BRAHMI =
   2319             new UnicodeBlock("BRAHMI");
   2320 
   2321         /**
   2322          * Constant for the "Kaithi" Unicode character block (U+11080..U+110CF).
   2323          * @since 1.7
   2324          */
   2325         public static final UnicodeBlock KAITHI =
   2326             new UnicodeBlock("KAITHI");
   2327 
   2328         /**
   2329          * Constant for the "Cuneiform" Unicode character block (U+12000..U+123FF).
   2330          * @since 1.7
   2331          */
   2332         public static final UnicodeBlock CUNEIFORM =
   2333             new UnicodeBlock("CUNEIFORM");
   2334 
   2335         /**
   2336          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
   2337          * character block (U+12400..U+1247F).
   2338          * @since 1.7
   2339          */
   2340         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
   2341             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
   2342                              "CUNEIFORM NUMBERS AND PUNCTUATION",
   2343                              "CUNEIFORMNUMBERSANDPUNCTUATION");
   2344 
   2345         /**
   2346          * Constant for the "Egyptian Hieroglyphs" Unicode character block (U+13000..U+1342F).
   2347          * @since 1.7
   2348          */
   2349         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
   2350             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
   2351                              "EGYPTIAN HIEROGLYPHS",
   2352                              "EGYPTIANHIEROGLYPHS");
   2353 
   2354         /**
   2355          * Constant for the "Bamum Supplement" Unicode character block (U+16800..U+16A3F).
   2356          * @since 1.7
   2357          */
   2358         public static final UnicodeBlock BAMUM_SUPPLEMENT =
   2359             new UnicodeBlock("BAMUM_SUPPLEMENT",
   2360                              "BAMUM SUPPLEMENT",
   2361                              "BAMUMSUPPLEMENT");
   2362 
   2363         /**
   2364          * Constant for the "Kana Supplement" Unicode character block (U+1B000..U+1B0FF).
   2365          * @since 1.7
   2366          */
   2367         public static final UnicodeBlock KANA_SUPPLEMENT =
   2368             new UnicodeBlock("KANA_SUPPLEMENT",
   2369                              "KANA SUPPLEMENT",
   2370                              "KANASUPPLEMENT");
   2371 
   2372         /**
   2373          * Constant for the "Ancient Greek Musical Notation" Unicode character
   2374          * block (U+1D200..U+1D24F).
   2375          * @since 1.7
   2376          */
   2377         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
   2378             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
   2379                              "ANCIENT GREEK MUSICAL NOTATION",
   2380                              "ANCIENTGREEKMUSICALNOTATION");
   2381 
   2382         /**
   2383          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).
   2384          * @since 1.7
   2385          */
   2386         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
   2387             new UnicodeBlock("COUNTING_ROD_NUMERALS",
   2388                              "COUNTING ROD NUMERALS",
   2389                              "COUNTINGRODNUMERALS");
   2390 
   2391         /**
   2392          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).
   2393          * @since 1.7
   2394          */
   2395         public static final UnicodeBlock MAHJONG_TILES =
   2396             new UnicodeBlock("MAHJONG_TILES",
   2397                              "MAHJONG TILES",
   2398                              "MAHJONGTILES");
   2399 
   2400         /**
   2401          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).
   2402          * @since 1.7
   2403          */
   2404         public static final UnicodeBlock DOMINO_TILES =
   2405             new UnicodeBlock("DOMINO_TILES",
   2406                              "DOMINO TILES",
   2407                              "DOMINOTILES");
   2408 
   2409         /**
   2410          * Constant for the "Playing Cards" Unicode character block (U+1F0A0..U+1F0FF).
   2411          * @since 1.7
   2412          */
   2413         public static final UnicodeBlock PLAYING_CARDS =
   2414             new UnicodeBlock("PLAYING_CARDS",
   2415                              "PLAYING CARDS",
   2416                              "PLAYINGCARDS");
   2417 
   2418         /**
   2419          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
   2420          * block (U+1F100..U+1F1FF).
   2421          * @since 1.7
   2422          */
   2423         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
   2424             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
   2425                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
   2426                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
   2427 
   2428         /**
   2429          * Constant for the "Enclosed Ideographic Supplement" Unicode character
   2430          * block (U+1F200..U+1F2FF).
   2431          * @since 1.7
   2432          */
   2433         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
   2434             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
   2435                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
   2436                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
   2437 
   2438         /**
   2439          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
   2440          * character block (U+1F300..U+1F5FF).
   2441          * @since 1.7
   2442          */
   2443         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
   2444             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
   2445                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
   2446                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
   2447 
   2448         /**
   2449          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
   2450          * @since 1.7
   2451          */
   2452         public static final UnicodeBlock EMOTICONS =
   2453             new UnicodeBlock("EMOTICONS");
   2454 
   2455         /**
   2456          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
   2457          * @since 1.7
   2458          */
   2459         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
   2460             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
   2461                              "TRANSPORT AND MAP SYMBOLS",
   2462                              "TRANSPORTANDMAPSYMBOLS");
   2463 
   2464         /**
   2465          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
   2466          * @since 1.7
   2467          */
   2468         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
   2469             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
   2470                              "ALCHEMICAL SYMBOLS",
   2471                              "ALCHEMICALSYMBOLS");
   2472 
   2473         /**
   2474          * Constant for the "CJK Unified Ideographs Extension C" Unicode
   2475          * character block (U+2A700..U+2B73F).
   2476          * @since 1.7
   2477          */
   2478         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
   2479             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
   2480                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
   2481                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
   2482 
   2483         /**
   2484          * Constant for the "CJK Unified Ideographs Extension D" Unicode
   2485          * character block (U+2B740..U+2B81F).
   2486          * @since 1.7
   2487          */
   2488         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
   2489             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
   2490                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
   2491                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
   2492 
   2493         /**
   2494          * Constant for the "Arabic Extended-A" Unicode character block (U+08A0..U+08FF).
   2495          * @since 1.8
   2496          */
   2497         public static final UnicodeBlock ARABIC_EXTENDED_A =
   2498             new UnicodeBlock("ARABIC_EXTENDED_A",
   2499                              "ARABIC EXTENDED-A",
   2500                              "ARABICEXTENDED-A");
   2501 
   2502         /**
   2503          * Constant for the "Sundanese Supplement" Unicode character block (U+1CC0..U+1CCF).
   2504          * @since 1.8
   2505          */
   2506         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
   2507             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
   2508                              "SUNDANESE SUPPLEMENT",
   2509                              "SUNDANESESUPPLEMENT");
   2510 
   2511         /**
   2512          * Constant for the "Meetei Mayek Extensions" Unicode character block (U+AAE0..U+AAFF).
   2513          * @since 1.8
   2514          */
   2515         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
   2516             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
   2517                              "MEETEI MAYEK EXTENSIONS",
   2518                              "MEETEIMAYEKEXTENSIONS");
   2519 
   2520         /**
   2521          * Constant for the "Meroitic Hieroglyphs" Unicode character block (U+10980..U+1099F).
   2522          * @since 1.8
   2523          */
   2524         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
   2525             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
   2526                              "MEROITIC HIEROGLYPHS",
   2527                              "MEROITICHIEROGLYPHS");
   2528 
   2529         /**
   2530          * Constant for the "Meroitic Cursive" Unicode character block (U+109A0..U+109FF).
   2531          * @since 1.8
   2532          */
   2533         public static final UnicodeBlock MEROITIC_CURSIVE =
   2534             new UnicodeBlock("MEROITIC_CURSIVE",
   2535                              "MEROITIC CURSIVE",
   2536                              "MEROITICCURSIVE");
   2537 
   2538         /**
   2539          * Constant for the "Sora Sompeng" Unicode character block (U+110D0..U+110FF).
   2540          * @since 1.8
   2541          */
   2542         public static final UnicodeBlock SORA_SOMPENG =
   2543             new UnicodeBlock("SORA_SOMPENG",
   2544                              "SORA SOMPENG",
   2545                              "SORASOMPENG");
   2546 
   2547         /**
   2548          * Constant for the "Chakma" Unicode character block (U+11100..U+1114F).
   2549          * @since 1.8
   2550          */
   2551         public static final UnicodeBlock CHAKMA =
   2552             new UnicodeBlock("CHAKMA");
   2553 
   2554         /**
   2555          * Constant for the "Sharada" Unicode character block (U+11180..U+111DF).
   2556          * @since 1.8
   2557          */
   2558         public static final UnicodeBlock SHARADA =
   2559             new UnicodeBlock("SHARADA");
   2560 
   2561         /**
   2562          * Constant for the "Takri" Unicode character block (U+11680..U+116CF).
   2563          * @since 1.8
   2564          */
   2565         public static final UnicodeBlock TAKRI =
   2566             new UnicodeBlock("TAKRI");
   2567 
   2568         /**
   2569          * Constant for the "Miao" Unicode character block (U+16F00..U+16F9F).
   2570          * @since 1.8
   2571          */
   2572         public static final UnicodeBlock MIAO =
   2573             new UnicodeBlock("MIAO");
   2574 
   2575         /**
   2576          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
   2577          * character block (U+1EE00..U+1EEFF).
   2578          * @since 1.8
   2579          */
   2580         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
   2581             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
   2582                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
   2583                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
   2584 
   2585         private static final int blockStarts[] = { // first code point of each range, in ascending order
   2586             0x0000,   // 0000..007F; Basic Latin
   2587             0x0080,   // 0080..00FF; Latin-1 Supplement
   2588             0x0100,   // 0100..017F; Latin Extended-A
   2589             0x0180,   // 0180..024F; Latin Extended-B
   2590             0x0250,   // 0250..02AF; IPA Extensions
   2591             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
   2592             0x0300,   // 0300..036F; Combining Diacritical Marks
   2593             0x0370,   // 0370..03FF; Greek and Coptic
   2594             0x0400,   // 0400..04FF; Cyrillic
   2595             0x0500,   // 0500..052F; Cyrillic Supplement
   2596             0x0530,   // 0530..058F; Armenian
   2597             0x0590,   // 0590..05FF; Hebrew
   2598             0x0600,   // 0600..06FF; Arabic
   2599             0x0700,   // 0700..074F; Syriac
   2600             0x0750,   // 0750..077F; Arabic Supplement
   2601             0x0780,   // 0780..07BF; Thaana
   2602             0x07C0,   // 07C0..07FF; NKo
   2603             0x0800,   // 0800..083F; Samaritan
   2604             0x0840,   // 0840..085F; Mandaic
   2605             0x0860,   // 0860..089F; unassigned
   2606             0x08A0,   // 08A0..08FF; Arabic Extended-A
   2607             0x0900,   // 0900..097F; Devanagari
   2608             0x0980,   // 0980..09FF; Bengali
   2609             0x0A00,   // 0A00..0A7F; Gurmukhi
   2610             0x0A80,   // 0A80..0AFF; Gujarati
   2611             0x0B00,   // 0B00..0B7F; Oriya
   2612             0x0B80,   // 0B80..0BFF; Tamil
   2613             0x0C00,   // 0C00..0C7F; Telugu
   2614             0x0C80,   // 0C80..0CFF; Kannada
   2615             0x0D00,   // 0D00..0D7F; Malayalam
   2616             0x0D80,   // 0D80..0DFF; Sinhala
   2617             0x0E00,   // 0E00..0E7F; Thai
   2618             0x0E80,   // 0E80..0EFF; Lao
   2619             0x0F00,   // 0F00..0FFF; Tibetan
   2620             0x1000,   // 1000..109F; Myanmar
   2621             0x10A0,   // 10A0..10FF; Georgian
   2622             0x1100,   // 1100..11FF; Hangul Jamo
   2623             0x1200,   // 1200..137F; Ethiopic
   2624             0x1380,   // 1380..139F; Ethiopic Supplement
   2625             0x13A0,   // 13A0..13FF; Cherokee
   2626             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
   2627             0x1680,   // 1680..169F; Ogham
   2628             0x16A0,   // 16A0..16FF; Runic
   2629             0x1700,   // 1700..171F; Tagalog
   2630             0x1720,   // 1720..173F; Hanunoo
   2631             0x1740,   // 1740..175F; Buhid
   2632             0x1760,   // 1760..177F; Tagbanwa
   2633             0x1780,   // 1780..17FF; Khmer
   2634             0x1800,   // 1800..18AF; Mongolian
   2635             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
   2636             0x1900,   // 1900..194F; Limbu
   2637             0x1950,   // 1950..197F; Tai Le
   2638             0x1980,   // 1980..19DF; New Tai Lue
   2639             0x19E0,   // 19E0..19FF; Khmer Symbols
   2640             0x1A00,   // 1A00..1A1F; Buginese
   2641             0x1A20,   // 1A20..1AAF; Tai Tham
   2642             0x1AB0,   // 1AB0..1AFF; unassigned
   2643             0x1B00,   // 1B00..1B7F; Balinese
   2644             0x1B80,   // 1B80..1BBF; Sundanese
   2645             0x1BC0,   // 1BC0..1BFF; Batak
   2646             0x1C00,   // 1C00..1C4F; Lepcha
   2647             0x1C50,   // 1C50..1C7F; Ol Chiki
   2648             0x1C80,   // 1C80..1CBF; unassigned
   2649             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
   2650             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
   2651             0x1D00,   // 1D00..1D7F; Phonetic Extensions
   2652             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
   2653             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
   2654             0x1E00,   // 1E00..1EFF; Latin Extended Additional
   2655             0x1F00,   // 1F00..1FFF; Greek Extended
   2656             0x2000,   // 2000..206F; General Punctuation
   2657             0x2070,   // 2070..209F; Superscripts and Subscripts
   2658             0x20A0,   // 20A0..20CF; Currency Symbols
   2659             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
   2660             0x2100,   // 2100..214F; Letterlike Symbols
   2661             0x2150,   // 2150..218F; Number Forms
   2662             0x2190,   // 2190..21FF; Arrows
   2663             0x2200,   // 2200..22FF; Mathematical Operators
   2664             0x2300,   // 2300..23FF; Miscellaneous Technical
   2665             0x2400,   // 2400..243F; Control Pictures
   2666             0x2440,   // 2440..245F; Optical Character Recognition
   2667             0x2460,   // 2460..24FF; Enclosed Alphanumerics
   2668             0x2500,   // 2500..257F; Box Drawing
   2669             0x2580,   // 2580..259F; Block Elements
   2670             0x25A0,   // 25A0..25FF; Geometric Shapes
   2671             0x2600,   // 2600..26FF; Miscellaneous Symbols
   2672             0x2700,   // 2700..27BF; Dingbats
   2673             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
   2674             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
   2675             0x2800,   // 2800..28FF; Braille Patterns
   2676             0x2900,   // 2900..297F; Supplemental Arrows-B
   2677             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
   2678             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
   2679             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
   2680             0x2C00,   // 2C00..2C5F; Glagolitic
   2681             0x2C60,   // 2C60..2C7F; Latin Extended-C
   2682             0x2C80,   // 2C80..2CFF; Coptic
   2683             0x2D00,   // 2D00..2D2F; Georgian Supplement
   2684             0x2D30,   // 2D30..2D7F; Tifinagh
   2685             0x2D80,   // 2D80..2DDF; Ethiopic Extended
   2686             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
   2687             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
   2688             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
   2689             0x2F00,   // 2F00..2FDF; Kangxi Radicals
   2690             0x2FE0,   // 2FE0..2FEF; unassigned
   2691             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
   2692             0x3000,   // 3000..303F; CJK Symbols and Punctuation
   2693             0x3040,   // 3040..309F; Hiragana
   2694             0x30A0,   // 30A0..30FF; Katakana
   2695             0x3100,   // 3100..312F; Bopomofo
   2696             0x3130,   // 3130..318F; Hangul Compatibility Jamo
   2697             0x3190,   // 3190..319F; Kanbun
   2698             0x31A0,   // 31A0..31BF; Bopomofo Extended
   2699             0x31C0,   // 31C0..31EF; CJK Strokes
   2700             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
   2701             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
   2702             0x3300,   // 3300..33FF; CJK Compatibility
   2703             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
   2704             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
   2705             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
   2706             0xA000,   // A000..A48F; Yi Syllables
   2707             0xA490,   // A490..A4CF; Yi Radicals
   2708             0xA4D0,   // A4D0..A4FF; Lisu
   2709             0xA500,   // A500..A63F; Vai
   2710             0xA640,   // A640..A69F; Cyrillic Extended-B
   2711             0xA6A0,   // A6A0..A6FF; Bamum
   2712             0xA700,   // A700..A71F; Modifier Tone Letters
   2713             0xA720,   // A720..A7FF; Latin Extended-D
   2714             0xA800,   // A800..A82F; Syloti Nagri
   2715             0xA830,   // A830..A83F; Common Indic Number Forms
   2716             0xA840,   // A840..A87F; Phags-pa
   2717             0xA880,   // A880..A8DF; Saurashtra
   2718             0xA8E0,   // A8E0..A8FF; Devanagari Extended
   2719             0xA900,   // A900..A92F; Kayah Li
   2720             0xA930,   // A930..A95F; Rejang
   2721             0xA960,   // A960..A97F; Hangul Jamo Extended-A
   2722             0xA980,   // A980..A9DF; Javanese
   2723             0xA9E0,   // A9E0..A9FF; unassigned
   2724             0xAA00,   // AA00..AA5F; Cham
   2725             0xAA60,   // AA60..AA7F; Myanmar Extended-A
   2726             0xAA80,   // AA80..AADF; Tai Viet
   2727             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
   2728             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
   2729             0xAB30,   // AB30..ABBF; unassigned
   2730             0xABC0,   // ABC0..ABFF; Meetei Mayek
   2731             0xAC00,   // AC00..D7AF; Hangul Syllables
   2732             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
   2733             0xD800,   // D800..DB7F; High Surrogates
   2734             0xDB80,   // DB80..DBFF; High Private Use Surrogates
   2735             0xDC00,   // DC00..DFFF; Low Surrogates
   2736             0xE000,   // E000..F8FF; Private Use Area
   2737             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
   2738             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
   2739             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
   2740             0xFE00,   // FE00..FE0F; Variation Selectors
   2741             0xFE10,   // FE10..FE1F; Vertical Forms
   2742             0xFE20,   // FE20..FE2F; Combining Half Marks
   2743             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
   2744             0xFE50,   // FE50..FE6F; Small Form Variants
   2745             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
   2746             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
   2747             0xFFF0,   // FFF0..FFFF; Specials
   2748             0x10000,  // 10000..1007F; Linear B Syllabary
   2749             0x10080,  // 10080..100FF; Linear B Ideograms
   2750             0x10100,  // 10100..1013F; Aegean Numbers
   2751             0x10140,  // 10140..1018F; Ancient Greek Numbers
   2752             0x10190,  // 10190..101CF; Ancient Symbols
   2753             0x101D0,  // 101D0..101FF; Phaistos Disc
   2754             0x10200,  // 10200..1027F; unassigned
   2755             0x10280,  // 10280..1029F; Lycian
   2756             0x102A0,  // 102A0..102DF; Carian
   2757             0x102E0,  // 102E0..102FF; unassigned
   2758             0x10300,  // 10300..1032F; Old Italic
   2759             0x10330,  // 10330..1034F; Gothic
   2760             0x10350,  // 10350..1037F; unassigned
   2761             0x10380,  // 10380..1039F; Ugaritic
   2762             0x103A0,  // 103A0..103DF; Old Persian
   2763             0x103E0,  // 103E0..103FF; unassigned
   2764             0x10400,  // 10400..1044F; Deseret
   2765             0x10450,  // 10450..1047F; Shavian
   2766             0x10480,  // 10480..104AF; Osmanya
   2767             0x104B0,  // 104B0..107FF; unassigned
   2768             0x10800,  // 10800..1083F; Cypriot Syllabary
   2769             0x10840,  // 10840..1085F; Imperial Aramaic
   2770             0x10860,  // 10860..108FF; unassigned
   2771             0x10900,  // 10900..1091F; Phoenician
   2772             0x10920,  // 10920..1093F; Lydian
   2773             0x10940,  // 10940..1097F; unassigned
   2774             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
   2775             0x109A0,  // 109A0..109FF; Meroitic Cursive
   2776             0x10A00,  // 10A00..10A5F; Kharoshthi
   2777             0x10A60,  // 10A60..10A7F; Old South Arabian
   2778             0x10A80,  // 10A80..10AFF; unassigned
   2779             0x10B00,  // 10B00..10B3F; Avestan
   2780             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
   2781             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
   2782             0x10B80,  // 10B80..10BFF; unassigned
   2783             0x10C00,  // 10C00..10C4F; Old Turkic
   2784             0x10C50,  // 10C50..10E5F; unassigned
   2785             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
   2786             0x10E80,  // 10E80..10FFF; unassigned
   2787             0x11000,  // 11000..1107F; Brahmi
   2788             0x11080,  // 11080..110CF; Kaithi
   2789             0x110D0,  // 110D0..110FF; Sora Sompeng
   2790             0x11100,  // 11100..1114F; Chakma
   2791             0x11150,  // 11150..1117F; unassigned
   2792             0x11180,  // 11180..111DF; Sharada
   2793             0x111E0,  // 111E0..1167F; unassigned
   2794             0x11680,  // 11680..116CF; Takri
   2795             0x116D0,  // 116D0..11FFF; unassigned
   2796             0x12000,  // 12000..123FF; Cuneiform
   2797             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
   2798             0x12480,  // 12480..12FFF; unassigned
   2799             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
   2800             0x13430,  // 13430..167FF; unassigned
   2801             0x16800,  // 16800..16A3F; Bamum Supplement
   2802             0x16A40,  // 16A40..16EFF; unassigned
   2803             0x16F00,  // 16F00..16F9F; Miao
   2804             0x16FA0,  // 16FA0..1AFFF; unassigned
   2805             0x1B000,  // 1B000..1B0FF; Kana Supplement
   2806             0x1B100,  // 1B100..1CFFF; unassigned
   2807             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
   2808             0x1D100,  // 1D100..1D1FF; Musical Symbols
   2809             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
   2810             0x1D250,  // 1D250..1D2FF; unassigned
   2811             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
   2812             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
   2813             0x1D380,  // 1D380..1D3FF; unassigned
   2814             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
   2815             0x1D800,  // 1D800..1EDFF; unassigned
   2816             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
   2817             0x1EF00,  // 1EF00..1EFFF; unassigned
   2818             0x1F000,  // 1F000..1F02F; Mahjong Tiles
   2819             0x1F030,  // 1F030..1F09F; Domino Tiles
   2820             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
   2821             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
   2822             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
   2823             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
   2824             0x1F600,  // 1F600..1F64F; Emoticons
   2825             0x1F650,  // 1F650..1F67F; unassigned
   2826             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
   2827             0x1F700,  // 1F700..1F77F; Alchemical Symbols
   2828             0x1F780,  // 1F780..1FFFF; unassigned
   2829             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
   2830             0x2A6E0,  // 2A6E0..2A6FF; unassigned
   2831             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
   2832             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
   2833             0x2B820,  // 2B820..2F7FF; unassigned
   2834             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
   2835             0x2FA20,  // 2FA20..DFFFF; unassigned
   2836             0xE0000,  // E0000..E007F; Tags
   2837             0xE0080,  // E0080..E00FF; unassigned
   2838             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
   2839             0xE01F0,  // E01F0..EFFFF; unassigned
   2840             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
   2841             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
   2842         }; // NOTE(review): "unassigned" rows appear to pair index-for-index with null entries in blocks[] - confirm
   2843         // Lookup table parallel to blockStarts: blocks[i] is the block whose range begins at blockStarts[i].
   2844         private static final UnicodeBlock[] blocks = { // null = range that belongs to no block; order must match blockStarts
   2845             BASIC_LATIN,
   2846             LATIN_1_SUPPLEMENT,
   2847             LATIN_EXTENDED_A,
   2848             LATIN_EXTENDED_B,
   2849             IPA_EXTENSIONS,
   2850             SPACING_MODIFIER_LETTERS,
   2851             COMBINING_DIACRITICAL_MARKS,
   2852             GREEK,
   2853             CYRILLIC,
   2854             CYRILLIC_SUPPLEMENTARY,
   2855             ARMENIAN,
   2856             HEBREW,
   2857             ARABIC,
   2858             SYRIAC,
   2859             ARABIC_SUPPLEMENT,
   2860             THAANA,
   2861             NKO,
   2862             SAMARITAN,
   2863             MANDAIC,
   2864             null,
   2865             ARABIC_EXTENDED_A,
   2866             DEVANAGARI,
   2867             BENGALI,
   2868             GURMUKHI,
   2869             GUJARATI,
   2870             ORIYA,
   2871             TAMIL,
   2872             TELUGU,
   2873             KANNADA,
   2874             MALAYALAM,
   2875             SINHALA,
   2876             THAI,
   2877             LAO,
   2878             TIBETAN,
   2879             MYANMAR,
   2880             GEORGIAN,
   2881             HANGUL_JAMO,
   2882             ETHIOPIC,
   2883             ETHIOPIC_SUPPLEMENT,
   2884             CHEROKEE,
   2885             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
   2886             OGHAM,
   2887             RUNIC,
   2888             TAGALOG,
   2889             HANUNOO,
   2890             BUHID,
   2891             TAGBANWA,
   2892             KHMER,
   2893             MONGOLIAN,
   2894             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
   2895             LIMBU,
   2896             TAI_LE,
   2897             NEW_TAI_LUE,
   2898             KHMER_SYMBOLS,
   2899             BUGINESE,
   2900             TAI_THAM,
   2901             null,
   2902             BALINESE,
   2903             SUNDANESE,
   2904             BATAK,
   2905             LEPCHA,
   2906             OL_CHIKI,
   2907             null,
   2908             SUNDANESE_SUPPLEMENT,
   2909             VEDIC_EXTENSIONS,
   2910             PHONETIC_EXTENSIONS,
   2911             PHONETIC_EXTENSIONS_SUPPLEMENT,
   2912             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
   2913             LATIN_EXTENDED_ADDITIONAL,
   2914             GREEK_EXTENDED,
   2915             GENERAL_PUNCTUATION,
   2916             SUPERSCRIPTS_AND_SUBSCRIPTS,
   2917             CURRENCY_SYMBOLS,
   2918             COMBINING_MARKS_FOR_SYMBOLS,
   2919             LETTERLIKE_SYMBOLS,
   2920             NUMBER_FORMS,
   2921             ARROWS,
   2922             MATHEMATICAL_OPERATORS,
   2923             MISCELLANEOUS_TECHNICAL,
   2924             CONTROL_PICTURES,
   2925             OPTICAL_CHARACTER_RECOGNITION,
   2926             ENCLOSED_ALPHANUMERICS,
   2927             BOX_DRAWING,
   2928             BLOCK_ELEMENTS,
   2929             GEOMETRIC_SHAPES,
   2930             MISCELLANEOUS_SYMBOLS,
   2931             DINGBATS,
   2932             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
   2933             SUPPLEMENTAL_ARROWS_A,
   2934             BRAILLE_PATTERNS,
   2935             SUPPLEMENTAL_ARROWS_B,
   2936             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
   2937             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
   2938             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
   2939             GLAGOLITIC,
   2940             LATIN_EXTENDED_C,
   2941             COPTIC,
   2942             GEORGIAN_SUPPLEMENT,
   2943             TIFINAGH,
   2944             ETHIOPIC_EXTENDED,
   2945             CYRILLIC_EXTENDED_A,
   2946             SUPPLEMENTAL_PUNCTUATION,
   2947             CJK_RADICALS_SUPPLEMENT,
   2948             KANGXI_RADICALS,
   2949             null,
   2950             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
   2951             CJK_SYMBOLS_AND_PUNCTUATION,
   2952             HIRAGANA,
   2953             KATAKANA,
   2954             BOPOMOFO,
   2955             HANGUL_COMPATIBILITY_JAMO,
   2956             KANBUN,
   2957             BOPOMOFO_EXTENDED,
   2958             CJK_STROKES,
   2959             KATAKANA_PHONETIC_EXTENSIONS,
   2960             ENCLOSED_CJK_LETTERS_AND_MONTHS,
   2961             CJK_COMPATIBILITY,
   2962             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
   2963             YIJING_HEXAGRAM_SYMBOLS,
   2964             CJK_UNIFIED_IDEOGRAPHS,
   2965             YI_SYLLABLES,
   2966             YI_RADICALS,
   2967             LISU,
   2968             VAI,
   2969             CYRILLIC_EXTENDED_B,
   2970             BAMUM,
   2971             MODIFIER_TONE_LETTERS,
   2972             LATIN_EXTENDED_D,
   2973             SYLOTI_NAGRI,
   2974             COMMON_INDIC_NUMBER_FORMS,
   2975             PHAGS_PA,
   2976             SAURASHTRA,
   2977             DEVANAGARI_EXTENDED,
   2978             KAYAH_LI,
   2979             REJANG,
   2980             HANGUL_JAMO_EXTENDED_A,
   2981             JAVANESE,
   2982             null,
   2983             CHAM,
   2984             MYANMAR_EXTENDED_A,
   2985             TAI_VIET,
   2986             MEETEI_MAYEK_EXTENSIONS,
   2987             ETHIOPIC_EXTENDED_A,
   2988             null,
   2989             MEETEI_MAYEK,
   2990             HANGUL_SYLLABLES,
   2991             HANGUL_JAMO_EXTENDED_B,
   2992             HIGH_SURROGATES,
   2993             HIGH_PRIVATE_USE_SURROGATES,
   2994             LOW_SURROGATES,
   2995             PRIVATE_USE_AREA,
   2996             CJK_COMPATIBILITY_IDEOGRAPHS,
   2997             ALPHABETIC_PRESENTATION_FORMS,
   2998             ARABIC_PRESENTATION_FORMS_A,
   2999             VARIATION_SELECTORS,
   3000             VERTICAL_FORMS,
   3001             COMBINING_HALF_MARKS,
   3002             CJK_COMPATIBILITY_FORMS,
   3003             SMALL_FORM_VARIANTS,
   3004             ARABIC_PRESENTATION_FORMS_B,
   3005             HALFWIDTH_AND_FULLWIDTH_FORMS,
   3006             SPECIALS,
   3007             LINEAR_B_SYLLABARY,
   3008             LINEAR_B_IDEOGRAMS,
   3009             AEGEAN_NUMBERS,
   3010             ANCIENT_GREEK_NUMBERS,
   3011             ANCIENT_SYMBOLS,
   3012             PHAISTOS_DISC,
   3013             null,
   3014             LYCIAN,
   3015             CARIAN,
   3016             null,
   3017             OLD_ITALIC,
   3018             GOTHIC,
   3019             null,
   3020             UGARITIC,
   3021             OLD_PERSIAN,
   3022             null,
   3023             DESERET,
   3024             SHAVIAN,
   3025             OSMANYA,
   3026             null,
   3027             CYPRIOT_SYLLABARY,
   3028             IMPERIAL_ARAMAIC,
   3029             null,
   3030             PHOENICIAN,
   3031             LYDIAN,
   3032             null,
   3033             MEROITIC_HIEROGLYPHS,
   3034             MEROITIC_CURSIVE,
   3035             KHAROSHTHI,
   3036             OLD_SOUTH_ARABIAN,
   3037             null,
   3038             AVESTAN,
   3039             INSCRIPTIONAL_PARTHIAN,
   3040             INSCRIPTIONAL_PAHLAVI,
   3041             null,
   3042             OLD_TURKIC,
   3043             null,
   3044             RUMI_NUMERAL_SYMBOLS,
   3045             null,
   3046             BRAHMI,
   3047             KAITHI,
   3048             SORA_SOMPENG,
   3049             CHAKMA,
   3050             null,
   3051             SHARADA,
   3052             null,
   3053             TAKRI,
   3054             null,
   3055             CUNEIFORM,
   3056             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
   3057             null,
   3058             EGYPTIAN_HIEROGLYPHS,
   3059             null,
   3060             BAMUM_SUPPLEMENT,
   3061             null,
   3062             MIAO,
   3063             null,
   3064             KANA_SUPPLEMENT,
   3065             null,
   3066             BYZANTINE_MUSICAL_SYMBOLS,
   3067             MUSICAL_SYMBOLS,
   3068             ANCIENT_GREEK_MUSICAL_NOTATION,
   3069             null,
   3070             TAI_XUAN_JING_SYMBOLS,
   3071             COUNTING_ROD_NUMERALS,
   3072             null,
   3073             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
   3074             null,
   3075             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
   3076             null,
   3077             MAHJONG_TILES,
   3078             DOMINO_TILES,
   3079             PLAYING_CARDS,
   3080             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
   3081             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
   3082             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
   3083             EMOTICONS,
   3084             null,
   3085             TRANSPORT_AND_MAP_SYMBOLS,
   3086             ALCHEMICAL_SYMBOLS,
   3087             null,
   3088             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
   3089             null,
   3090             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
   3091             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
   3092             null,
   3093             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
   3094             null,  // pairs with blockStarts entry 0x2FA20 (unassigned)
   3095             TAGS,
   3096             null,  // pairs with blockStarts entry 0xE0080 (unassigned)
   3097             VARIATION_SELECTORS_SUPPLEMENT,
   3098             null,  // pairs with blockStarts entry 0xE01F0 (unassigned)
   3099             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
   3100             SUPPLEMENTARY_PRIVATE_USE_AREA_B
   3101         };
   3102 
   3103 
   3104         /**
   3105          * Looks up the Unicode block that contains the given {@code char},
   3106          * or yields {@code null} when the character does not belong to
   3107          * any defined block.
   3108          *
   3109          * <p><b>Note:</b> A single {@code char} cannot represent a
   3110          * <a href="Character.html#supplementary">supplementary
   3111          * character</a>.  Call {@link #of(int)} with a code point to
   3112          * cover every Unicode character, supplementary ones included.
   3113          *
   3114          * @param   c  the character to classify
   3115          * @return  the {@code UnicodeBlock} instance for the block that
   3116          *          contains {@code c}, or {@code null} if {@code c} is not a
   3117          *          member of any Unicode block
   3118          */
   3119         public static UnicodeBlock of(char c) {
   3120             final int codePoint = c;  // widen so the int overload is selected
   3121             return of(codePoint);
   3122         }
   3123 
   3124         /**
   3125          * Returns the object representing the Unicode block
   3126          * containing the given character (Unicode code point), or
   3127          * {@code null} if the character is not a member of a
   3128          * defined block.
   3129          *
   3130          * @param   codePoint the character (Unicode code point) in question.
   3131          * @return  The {@code UnicodeBlock} instance representing the
   3132          *          Unicode block of which this character is a member, or
   3133          *          {@code null} if the character is not a member of any
   3134          *          Unicode block
   3135          * @exception IllegalArgumentException if the specified
   3136          * {@code codePoint} is an invalid Unicode code point.
   3137          * @see Character#isValidCodePoint(int)
   3138          * @since   1.5
   3139          */
   3140         public static UnicodeBlock of(int codePoint) {
   3141             if (!isValidCodePoint(codePoint)) {
   3142                 throw new IllegalArgumentException();
   3143             }
   3144 
   3145             int top, bottom, current;
   3146             bottom = 0;
   3147             top = blockStarts.length;
   3148             current = top/2;
   3149 
   3150             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
   3151             while (top - bottom > 1) {
   3152                 if (codePoint >= blockStarts[current]) {
   3153                     bottom = current;
   3154                 } else {
   3155                     top = current;
   3156                 }
   3157                 current = (top + bottom) / 2;
   3158             }
   3159             return blocks[current];
   3160         }
   3161 
   3162         /**
   3163          * Returns the UnicodeBlock with the given name. Block
   3164          * names are determined by The Unicode Standard. The file
   3165          * Blocks-&lt;version&gt;.txt defines blocks for a particular
   3166          * version of the standard. The {@link Character} class specifies
   3167          * the version of the standard that it supports.
   3168          * <p>
   3169          * This method accepts block names in the following forms:
   3170          * <ol>
   3171          * <li> Canonical block names as defined by the Unicode Standard.
   3172          * For example, the standard defines a "Basic Latin" block. Therefore, this
   3173          * method accepts "Basic Latin" as a valid block name. The documentation of
   3174          * each UnicodeBlock provides the canonical name.
   3175          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
   3176          * is a valid block name for the "Basic Latin" block.
   3177          * <li>The text representation of each constant UnicodeBlock identifier.
   3178          * For example, this method will return the {@link #BASIC_LATIN} block if
   3179          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
   3180          * hyphens in the canonical name with underscores.
   3181          * </ol>
   3182          * Finally, character case is ignored for all of the valid block name forms.
   3183          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
   3184          * The en_US locale's case mapping rules are used to provide case-insensitive
   3185          * string comparisons for block name validation.
   3186          * <p>
   3187          * If the Unicode Standard changes block names, both the previous and
   3188          * current names will be accepted.
   3189          *
   3190          * @param blockName A {@code UnicodeBlock} name.
   3191          * @return The {@code UnicodeBlock} instance identified
   3192          *         by {@code blockName}
   3193          * @throws IllegalArgumentException if {@code blockName} is an
   3194          *         invalid name; the message includes the rejected name
   3195          * @throws NullPointerException if {@code blockName} is null
   3196          * @since 1.5
   3197          */
   3198         public static final UnicodeBlock forName(String blockName) {
   3199             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
   3200             if (block == null) {
   3201                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
   3202             }
   3203             return block;
   3204         }
   3205     }
   3206 
   3207 
   3208     /**
   3209      * A family of character subsets representing the character scripts
   3210      * defined in the <a href="http://www.unicode.org/reports/tr24/">
   3211      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
   3212      * character is assigned to a single Unicode script, either a specific
   3213      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
   3214      * one of the following three special values,
   3215      * {@link Character.UnicodeScript#INHERITED Inherited},
   3216      * {@link Character.UnicodeScript#COMMON Common} or
   3217      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
   3218      *
   3219      * @since 1.7
   3220      */
   3221     public static enum UnicodeScript {
   3222         /**
   3223          * Unicode script "Common".
   3224          */
   3225         COMMON,
   3226 
   3227         /**
   3228          * Unicode script "Latin".
   3229          */
   3230         LATIN,
   3231 
   3232         /**
   3233          * Unicode script "Greek".
   3234          */
   3235         GREEK,
   3236 
   3237         /**
   3238          * Unicode script "Cyrillic".
   3239          */
   3240         CYRILLIC,
   3241 
   3242         /**
   3243          * Unicode script "Armenian".
   3244          */
   3245         ARMENIAN,
   3246 
   3247         /**
   3248          * Unicode script "Hebrew".
   3249          */
   3250         HEBREW,
   3251 
   3252         /**
   3253          * Unicode script "Arabic".
   3254          */
   3255         ARABIC,
   3256 
   3257         /**
   3258          * Unicode script "Syriac".
   3259          */
   3260         SYRIAC,
   3261 
   3262         /**
   3263          * Unicode script "Thaana".
   3264          */
   3265         THAANA,
   3266 
   3267         /**
   3268          * Unicode script "Devanagari".
   3269          */
   3270         DEVANAGARI,
   3271 
   3272         /**
   3273          * Unicode script "Bengali".
   3274          */
   3275         BENGALI,
   3276 
   3277         /**
   3278          * Unicode script "Gurmukhi".
   3279          */
   3280         GURMUKHI,
   3281 
   3282         /**
   3283          * Unicode script "Gujarati".
   3284          */
   3285         GUJARATI,
   3286 
   3287         /**
   3288          * Unicode script "Oriya".
   3289          */
   3290         ORIYA,
   3291 
   3292         /**
   3293          * Unicode script "Tamil".
   3294          */
   3295         TAMIL,
   3296 
   3297         /**
   3298          * Unicode script "Telugu".
   3299          */
   3300         TELUGU,
   3301 
   3302         /**
   3303          * Unicode script "Kannada".
   3304          */
   3305         KANNADA,
   3306 
   3307         /**
   3308          * Unicode script "Malayalam".
   3309          */
   3310         MALAYALAM,
   3311 
   3312         /**
   3313          * Unicode script "Sinhala".
   3314          */
   3315         SINHALA,
   3316 
   3317         /**
   3318          * Unicode script "Thai".
   3319          */
   3320         THAI,
   3321 
   3322         /**
   3323          * Unicode script "Lao".
   3324          */
   3325         LAO,
   3326 
   3327         /**
   3328          * Unicode script "Tibetan".
   3329          */
   3330         TIBETAN,
   3331 
   3332         /**
   3333          * Unicode script "Myanmar".
   3334          */
   3335         MYANMAR,
   3336 
   3337         /**
   3338          * Unicode script "Georgian".
   3339          */
   3340         GEORGIAN,
   3341 
   3342         /**
   3343          * Unicode script "Hangul".
   3344          */
   3345         HANGUL,
   3346 
   3347         /**
   3348          * Unicode script "Ethiopic".
   3349          */
   3350         ETHIOPIC,
   3351 
   3352         /**
   3353          * Unicode script "Cherokee".
   3354          */
   3355         CHEROKEE,
   3356 
   3357         /**
   3358          * Unicode script "Canadian_Aboriginal".
   3359          */
   3360         CANADIAN_ABORIGINAL,
   3361 
   3362         /**
   3363          * Unicode script "Ogham".
   3364          */
   3365         OGHAM,
   3366 
   3367         /**
   3368          * Unicode script "Runic".
   3369          */
   3370         RUNIC,
   3371 
   3372         /**
   3373          * Unicode script "Khmer".
   3374          */
   3375         KHMER,
   3376 
   3377         /**
   3378          * Unicode script "Mongolian".
   3379          */
   3380         MONGOLIAN,
   3381 
   3382         /**
   3383          * Unicode script "Hiragana".
   3384          */
   3385         HIRAGANA,
   3386 
   3387         /**
   3388          * Unicode script "Katakana".
   3389          */
   3390         KATAKANA,
   3391 
   3392         /**
   3393          * Unicode script "Bopomofo".
   3394          */
   3395         BOPOMOFO,
   3396 
   3397         /**
   3398          * Unicode script "Han".
   3399          */
   3400         HAN,
   3401 
   3402         /**
   3403          * Unicode script "Yi".
   3404          */
   3405         YI,
   3406 
   3407         /**
   3408          * Unicode script "Old_Italic".
   3409          */
   3410         OLD_ITALIC,
   3411 
   3412         /**
   3413          * Unicode script "Gothic".
   3414          */
   3415         GOTHIC,
   3416 
   3417         /**
   3418          * Unicode script "Deseret".
   3419          */
   3420         DESERET,
   3421 
   3422         /**
   3423          * Unicode script "Inherited".
   3424          */
   3425         INHERITED,
   3426 
   3427         /**
   3428          * Unicode script "Tagalog".
   3429          */
   3430         TAGALOG,
   3431 
   3432         /**
   3433          * Unicode script "Hanunoo".
   3434          */
   3435         HANUNOO,
   3436 
   3437         /**
   3438          * Unicode script "Buhid".
   3439          */
   3440         BUHID,
   3441 
   3442         /**
   3443          * Unicode script "Tagbanwa".
   3444          */
   3445         TAGBANWA,
   3446 
   3447         /**
   3448          * Unicode script "Limbu".
   3449          */
   3450         LIMBU,
   3451 
   3452         /**
   3453          * Unicode script "Tai_Le".
   3454          */
   3455         TAI_LE,
   3456 
   3457         /**
   3458          * Unicode script "Linear_B".
   3459          */
   3460         LINEAR_B,
   3461 
   3462         /**
   3463          * Unicode script "Ugaritic".
   3464          */
   3465         UGARITIC,
   3466 
   3467         /**
   3468          * Unicode script "Shavian".
   3469          */
   3470         SHAVIAN,
   3471 
   3472         /**
   3473          * Unicode script "Osmanya".
   3474          */
   3475         OSMANYA,
   3476 
   3477         /**
   3478          * Unicode script "Cypriot".
   3479          */
   3480         CYPRIOT,
   3481 
   3482         /**
   3483          * Unicode script "Braille".
   3484          */
   3485         BRAILLE,
   3486 
   3487         /**
   3488          * Unicode script "Buginese".
   3489          */
   3490         BUGINESE,
   3491 
   3492         /**
   3493          * Unicode script "Coptic".
   3494          */
   3495         COPTIC,
   3496 
   3497         /**
   3498          * Unicode script "New_Tai_Lue".
   3499          */
   3500         NEW_TAI_LUE,
   3501 
   3502         /**
   3503          * Unicode script "Glagolitic".
   3504          */
   3505         GLAGOLITIC,
   3506 
   3507         /**
   3508          * Unicode script "Tifinagh".
   3509          */
   3510         TIFINAGH,
   3511 
   3512         /**
   3513          * Unicode script "Syloti_Nagri".
   3514          */
   3515         SYLOTI_NAGRI,
   3516 
   3517         /**
   3518          * Unicode script "Old_Persian".
   3519          */
   3520         OLD_PERSIAN,
   3521 
   3522         /**
   3523          * Unicode script "Kharoshthi".
   3524          */
   3525         KHAROSHTHI,
   3526 
   3527         /**
   3528          * Unicode script "Balinese".
   3529          */
   3530         BALINESE,
   3531 
   3532         /**
   3533          * Unicode script "Cuneiform".
   3534          */
   3535         CUNEIFORM,
   3536 
   3537         /**
   3538          * Unicode script "Phoenician".
   3539          */
   3540         PHOENICIAN,
   3541 
   3542         /**
   3543          * Unicode script "Phags_Pa".
   3544          */
   3545         PHAGS_PA,
   3546 
   3547         /**
   3548          * Unicode script "Nko".
   3549          */
   3550         NKO,
   3551 
   3552         /**
   3553          * Unicode script "Sundanese".
   3554          */
   3555         SUNDANESE,
   3556 
   3557         /**
   3558          * Unicode script "Batak".
   3559          */
   3560         BATAK,
   3561 
   3562         /**
   3563          * Unicode script "Lepcha".
   3564          */
   3565         LEPCHA,
   3566 
   3567         /**
   3568          * Unicode script "Ol_Chiki".
   3569          */
   3570         OL_CHIKI,
   3571 
   3572         /**
   3573          * Unicode script "Vai".
   3574          */
   3575         VAI,
   3576 
   3577         /**
   3578          * Unicode script "Saurashtra".
   3579          */
   3580         SAURASHTRA,
   3581 
   3582         /**
   3583          * Unicode script "Kayah_Li".
   3584          */
   3585         KAYAH_LI,
   3586 
   3587         /**
   3588          * Unicode script "Rejang".
   3589          */
   3590         REJANG,
   3591 
   3592         /**
   3593          * Unicode script "Lycian".
   3594          */
   3595         LYCIAN,
   3596 
   3597         /**
   3598          * Unicode script "Carian".
   3599          */
   3600         CARIAN,
   3601 
   3602         /**
   3603          * Unicode script "Lydian".
   3604          */
   3605         LYDIAN,
   3606 
   3607         /**
   3608          * Unicode script "Cham".
   3609          */
   3610         CHAM,
   3611 
   3612         /**
   3613          * Unicode script "Tai_Tham".
   3614          */
   3615         TAI_THAM,
   3616 
   3617         /**
   3618          * Unicode script "Tai_Viet".
   3619          */
   3620         TAI_VIET,
   3621 
   3622         /**
   3623          * Unicode script "Avestan".
   3624          */
   3625         AVESTAN,
   3626 
   3627         /**
   3628          * Unicode script "Egyptian_Hieroglyphs".
   3629          */
   3630         EGYPTIAN_HIEROGLYPHS,
   3631 
   3632         /**
   3633          * Unicode script "Samaritan".
   3634          */
   3635         SAMARITAN,
   3636 
   3637         /**
   3638          * Unicode script "Mandaic".
   3639          */
   3640         MANDAIC,
   3641 
   3642         /**
   3643          * Unicode script "Lisu".
   3644          */
   3645         LISU,
   3646 
   3647         /**
   3648          * Unicode script "Bamum".
   3649          */
   3650         BAMUM,
   3651 
   3652         /**
   3653          * Unicode script "Javanese".
   3654          */
   3655         JAVANESE,
   3656 
   3657         /**
   3658          * Unicode script "Meetei_Mayek".
   3659          */
   3660         MEETEI_MAYEK,
   3661 
   3662         /**
   3663          * Unicode script "Imperial_Aramaic".
   3664          */
   3665         IMPERIAL_ARAMAIC,
   3666 
   3667         /**
   3668          * Unicode script "Old_South_Arabian".
   3669          */
   3670         OLD_SOUTH_ARABIAN,
   3671 
   3672         /**
   3673          * Unicode script "Inscriptional_Parthian".
   3674          */
   3675         INSCRIPTIONAL_PARTHIAN,
   3676 
   3677         /**
   3678          * Unicode script "Inscriptional_Pahlavi".
   3679          */
   3680         INSCRIPTIONAL_PAHLAVI,
   3681 
   3682         /**
   3683          * Unicode script "Old_Turkic".
   3684          */
   3685         OLD_TURKIC,
   3686 
   3687         /**
   3688          * Unicode script "Brahmi".
   3689          */
   3690         BRAHMI,
   3691 
   3692         /**
   3693          * Unicode script "Kaithi".
   3694          */
   3695         KAITHI,
   3696 
   3697         /**
   3698          * Unicode script "Meroitic Hieroglyphs".
   3699          */
   3700         MEROITIC_HIEROGLYPHS,
   3701 
   3702         /**
   3703          * Unicode script "Meroitic Cursive".
   3704          */
   3705         MEROITIC_CURSIVE,
   3706 
   3707         /**
   3708          * Unicode script "Sora Sompeng".
   3709          */
   3710         SORA_SOMPENG,
   3711 
   3712         /**
   3713          * Unicode script "Chakma".
   3714          */
   3715         CHAKMA,
   3716 
   3717         /**
   3718          * Unicode script "Sharada".
   3719          */
   3720         SHARADA,
   3721 
   3722         /**
   3723          * Unicode script "Takri".
   3724          */
   3725         TAKRI,
   3726 
   3727         /**
   3728          * Unicode script "Miao".
   3729          */
   3730         MIAO,
   3731 
   3732         /**
   3733          * Unicode script "Unknown".
   3734          */
   3735         UNKNOWN;
   3736 
   3737         private static final int[] scriptStarts = {
   3738             0x0000,   // 0000..0040; COMMON
   3739             0x0041,   // 0041..005A; LATIN
   3740             0x005B,   // 005B..0060; COMMON
   3741             0x0061,   // 0061..007A; LATIN
   3742             0x007B,   // 007B..00A9; COMMON
   3743             0x00AA,   // 00AA..00AA; LATIN
   3744             0x00AB,   // 00AB..00B9; COMMON
   3745             0x00BA,   // 00BA..00BA; LATIN
   3746             0x00BB,   // 00BB..00BF; COMMON
   3747             0x00C0,   // 00C0..00D6; LATIN
   3748             0x00D7,   // 00D7..00D7; COMMON
   3749             0x00D8,   // 00D8..00F6; LATIN
   3750             0x00F7,   // 00F7..00F7; COMMON
   3751             0x00F8,   // 00F8..02B8; LATIN
   3752             0x02B9,   // 02B9..02DF; COMMON
   3753             0x02E0,   // 02E0..02E4; LATIN
   3754             0x02E5,   // 02E5..02E9; COMMON
   3755             0x02EA,   // 02EA..02EB; BOPOMOFO
   3756             0x02EC,   // 02EC..02FF; COMMON
   3757             0x0300,   // 0300..036F; INHERITED
   3758             0x0370,   // 0370..0373; GREEK
   3759             0x0374,   // 0374..0374; COMMON
   3760             0x0375,   // 0375..037D; GREEK
   3761             0x037E,   // 037E..0383; COMMON
   3762             0x0384,   // 0384..0384; GREEK
   3763             0x0385,   // 0385..0385; COMMON
   3764             0x0386,   // 0386..0386; GREEK
   3765             0x0387,   // 0387..0387; COMMON
   3766             0x0388,   // 0388..03E1; GREEK
   3767             0x03E2,   // 03E2..03EF; COPTIC
   3768             0x03F0,   // 03F0..03FF; GREEK
   3769             0x0400,   // 0400..0484; CYRILLIC
   3770             0x0485,   // 0485..0486; INHERITED
   3771             0x0487,   // 0487..0530; CYRILLIC
   3772             0x0531,   // 0531..0588; ARMENIAN
   3773             0x0589,   // 0589..0589; COMMON
   3774             0x058A,   // 058A..0590; ARMENIAN
   3775             0x0591,   // 0591..05FF; HEBREW
   3776             0x0600,   // 0600..060B; ARABIC
   3777             0x060C,   // 060C..060C; COMMON
   3778             0x060D,   // 060D..061A; ARABIC
   3779             0x061B,   // 061B..061D; COMMON
   3780             0x061E,   // 061E..061E; ARABIC
   3781             0x061F,   // 061F..061F; COMMON
   3782             0x0620,   // 0620..063F; ARABIC
   3783             0x0640,   // 0640..0640; COMMON
   3784             0x0641,   // 0641..064A; ARABIC
   3785             0x064B,   // 064B..0655; INHERITED
   3786             0x0656,   // 0656..065F; ARABIC
   3787             0x0660,   // 0660..0669; COMMON
   3788             0x066A,   // 066A..066F; ARABIC
   3789             0x0670,   // 0670..0670; INHERITED
   3790             0x0671,   // 0671..06DC; ARABIC
   3791             0x06DD,   // 06DD..06DD; COMMON
   3792             0x06DE,   // 06DE..06FF; ARABIC
   3793             0x0700,   // 0700..074F; SYRIAC
   3794             0x0750,   // 0750..077F; ARABIC
   3795             0x0780,   // 0780..07BF; THAANA
   3796             0x07C0,   // 07C0..07FF; NKO
   3797             0x0800,   // 0800..083F; SAMARITAN
   3798             0x0840,   // 0840..089F; MANDAIC
   3799             0x08A0,   // 08A0..08FF; ARABIC
   3800             0x0900,   // 0900..0950; DEVANAGARI
   3801             0x0951,   // 0951..0952; INHERITED
   3802             0x0953,   // 0953..0963; DEVANAGARI
   3803             0x0964,   // 0964..0965; COMMON
   3804             0x0966,   // 0966..0980; DEVANAGARI
   3805             0x0981,   // 0981..0A00; BENGALI
   3806             0x0A01,   // 0A01..0A80; GURMUKHI
   3807             0x0A81,   // 0A81..0B00; GUJARATI
   3808             0x0B01,   // 0B01..0B81; ORIYA
   3809             0x0B82,   // 0B82..0C00; TAMIL
   3810             0x0C01,   // 0C01..0C81; TELUGU
   3811             0x0C82,   // 0C82..0CF0; KANNADA
   3812             0x0D02,   // 0D02..0D81; MALAYALAM
   3813             0x0D82,   // 0D82..0E00; SINHALA
   3814             0x0E01,   // 0E01..0E3E; THAI
   3815             0x0E3F,   // 0E3F..0E3F; COMMON
   3816             0x0E40,   // 0E40..0E80; THAI
   3817             0x0E81,   // 0E81..0EFF; LAO
   3818             0x0F00,   // 0F00..0FD4; TIBETAN
   3819             0x0FD5,   // 0FD5..0FD8; COMMON
   3820             0x0FD9,   // 0FD9..0FFF; TIBETAN
   3821             0x1000,   // 1000..109F; MYANMAR
   3822             0x10A0,   // 10A0..10FA; GEORGIAN
   3823             0x10FB,   // 10FB..10FB; COMMON
   3824             0x10FC,   // 10FC..10FF; GEORGIAN
   3825             0x1100,   // 1100..11FF; HANGUL
   3826             0x1200,   // 1200..139F; ETHIOPIC
   3827             0x13A0,   // 13A0..13FF; CHEROKEE
   3828             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
   3829             0x1680,   // 1680..169F; OGHAM
   3830             0x16A0,   // 16A0..16EA; RUNIC
   3831             0x16EB,   // 16EB..16ED; COMMON
   3832             0x16EE,   // 16EE..16FF; RUNIC
   3833             0x1700,   // 1700..171F; TAGALOG
   3834             0x1720,   // 1720..1734; HANUNOO
   3835             0x1735,   // 1735..173F; COMMON
   3836             0x1740,   // 1740..175F; BUHID
   3837             0x1760,   // 1760..177F; TAGBANWA
   3838             0x1780,   // 1780..17FF; KHMER
   3839             0x1800,   // 1800..1801; MONGOLIAN
   3840             0x1802,   // 1802..1803; COMMON
   3841             0x1804,   // 1804..1804; MONGOLIAN
   3842             0x1805,   // 1805..1805; COMMON
   3843             0x1806,   // 1806..18AF; MONGOLIAN
   3844             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
   3845             0x1900,   // 1900..194F; LIMBU
   3846             0x1950,   // 1950..197F; TAI_LE
   3847             0x1980,   // 1980..19DF; NEW_TAI_LUE
   3848             0x19E0,   // 19E0..19FF; KHMER
   3849             0x1A00,   // 1A00..1A1F; BUGINESE
   3850             0x1A20,   // 1A20..1AFF; TAI_THAM
   3851             0x1B00,   // 1B00..1B7F; BALINESE
   3852             0x1B80,   // 1B80..1BBF; SUNDANESE
   3853             0x1BC0,   // 1BC0..1BFF; BATAK
   3854             0x1C00,   // 1C00..1C4F; LEPCHA
   3855             0x1C50,   // 1C50..1CBF; OL_CHIKI
   3856             0x1CC0,   // 1CC0..1CCF; SUNDANESE
   3857             0x1CD0,   // 1CD0..1CD2; INHERITED
   3858             0x1CD3,   // 1CD3..1CD3; COMMON
   3859             0x1CD4,   // 1CD4..1CE0; INHERITED
   3860             0x1CE1,   // 1CE1..1CE1; COMMON
   3861             0x1CE2,   // 1CE2..1CE8; INHERITED
   3862             0x1CE9,   // 1CE9..1CEC; COMMON
   3863             0x1CED,   // 1CED..1CED; INHERITED
   3864             0x1CEE,   // 1CEE..1CF3; COMMON
   3865             0x1CF4,   // 1CF4..1CF4; INHERITED
   3866             0x1CF5,   // 1CF5..1CFF; COMMON
   3867             0x1D00,   // 1D00..1D25; LATIN
   3868             0x1D26,   // 1D26..1D2A; GREEK
   3869             0x1D2B,   // 1D2B..1D2B; CYRILLIC
   3870             0x1D2C,   // 1D2C..1D5C; LATIN
   3871             0x1D5D,   // 1D5D..1D61; GREEK
   3872             0x1D62,   // 1D62..1D65; LATIN
   3873             0x1D66,   // 1D66..1D6A; GREEK
   3874             0x1D6B,   // 1D6B..1D77; LATIN
   3875             0x1D78,   // 1D78..1D78; CYRILLIC
   3876             0x1D79,   // 1D79..1DBE; LATIN
   3877             0x1DBF,   // 1DBF..1DBF; GREEK
   3878             0x1DC0,   // 1DC0..1DFF; INHERITED
   3879             0x1E00,   // 1E00..1EFF; LATIN
   3880             0x1F00,   // 1F00..1FFF; GREEK
   3881             0x2000,   // 2000..200B; COMMON
   3882             0x200C,   // 200C..200D; INHERITED
   3883             0x200E,   // 200E..2070; COMMON
   3884             0x2071,   // 2071..2073; LATIN
   3885             0x2074,   // 2074..207E; COMMON
   3886             0x207F,   // 207F..207F; LATIN
   3887             0x2080,   // 2080..208F; COMMON
   3888             0x2090,   // 2090..209F; LATIN
   3889             0x20A0,   // 20A0..20CF; COMMON
   3890             0x20D0,   // 20D0..20FF; INHERITED
   3891             0x2100,   // 2100..2125; COMMON
   3892             0x2126,   // 2126..2126; GREEK
   3893             0x2127,   // 2127..2129; COMMON
   3894             0x212A,   // 212A..212B; LATIN
   3895             0x212C,   // 212C..2131; COMMON
   3896             0x2132,   // 2132..2132; LATIN
   3897             0x2133,   // 2133..214D; COMMON
   3898             0x214E,   // 214E..214E; LATIN
   3899             0x214F,   // 214F..215F; COMMON
   3900             0x2160,   // 2160..2188; LATIN
   3901             0x2189,   // 2189..27FF; COMMON
   3902             0x2800,   // 2800..28FF; BRAILLE
   3903             0x2900,   // 2900..2BFF; COMMON
   3904             0x2C00,   // 2C00..2C5F; GLAGOLITIC
   3905             0x2C60,   // 2C60..2C7F; LATIN
   3906             0x2C80,   // 2C80..2CFF; COPTIC
   3907             0x2D00,   // 2D00..2D2F; GEORGIAN
   3908             0x2D30,   // 2D30..2D7F; TIFINAGH
   3909             0x2D80,   // 2D80..2DDF; ETHIOPIC
   3910             0x2DE0,   // 2DE0..2DFF; CYRILLIC
   3911             0x2E00,   // 2E00..2E7F; COMMON
   3912             0x2E80,   // 2E80..2FEF; HAN
   3913             0x2FF0,   // 2FF0..3004; COMMON
   3914             0x3005,   // 3005..3005; HAN
   3915             0x3006,   // 3006..3006; COMMON
   3916             0x3007,   // 3007..3007; HAN
   3917             0x3008,   // 3008..3020; COMMON
   3918             0x3021,   // 3021..3029; HAN
   3919             0x302A,   // 302A..302D; INHERITED
   3920             0x302E,   // 302E..302F; HANGUL
   3921             0x3030,   // 3030..3037; COMMON
   3922             0x3038,   // 3038..303B; HAN
   3923             0x303C,   // 303C..3040; COMMON
   3924             0x3041,   // 3041..3098; HIRAGANA
   3925             0x3099,   // 3099..309A; INHERITED
   3926             0x309B,   // 309B..309C; COMMON
   3927             0x309D,   // 309D..309F; HIRAGANA
   3928             0x30A0,   // 30A0..30A0; COMMON
   3929             0x30A1,   // 30A1..30FA; KATAKANA
   3930             0x30FB,   // 30FB..30FC; COMMON
   3931             0x30FD,   // 30FD..3104; KATAKANA
   3932             0x3105,   // 3105..3130; BOPOMOFO
   3933             0x3131,   // 3131..318F; HANGUL
   3934             0x3190,   // 3190..319F; COMMON
   3935             0x31A0,   // 31A0..31BF; BOPOMOFO
   3936             0x31C0,   // 31C0..31EF; COMMON
   3937             0x31F0,   // 31F0..31FF; KATAKANA
   3938             0x3200,   // 3200..321F; HANGUL
   3939             0x3220,   // 3220..325F; COMMON
   3940             0x3260,   // 3260..327E; HANGUL
   3941             0x327F,   // 327F..32CF; COMMON
   3942             0x32D0,   // 32D0..3357; KATAKANA
   3943             0x3358,   // 3358..33FF; COMMON
   3944             0x3400,   // 3400..4DBF; HAN
   3945             0x4DC0,   // 4DC0..4DFF; COMMON
   3946             0x4E00,   // 4E00..9FFF; HAN
   3947             0xA000,   // A000..A4CF; YI
   3948             0xA4D0,   // A4D0..A4FF; LISU
   3949             0xA500,   // A500..A63F; VAI
   3950             0xA640,   // A640..A69F; CYRILLIC
   3951             0xA6A0,   // A6A0..A6FF; BAMUM
   3952             0xA700,   // A700..A721; COMMON
   3953             0xA722,   // A722..A787; LATIN
   3954             0xA788,   // A788..A78A; COMMON
   3955             0xA78B,   // A78B..A7FF; LATIN
   3956             0xA800,   // A800..A82F; SYLOTI_NAGRI
   3957             0xA830,   // A830..A83F; COMMON
   3958             0xA840,   // A840..A87F; PHAGS_PA
   3959             0xA880,   // A880..A8DF; SAURASHTRA
   3960             0xA8E0,   // A8E0..A8FF; DEVANAGARI
   3961             0xA900,   // A900..A92F; KAYAH_LI
   3962             0xA930,   // A930..A95F; REJANG
   3963             0xA960,   // A960..A97F; HANGUL
   3964             0xA980,   // A980..A9FF; JAVANESE
   3965             0xAA00,   // AA00..AA5F; CHAM
   3966             0xAA60,   // AA60..AA7F; MYANMAR
   3967             0xAA80,   // AA80..AADF; TAI_VIET
   3968             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
   3969             0xAB01,   // AB01..ABBF; ETHIOPIC
   3970             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
   3971             0xAC00,   // AC00..D7FB; HANGUL
   3972             0xD7FC,   // D7FC..F8FF; UNKNOWN
   3973             0xF900,   // F900..FAFF; HAN
   3974             0xFB00,   // FB00..FB12; LATIN
   3975             0xFB13,   // FB13..FB1C; ARMENIAN
   3976             0xFB1D,   // FB1D..FB4F; HEBREW
   3977             0xFB50,   // FB50..FD3D; ARABIC
   3978             0xFD3E,   // FD3E..FD4F; COMMON
   3979             0xFD50,   // FD50..FDFC; ARABIC
   3980             0xFDFD,   // FDFD..FDFF; COMMON
   3981             0xFE00,   // FE00..FE0F; INHERITED
   3982             0xFE10,   // FE10..FE1F; COMMON
   3983             0xFE20,   // FE20..FE2F; INHERITED
   3984             0xFE30,   // FE30..FE6F; COMMON
   3985             0xFE70,   // FE70..FEFE; ARABIC
   3986             0xFEFF,   // FEFF..FF20; COMMON
   3987             0xFF21,   // FF21..FF3A; LATIN
   3988             0xFF3B,   // FF3B..FF40; COMMON
   3989             0xFF41,   // FF41..FF5A; LATIN
   3990             0xFF5B,   // FF5B..FF65; COMMON
   3991             0xFF66,   // FF66..FF6F; KATAKANA
   3992             0xFF70,   // FF70..FF70; COMMON
   3993             0xFF71,   // FF71..FF9D; KATAKANA
   3994             0xFF9E,   // FF9E..FF9F; COMMON
   3995             0xFFA0,   // FFA0..FFDF; HANGUL
   3996             0xFFE0,   // FFE0..FFFF; COMMON
   3997             0x10000,  // 10000..100FF; LINEAR_B
   3998             0x10100,  // 10100..1013F; COMMON
   3999             0x10140,  // 10140..1018F; GREEK
   4000             0x10190,  // 10190..101FC; COMMON
   4001             0x101FD,  // 101FD..1027F; INHERITED
   4002             0x10280,  // 10280..1029F; LYCIAN
   4003             0x102A0,  // 102A0..102FF; CARIAN
   4004             0x10300,  // 10300..1032F; OLD_ITALIC
   4005             0x10330,  // 10330..1037F; GOTHIC
   4006             0x10380,  // 10380..1039F; UGARITIC
   4007             0x103A0,  // 103A0..103FF; OLD_PERSIAN
   4008             0x10400,  // 10400..1044F; DESERET
   4009             0x10450,  // 10450..1047F; SHAVIAN
   4010             0x10480,  // 10480..107FF; OSMANYA
   4011             0x10800,  // 10800..1083F; CYPRIOT
   4012             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
   4013             0x10900,  // 10900..1091F; PHOENICIAN
   4014             0x10920,  // 10920..1097F; LYDIAN
   4015             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
   4016             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
   4017             0x10A00,  // 10A00..10A5F; KHAROSHTHI
   4018             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
   4019             0x10B00,  // 10B00..10B3F; AVESTAN
   4020             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
   4021             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
   4022             0x10C00,  // 10C00..10E5F; OLD_TURKIC
   4023             0x10E60,  // 10E60..10FFF; ARABIC
   4024             0x11000,  // 11000..1107F; BRAHMI
   4025             0x11080,  // 11080..110CF; KAITHI
   4026             0x110D0,  // 110D0..110FF; SORA_SOMPENG
   4027             0x11100,  // 11100..1117F; CHAKMA
   4028             0x11180,  // 11180..1167F; SHARADA
   4029             0x11680,  // 11680..116CF; TAKRI
   4030             0x12000,  // 12000..12FFF; CUNEIFORM
   4031             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
   4032             0x16800,  // 16800..16A38; BAMUM
   4033             0x16F00,  // 16F00..16F9F; MIAO
   4034             0x1B000,  // 1B000..1B000; KATAKANA
   4035             0x1B001,  // 1B001..1CFFF; HIRAGANA
   4036             0x1D000,  // 1D000..1D166; COMMON
   4037             0x1D167,  // 1D167..1D169; INHERITED
   4038             0x1D16A,  // 1D16A..1D17A; COMMON
   4039             0x1D17B,  // 1D17B..1D182; INHERITED
   4040             0x1D183,  // 1D183..1D184; COMMON
   4041             0x1D185,  // 1D185..1D18B; INHERITED
   4042             0x1D18C,  // 1D18C..1D1A9; COMMON
   4043             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
   4044             0x1D1AE,  // 1D1AE..1D1FF; COMMON
   4045             0x1D200,  // 1D200..1D2FF; GREEK
   4046             0x1D300,  // 1D300..1EDFF; COMMON
   4047             0x1EE00,  // 1EE00..1EFFF; ARABIC
   4048             0x1F000,  // 1F000..1F1FF; COMMON
   4049             0x1F200,  // 1F200..1F200; HIRAGANA
   4050             0x1F201,  // 1F201..1FFFF; COMMON
   4051             0x20000,  // 20000..E0000; HAN
   4052             0xE0001,  // E0001..E00FF; COMMON
   4053             0xE0100,  // E0100..E01EF; INHERITED
   4054             0xE01F0   // E01F0..10FFFF; UNKNOWN
   4055 
   4056         };
   4057 
        // Script assigned to each code point range. Entries correspond
        // index-for-index with scriptStarts: of(int) binary-searches
        // scriptStarts for the range containing a code point and then returns
        // the element at that same index from this array, so the two tables
        // have to be kept in the same order.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
   4377 
   4378         private static HashMap<String, Character.UnicodeScript> aliases;
   4379         static {
   4380             aliases = new HashMap<>(128);
   4381             aliases.put("ARAB", ARABIC);
   4382             aliases.put("ARMI", IMPERIAL_ARAMAIC);
   4383             aliases.put("ARMN", ARMENIAN);
   4384             aliases.put("AVST", AVESTAN);
   4385             aliases.put("BALI", BALINESE);
   4386             aliases.put("BAMU", BAMUM);
   4387             aliases.put("BATK", BATAK);
   4388             aliases.put("BENG", BENGALI);
   4389             aliases.put("BOPO", BOPOMOFO);
   4390             aliases.put("BRAI", BRAILLE);
   4391             aliases.put("BRAH", BRAHMI);
   4392             aliases.put("BUGI", BUGINESE);
   4393             aliases.put("BUHD", BUHID);
   4394             aliases.put("CAKM", CHAKMA);
   4395             aliases.put("CANS", CANADIAN_ABORIGINAL);
   4396             aliases.put("CARI", CARIAN);
   4397             aliases.put("CHAM", CHAM);
   4398             aliases.put("CHER", CHEROKEE);
   4399             aliases.put("COPT", COPTIC);
   4400             aliases.put("CPRT", CYPRIOT);
   4401             aliases.put("CYRL", CYRILLIC);
   4402             aliases.put("DEVA", DEVANAGARI);
   4403             aliases.put("DSRT", DESERET);
   4404             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
   4405             aliases.put("ETHI", ETHIOPIC);
   4406             aliases.put("GEOR", GEORGIAN);
   4407             aliases.put("GLAG", GLAGOLITIC);
   4408             aliases.put("GOTH", GOTHIC);
   4409             aliases.put("GREK", GREEK);
   4410             aliases.put("GUJR", GUJARATI);
   4411             aliases.put("GURU", GURMUKHI);
   4412             aliases.put("HANG", HANGUL);
   4413             aliases.put("HANI", HAN);
   4414             aliases.put("HANO", HANUNOO);
   4415             aliases.put("HEBR", HEBREW);
   4416             aliases.put("HIRA", HIRAGANA);
   4417             // it appears we don't have the KATAKANA_OR_HIRAGANA
   4418             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
   4419             aliases.put("ITAL", OLD_ITALIC);
   4420             aliases.put("JAVA", JAVANESE);
   4421             aliases.put("KALI", KAYAH_LI);
   4422             aliases.put("KANA", KATAKANA);
   4423             aliases.put("KHAR", KHAROSHTHI);
   4424             aliases.put("KHMR", KHMER);
   4425             aliases.put("KNDA", KANNADA);
   4426             aliases.put("KTHI", KAITHI);
   4427             aliases.put("LANA", TAI_THAM);
   4428             aliases.put("LAOO", LAO);
   4429             aliases.put("LATN", LATIN);
   4430             aliases.put("LEPC", LEPCHA);
   4431             aliases.put("LIMB", LIMBU);
   4432             aliases.put("LINB", LINEAR_B);
   4433             aliases.put("LISU", LISU);
   4434             aliases.put("LYCI", LYCIAN);
   4435             aliases.put("LYDI", LYDIAN);
   4436             aliases.put("MAND", MANDAIC);
   4437             aliases.put("MERC", MEROITIC_CURSIVE);
   4438             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
   4439             aliases.put("MLYM", MALAYALAM);
   4440             aliases.put("MONG", MONGOLIAN);
   4441             aliases.put("MTEI", MEETEI_MAYEK);
   4442             aliases.put("MYMR", MYANMAR);
   4443             aliases.put("NKOO", NKO);
   4444             aliases.put("OGAM", OGHAM);
   4445             aliases.put("OLCK", OL_CHIKI);
   4446             aliases.put("ORKH", OLD_TURKIC);
   4447             aliases.put("ORYA", ORIYA);
   4448             aliases.put("OSMA", OSMANYA);
   4449             aliases.put("PHAG", PHAGS_PA);
   4450             aliases.put("PLRD", MIAO);
   4451             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
   4452             aliases.put("PHNX", PHOENICIAN);
   4453             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
   4454             aliases.put("RJNG", REJANG);
   4455             aliases.put("RUNR", RUNIC);
   4456             aliases.put("SAMR", SAMARITAN);
   4457             aliases.put("SARB", OLD_SOUTH_ARABIAN);
   4458             aliases.put("SAUR", SAURASHTRA);
   4459             aliases.put("SHAW", SHAVIAN);
   4460             aliases.put("SHRD", SHARADA);
   4461             aliases.put("SINH", SINHALA);
   4462             aliases.put("SORA", SORA_SOMPENG);
   4463             aliases.put("SUND", SUNDANESE);
   4464             aliases.put("SYLO", SYLOTI_NAGRI);
   4465             aliases.put("SYRC", SYRIAC);
   4466             aliases.put("TAGB", TAGBANWA);
   4467             aliases.put("TALE", TAI_LE);
   4468             aliases.put("TAKR", TAKRI);
   4469             aliases.put("TALU", NEW_TAI_LUE);
   4470             aliases.put("TAML", TAMIL);
   4471             aliases.put("TAVT", TAI_VIET);
   4472             aliases.put("TELU", TELUGU);
   4473             aliases.put("TFNG", TIFINAGH);
   4474             aliases.put("TGLG", TAGALOG);
   4475             aliases.put("THAA", THAANA);
   4476             aliases.put("THAI", THAI);
   4477             aliases.put("TIBT", TIBETAN);
   4478             aliases.put("UGAR", UGARITIC);
   4479             aliases.put("VAII", VAI);
   4480             aliases.put("XPEO", OLD_PERSIAN);
   4481             aliases.put("XSUX", CUNEIFORM);
   4482             aliases.put("YIII", YI);
   4483             aliases.put("ZINH", INHERITED);
   4484             aliases.put("ZYYY", COMMON);
   4485             aliases.put("ZZZZ", UNKNOWN);
   4486         }
   4487 
   4488         /**
   4489          * Returns the enum constant representing the Unicode script of which
   4490          * the given character (Unicode code point) is assigned to.
   4491          *
   4492          * @param   codePoint the character (Unicode code point) in question.
   4493          * @return  The {@code UnicodeScript} constant representing the
   4494          *          Unicode script of which this character is assigned to.
   4495          *
   4496          * @exception IllegalArgumentException if the specified
   4497          * {@code codePoint} is an invalid Unicode code point.
   4498          * @see Character#isValidCodePoint(int)
   4499          *
   4500          */
   4501         public static UnicodeScript of(int codePoint) {
   4502             if (!isValidCodePoint(codePoint))
   4503                 throw new IllegalArgumentException();
   4504             int type = getType(codePoint);
   4505             // leave SURROGATE and PRIVATE_USE for table lookup
   4506             if (type == UNASSIGNED)
   4507                 return UNKNOWN;
   4508             int index = Arrays.binarySearch(scriptStarts, codePoint);
   4509             if (index < 0)
   4510                 index = -index - 2;
   4511             return scripts[index];
   4512         }
   4513 
   4514         /**
   4515          * Returns the UnicodeScript constant with the given Unicode script
   4516          * name or the script name alias. Script names and their aliases are
   4517          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
   4518          * and PropertyValueAliases&lt;version&gt;.txt define script names
   4519          * and the script name aliases for a particular version of the
   4520          * standard. The {@link Character} class specifies the version of
   4521          * the standard that it supports.
   4522          * <p>
   4523          * Character case is ignored for all of the valid script names.
   4524          * The en_US locale's case mapping rules are used to provide
   4525          * case-insensitive string comparisons for script name validation.
   4526          * <p>
   4527          *
   4528          * @param scriptName A {@code UnicodeScript} name.
   4529          * @return The {@code UnicodeScript} constant identified
   4530          *         by {@code scriptName}
   4531          * @throws IllegalArgumentException if {@code scriptName} is an
   4532          *         invalid name
   4533          * @throws NullPointerException if {@code scriptName} is null
   4534          */
   4535         public static final UnicodeScript forName(String scriptName) {
   4536             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
   4537                                  //.replace(' ', '_'));
   4538             UnicodeScript sc = aliases.get(scriptName);
   4539             if (sc != null)
   4540                 return sc;
   4541             return valueOf(scriptName);
   4542         }
   4543     }
   4544 
    /**
     * The primitive {@code char} wrapped by this {@code Character}.
     * Assigned once by the constructor and never changed afterwards.
     *
     * @serial
     */
    private final char value;

    /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;
   4554 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     * <p>
     * When a new instance is not required, {@link #valueOf(char)} is
     * generally preferable because it can return cached instances.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
   4565 
   4566     private static class CharacterCache {
   4567         private CharacterCache(){}
   4568 
   4569         static final Character cache[] = new Character[127 + 1];
   4570 
   4571         static {
   4572             for (int i = 0; i < cache.length; i++)
   4573                 cache[i] = new Character((char)i);
   4574         }
   4575     }
   4576 
   4577     /**
   4578      * Returns a <tt>Character</tt> instance representing the specified
   4579      * <tt>char</tt> value.
   4580      * If a new <tt>Character</tt> instance is not required, this method
   4581      * should generally be used in preference to the constructor
   4582      * {@link #Character(char)}, as this method is likely to yield
   4583      * significantly better space and time performance by caching
   4584      * frequently requested values.
   4585      *
   4586      * This method will always cache values in the range {@code
   4587      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   4588      * cache other values outside of this range.
   4589      *
   4590      * @param  c a char value.
   4591      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   4592      * @since  1.5
   4593      */
   4594     public static Character valueOf(char c) {
   4595         if (c <= 127) { // must cache
   4596             return CharacterCache.cache[(int)c];
   4597         }
   4598         return new Character(c);
   4599     }
   4600 
   4601     /**
   4602      * Returns the value of this {@code Character} object.
   4603      * @return  the primitive {@code char} value represented by
   4604      *          this object.
   4605      */
   4606     public char charValue() {
   4607         return value;
   4608     }
   4609 
   4610     /**
   4611      * Returns a hash code for this {@code Character}; equal to the result
   4612      * of invoking {@code charValue()}.
   4613      *
   4614      * @return a hash code value for this {@code Character}
   4615      */
   4616     @Override
   4617     public int hashCode() {
   4618         return Character.hashCode(value);
   4619     }
   4620 
   4621     /**
   4622      * Returns a hash code for a {@code char} value; compatible with
   4623      * {@code Character.hashCode()}.
   4624      *
   4625      * @since 1.8
   4626      *
   4627      * @param value The {@code char} for which to return a hash code.
   4628      * @return a hash code value for a {@code char} value.
   4629      */
   4630     public static int hashCode(char value) {
   4631         return (int)value;
   4632     }
   4633 
   4634     /**
   4635      * Compares this object against the specified object.
   4636      * The result is {@code true} if and only if the argument is not
   4637      * {@code null} and is a {@code Character} object that
   4638      * represents the same {@code char} value as this object.
   4639      *
   4640      * @param   obj   the object to compare with.
   4641      * @return  {@code true} if the objects are the same;
   4642      *          {@code false} otherwise.
   4643      */
   4644     public boolean equals(Object obj) {
   4645         if (obj instanceof Character) {
   4646             return value == ((Character)obj).charValue();
   4647         }
   4648         return false;
   4649     }
   4650 
   4651     /**
   4652      * Returns a {@code String} object representing this
   4653      * {@code Character}'s value.  The result is a string of
   4654      * length 1 whose sole component is the primitive
   4655      * {@code char} value represented by this
   4656      * {@code Character} object.
   4657      *
   4658      * @return  a string representation of this object.
   4659      */
   4660     public String toString() {
   4661         char buf[] = {value};
   4662         return String.valueOf(buf);
   4663     }
   4664 
   4665     /**
   4666      * Returns a {@code String} object representing the
   4667      * specified {@code char}.  The result is a string of length
   4668      * 1 consisting solely of the specified {@code char}.
   4669      *
   4670      * @param c the {@code char} to be converted
   4671      * @return the string representation of the specified {@code char}
   4672      * @since 1.4
   4673      */
   4674     public static String toString(char c) {
   4675         return String.valueOf(c);
   4676     }
   4677 
   4678     /**
   4679      * Determines whether the specified code point is a valid
   4680      * <a href="http://www.unicode.org/glossary/#code_point">
   4681      * Unicode code point value</a>.
   4682      *
   4683      * @param  codePoint the Unicode code point to be tested
   4684      * @return {@code true} if the specified code point value is between
   4685      *         {@link #MIN_CODE_POINT} and
   4686      *         {@link #MAX_CODE_POINT} inclusive;
   4687      *         {@code false} otherwise.
   4688      * @since  1.5
   4689      */
   4690     public static boolean isValidCodePoint(int codePoint) {
   4691         // Optimized form of:
   4692         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   4693         int plane = codePoint >>> 16;
   4694         return plane < ((MAX_CODE_POINT + 1) >>> 16);
   4695     }
   4696 
   4697     /**
   4698      * Determines whether the specified character (Unicode code point)
   4699      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
   4700      * Such code points can be represented using a single {@code char}.
   4701      *
   4702      * @param  codePoint the character (Unicode code point) to be tested
   4703      * @return {@code true} if the specified code point is between
   4704      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   4705      *         {@code false} otherwise.
   4706      * @since  1.7
   4707      */
   4708     public static boolean isBmpCodePoint(int codePoint) {
   4709         return codePoint >>> 16 == 0;
   4710         // Optimized form of:
   4711         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   4712         // We consistently use logical shift (>>>) to facilitate
   4713         // additional runtime optimizations.
   4714     }
   4715 
   4716     /**
   4717      * Determines whether the specified character (Unicode code point)
   4718      * is in the <a href="#supplementary">supplementary character</a> range.
   4719      *
   4720      * @param  codePoint the character (Unicode code point) to be tested
   4721      * @return {@code true} if the specified code point is between
   4722      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   4723      *         {@link #MAX_CODE_POINT} inclusive;
   4724      *         {@code false} otherwise.
   4725      * @since  1.5
   4726      */
   4727     public static boolean isSupplementaryCodePoint(int codePoint) {
   4728         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
   4729             && codePoint <  MAX_CODE_POINT + 1;
   4730     }
   4731 
   4732     /**
   4733      * Determines if the given {@code char} value is a
   4734      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   4735      * Unicode high-surrogate code unit</a>
   4736      * (also known as <i>leading-surrogate code unit</i>).
   4737      *
   4738      * <p>Such values do not represent characters by themselves,
   4739      * but are used in the representation of
   4740      * <a href="#supplementary">supplementary characters</a>
   4741      * in the UTF-16 encoding.
   4742      *
   4743      * @param  ch the {@code char} value to be tested.
   4744      * @return {@code true} if the {@code char} value is between
   4745      *         {@link #MIN_HIGH_SURROGATE} and
   4746      *         {@link #MAX_HIGH_SURROGATE} inclusive;
   4747      *         {@code false} otherwise.
   4748      * @see    Character#isLowSurrogate(char)
   4749      * @see    Character.UnicodeBlock#of(int)
   4750      * @since  1.5
   4751      */
   4752     public static boolean isHighSurrogate(char ch) {
   4753         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   4754         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
   4755     }
   4756 
   4757     /**
   4758      * Determines if the given {@code char} value is a
   4759      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   4760      * Unicode low-surrogate code unit</a>
   4761      * (also known as <i>trailing-surrogate code unit</i>).
   4762      *
   4763      * <p>Such values do not represent characters by themselves,
   4764      * but are used in the representation of
   4765      * <a href="#supplementary">supplementary characters</a>
   4766      * in the UTF-16 encoding.
   4767      *
   4768      * @param  ch the {@code char} value to be tested.
   4769      * @return {@code true} if the {@code char} value is between
   4770      *         {@link #MIN_LOW_SURROGATE} and
   4771      *         {@link #MAX_LOW_SURROGATE} inclusive;
   4772      *         {@code false} otherwise.
   4773      * @see    Character#isHighSurrogate(char)
   4774      * @since  1.5
   4775      */
   4776     public static boolean isLowSurrogate(char ch) {
   4777         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
   4778     }
   4779 
   4780     /**
   4781      * Determines if the given {@code char} value is a Unicode
   4782      * <i>surrogate code unit</i>.
   4783      *
   4784      * <p>Such values do not represent characters by themselves,
   4785      * but are used in the representation of
   4786      * <a href="#supplementary">supplementary characters</a>
   4787      * in the UTF-16 encoding.
   4788      *
   4789      * <p>A char value is a surrogate code unit if and only if it is either
   4790      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   4791      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   4792      *
   4793      * @param  ch the {@code char} value to be tested.
   4794      * @return {@code true} if the {@code char} value is between
   4795      *         {@link #MIN_SURROGATE} and
   4796      *         {@link #MAX_SURROGATE} inclusive;
   4797      *         {@code false} otherwise.
   4798      * @since  1.7
   4799      */
   4800     public static boolean isSurrogate(char ch) {
   4801         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
   4802     }
   4803 
   4804     /**
   4805      * Determines whether the specified pair of {@code char}
   4806      * values is a valid
   4807      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   4808      * Unicode surrogate pair</a>.
   4809 
   4810      * <p>This method is equivalent to the expression:
   4811      * <blockquote><pre>{@code
   4812      * isHighSurrogate(high) && isLowSurrogate(low)
   4813      * }</pre></blockquote>
   4814      *
   4815      * @param  high the high-surrogate code value to be tested
   4816      * @param  low the low-surrogate code value to be tested
   4817      * @return {@code true} if the specified high and
   4818      * low-surrogate code values represent a valid surrogate pair;
   4819      * {@code false} otherwise.
   4820      * @since  1.5
   4821      */
   4822     public static boolean isSurrogatePair(char high, char low) {
   4823         return isHighSurrogate(high) && isLowSurrogate(low);
   4824     }
   4825 
   4826     /**
   4827      * Determines the number of {@code char} values needed to
   4828      * represent the specified character (Unicode code point). If the
   4829      * specified character is equal to or greater than 0x10000, then
   4830      * the method returns 2. Otherwise, the method returns 1.
   4831      *
   4832      * <p>This method doesn't validate the specified character to be a
   4833      * valid Unicode code point. The caller must validate the
   4834      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
   4835      * if necessary.
   4836      *
   4837      * @param   codePoint the character (Unicode code point) to be tested.
   4838      * @return  2 if the character is a valid supplementary character; 1 otherwise.
   4839      * @see     Character#isSupplementaryCodePoint(int)
   4840      * @since   1.5
   4841      */
   4842     public static int charCount(int codePoint) {
   4843         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
   4844     }
   4845 
   4846     /**
   4847      * Converts the specified surrogate pair to its supplementary code
   4848      * point value. This method does not validate the specified
   4849      * surrogate pair. The caller must validate it using {@link
   4850      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
   4851      *
   4852      * @param  high the high-surrogate code unit
   4853      * @param  low the low-surrogate code unit
   4854      * @return the supplementary code point composed from the
   4855      *         specified surrogate pair.
   4856      * @since  1.5
   4857      */
   4858     public static int toCodePoint(char high, char low) {
   4859         // Optimized form of:
   4860         // return ((high - MIN_HIGH_SURROGATE) << 10)
   4861         //         + (low - MIN_LOW_SURROGATE)
   4862         //         + MIN_SUPPLEMENTARY_CODE_POINT;
   4863         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
   4864                                        - (MIN_HIGH_SURROGATE << 10)
   4865                                        - MIN_LOW_SURROGATE);
   4866     }
   4867 
   4868     /**
   4869      * Returns the code point at the given index of the
   4870      * {@code CharSequence}. If the {@code char} value at
   4871      * the given index in the {@code CharSequence} is in the
   4872      * high-surrogate range, the following index is less than the
   4873      * length of the {@code CharSequence}, and the
   4874      * {@code char} value at the following index is in the
   4875      * low-surrogate range, then the supplementary code point
   4876      * corresponding to this surrogate pair is returned. Otherwise,
   4877      * the {@code char} value at the given index is returned.
   4878      *
   4879      * @param seq a sequence of {@code char} values (Unicode code
   4880      * units)
   4881      * @param index the index to the {@code char} values (Unicode
   4882      * code units) in {@code seq} to be converted
   4883      * @return the Unicode code point at the given index
   4884      * @exception NullPointerException if {@code seq} is null.
   4885      * @exception IndexOutOfBoundsException if the value
   4886      * {@code index} is negative or not less than
   4887      * {@link CharSequence#length() seq.length()}.
   4888      * @since  1.5
   4889      */
   4890     public static int codePointAt(CharSequence seq, int index) {
   4891         char c1 = seq.charAt(index);
   4892         if (isHighSurrogate(c1) && ++index < seq.length()) {
   4893             char c2 = seq.charAt(index);
   4894             if (isLowSurrogate(c2)) {
   4895                 return toCodePoint(c1, c2);
   4896             }
   4897         }
   4898         return c1;
   4899     }
   4900 
   4901     /**
   4902      * Returns the code point at the given index of the
   4903      * {@code char} array. If the {@code char} value at
   4904      * the given index in the {@code char} array is in the
   4905      * high-surrogate range, the following index is less than the
   4906      * length of the {@code char} array, and the
   4907      * {@code char} value at the following index is in the
   4908      * low-surrogate range, then the supplementary code point
   4909      * corresponding to this surrogate pair is returned. Otherwise,
   4910      * the {@code char} value at the given index is returned.
   4911      *
   4912      * @param a the {@code char} array
   4913      * @param index the index to the {@code char} values (Unicode
   4914      * code units) in the {@code char} array to be converted
   4915      * @return the Unicode code point at the given index
   4916      * @exception NullPointerException if {@code a} is null.
   4917      * @exception IndexOutOfBoundsException if the value
   4918      * {@code index} is negative or not less than
   4919      * the length of the {@code char} array.
   4920      * @since  1.5
   4921      */
   4922     public static int codePointAt(char[] a, int index) {
   4923         return codePointAtImpl(a, index, a.length);
   4924     }
   4925 
   4926     /**
   4927      * Returns the code point at the given index of the
   4928      * {@code char} array, where only array elements with
   4929      * {@code index} less than {@code limit} can be used. If
   4930      * the {@code char} value at the given index in the
   4931      * {@code char} array is in the high-surrogate range, the
   4932      * following index is less than the {@code limit}, and the
   4933      * {@code char} value at the following index is in the
   4934      * low-surrogate range, then the supplementary code point
   4935      * corresponding to this surrogate pair is returned. Otherwise,
   4936      * the {@code char} value at the given index is returned.
   4937      *
   4938      * @param a the {@code char} array
   4939      * @param index the index to the {@code char} values (Unicode
   4940      * code units) in the {@code char} array to be converted
   4941      * @param limit the index after the last array element that
   4942      * can be used in the {@code char} array
   4943      * @return the Unicode code point at the given index
   4944      * @exception NullPointerException if {@code a} is null.
   4945      * @exception IndexOutOfBoundsException if the {@code index}
   4946      * argument is negative or not less than the {@code limit}
   4947      * argument, or if the {@code limit} argument is negative or
   4948      * greater than the length of the {@code char} array.
   4949      * @since  1.5
   4950      */
   4951     public static int codePointAt(char[] a, int index, int limit) {
   4952         if (index >= limit || limit < 0 || limit > a.length) {
   4953             throw new IndexOutOfBoundsException();
   4954         }
   4955         return codePointAtImpl(a, index, limit);
   4956     }
   4957 
   4958     // throws ArrayIndexOutOfBoundsException if index out of bounds
   4959     static int codePointAtImpl(char[] a, int index, int limit) {
   4960         char c1 = a[index];
   4961         if (isHighSurrogate(c1) && ++index < limit) {
   4962             char c2 = a[index];
   4963             if (isLowSurrogate(c2)) {
   4964                 return toCodePoint(c1, c2);
   4965             }
   4966         }
   4967         return c1;
   4968     }
   4969 
   4970     /**
   4971      * Returns the code point preceding the given index of the
   4972      * {@code CharSequence}. If the {@code char} value at
   4973      * {@code (index - 1)} in the {@code CharSequence} is in
   4974      * the low-surrogate range, {@code (index - 2)} is not
   4975      * negative, and the {@code char} value at {@code (index - 2)}
   4976      * in the {@code CharSequence} is in the
   4977      * high-surrogate range, then the supplementary code point
   4978      * corresponding to this surrogate pair is returned. Otherwise,
   4979      * the {@code char} value at {@code (index - 1)} is
   4980      * returned.
   4981      *
   4982      * @param seq the {@code CharSequence} instance
   4983      * @param index the index following the code point that should be returned
   4984      * @return the Unicode code point value before the given index.
   4985      * @exception NullPointerException if {@code seq} is null.
   4986      * @exception IndexOutOfBoundsException if the {@code index}
   4987      * argument is less than 1 or greater than {@link
   4988      * CharSequence#length() seq.length()}.
   4989      * @since  1.5
   4990      */
   4991     public static int codePointBefore(CharSequence seq, int index) {
   4992         char c2 = seq.charAt(--index);
   4993         if (isLowSurrogate(c2) && index > 0) {
   4994             char c1 = seq.charAt(--index);
   4995             if (isHighSurrogate(c1)) {
   4996                 return toCodePoint(c1, c2);
   4997             }
   4998         }
   4999         return c2;
   5000     }
   5001 
   5002     /**
   5003      * Returns the code point preceding the given index of the
   5004      * {@code char} array. If the {@code char} value at
   5005      * {@code (index - 1)} in the {@code char} array is in
   5006      * the low-surrogate range, {@code (index - 2)} is not
   5007      * negative, and the {@code char} value at {@code (index - 2)}
   5008      * in the {@code char} array is in the
   5009      * high-surrogate range, then the supplementary code point
   5010      * corresponding to this surrogate pair is returned. Otherwise,
   5011      * the {@code char} value at {@code (index - 1)} is
   5012      * returned.
   5013      *
   5014      * @param a the {@code char} array
   5015      * @param index the index following the code point that should be returned
   5016      * @return the Unicode code point value before the given index.
   5017      * @exception NullPointerException if {@code a} is null.
   5018      * @exception IndexOutOfBoundsException if the {@code index}
   5019      * argument is less than 1 or greater than the length of the
   5020      * {@code char} array
   5021      * @since  1.5
   5022      */
   5023     public static int codePointBefore(char[] a, int index) {
   5024         return codePointBeforeImpl(a, index, 0);
   5025     }
   5026 
   5027     /**
   5028      * Returns the code point preceding the given index of the
   5029      * {@code char} array, where only array elements with
   5030      * {@code index} greater than or equal to {@code start}
   5031      * can be used. If the {@code char} value at {@code (index - 1)}
   5032      * in the {@code char} array is in the
   5033      * low-surrogate range, {@code (index - 2)} is not less than
   5034      * {@code start}, and the {@code char} value at
   5035      * {@code (index - 2)} in the {@code char} array is in
   5036      * the high-surrogate range, then the supplementary code point
   5037      * corresponding to this surrogate pair is returned. Otherwise,
   5038      * the {@code char} value at {@code (index - 1)} is
   5039      * returned.
   5040      *
   5041      * @param a the {@code char} array
   5042      * @param index the index following the code point that should be returned
   5043      * @param start the index of the first array element in the
   5044      * {@code char} array
   5045      * @return the Unicode code point value before the given index.
   5046      * @exception NullPointerException if {@code a} is null.
   5047      * @exception IndexOutOfBoundsException if the {@code index}
   5048      * argument is not greater than the {@code start} argument or
   5049      * is greater than the length of the {@code char} array, or
   5050      * if the {@code start} argument is negative or not less than
   5051      * the length of the {@code char} array.
   5052      * @since  1.5
   5053      */
   5054     public static int codePointBefore(char[] a, int index, int start) {
   5055         if (index <= start || start < 0 || start >= a.length) {
   5056             throw new IndexOutOfBoundsException();
   5057         }
   5058         return codePointBeforeImpl(a, index, start);
   5059     }
   5060 
   5061     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
   5062     static int codePointBeforeImpl(char[] a, int index, int start) {
   5063         char c2 = a[--index];
   5064         if (isLowSurrogate(c2) && index > start) {
   5065             char c1 = a[--index];
   5066             if (isHighSurrogate(c1)) {
   5067                 return toCodePoint(c1, c2);
   5068             }
   5069         }
   5070         return c2;
   5071     }
   5072 
   5073     /**
   5074      * Returns the leading surrogate (a
   5075      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   5076      * high surrogate code unit</a>) of the
   5077      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   5078      * surrogate pair</a>
   5079      * representing the specified supplementary character (Unicode
   5080      * code point) in the UTF-16 encoding.  If the specified character
   5081      * is not a
   5082      * <a href="Character.html#supplementary">supplementary character</a>,
   5083      * an unspecified {@code char} is returned.
   5084      *
   5085      * <p>If
   5086      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
   5087      * is {@code true}, then
   5088      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
   5089      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
   5090      * are also always {@code true}.
   5091      *
   5092      * @param   codePoint a supplementary character (Unicode code point)
   5093      * @return  the leading surrogate code unit used to represent the
   5094      *          character in the UTF-16 encoding
   5095      * @since   1.7
   5096      */
   5097     public static char highSurrogate(int codePoint) {
   5098         return (char) ((codePoint >>> 10)
   5099             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
   5100     }
   5101 
   5102     /**
   5103      * Returns the trailing surrogate (a
   5104      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   5105      * low surrogate code unit</a>) of the
   5106      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   5107      * surrogate pair</a>
   5108      * representing the specified supplementary character (Unicode
   5109      * code point) in the UTF-16 encoding.  If the specified character
   5110      * is not a
   5111      * <a href="Character.html#supplementary">supplementary character</a>,
   5112      * an unspecified {@code char} is returned.
   5113      *
   5114      * <p>If
   5115      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
   5116      * is {@code true}, then
   5117      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
   5118      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
   5119      * are also always {@code true}.
   5120      *
   5121      * @param   codePoint a supplementary character (Unicode code point)
   5122      * @return  the trailing surrogate code unit used to represent the
   5123      *          character in the UTF-16 encoding
   5124      * @since   1.7
   5125      */
   5126     public static char lowSurrogate(int codePoint) {
   5127         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
   5128     }
   5129 
   5130     /**
   5131      * Converts the specified character (Unicode code point) to its
   5132      * UTF-16 representation. If the specified code point is a BMP
   5133      * (Basic Multilingual Plane or Plane 0) value, the same value is
   5134      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
   5135      * specified code point is a supplementary character, its
   5136      * surrogate values are stored in {@code dst[dstIndex]}
   5137      * (high-surrogate) and {@code dst[dstIndex+1]}
   5138      * (low-surrogate), and 2 is returned.
   5139      *
   5140      * @param  codePoint the character (Unicode code point) to be converted.
   5141      * @param  dst an array of {@code char} in which the
   5142      * {@code codePoint}'s UTF-16 value is stored.
   5143      * @param dstIndex the start index into the {@code dst}
   5144      * array where the converted value is stored.
   5145      * @return 1 if the code point is a BMP code point, 2 if the
   5146      * code point is a supplementary code point.
   5147      * @exception IllegalArgumentException if the specified
   5148      * {@code codePoint} is not a valid Unicode code point.
   5149      * @exception NullPointerException if the specified {@code dst} is null.
   5150      * @exception IndexOutOfBoundsException if {@code dstIndex}
   5151      * is negative or not less than {@code dst.length}, or if
   5152      * {@code dst} at {@code dstIndex} doesn't have enough
   5153      * array element(s) to store the resulting {@code char}
   5154      * value(s). (If {@code dstIndex} is equal to
   5155      * {@code dst.length-1} and the specified
   5156      * {@code codePoint} is a supplementary character, the
   5157      * high-surrogate value is not stored in
   5158      * {@code dst[dstIndex]}.)
   5159      * @since  1.5
   5160      */
   5161     public static int toChars(int codePoint, char[] dst, int dstIndex) {
   5162         if (isBmpCodePoint(codePoint)) {
   5163             dst[dstIndex] = (char) codePoint;
   5164             return 1;
   5165         } else if (isValidCodePoint(codePoint)) {
   5166             toSurrogates(codePoint, dst, dstIndex);
   5167             return 2;
   5168         } else {
   5169             throw new IllegalArgumentException();
   5170         }
   5171     }
   5172 
   5173     /**
   5174      * Converts the specified character (Unicode code point) to its
   5175      * UTF-16 representation stored in a {@code char} array. If
   5176      * the specified code point is a BMP (Basic Multilingual Plane or
   5177      * Plane 0) value, the resulting {@code char} array has
   5178      * the same value as {@code codePoint}. If the specified code
   5179      * point is a supplementary code point, the resulting
   5180      * {@code char} array has the corresponding surrogate pair.
   5181      *
   5182      * @param  codePoint a Unicode code point
   5183      * @return a {@code char} array having
   5184      *         {@code codePoint}'s UTF-16 representation.
   5185      * @exception IllegalArgumentException if the specified
   5186      * {@code codePoint} is not a valid Unicode code point.
   5187      * @since  1.5
   5188      */
   5189     public static char[] toChars(int codePoint) {
   5190         if (isBmpCodePoint(codePoint)) {
   5191             return new char[] { (char) codePoint };
   5192         } else if (isValidCodePoint(codePoint)) {
   5193             char[] result = new char[2];
   5194             toSurrogates(codePoint, result, 0);
   5195             return result;
   5196         } else {
   5197             throw new IllegalArgumentException();
   5198         }
   5199     }
   5200 
   5201     static void toSurrogates(int codePoint, char[] dst, int index) {
   5202         // We write elements "backwards" to guarantee all-or-nothing
   5203         dst[index+1] = lowSurrogate(codePoint);
   5204         dst[index] = highSurrogate(codePoint);
   5205     }
   5206 
   5207     /**
   5208      * Returns the number of Unicode code points in the text range of
   5209      * the specified char sequence. The text range begins at the
   5210      * specified {@code beginIndex} and extends to the
   5211      * {@code char} at index {@code endIndex - 1}. Thus the
   5212      * length (in {@code char}s) of the text range is
   5213      * {@code endIndex-beginIndex}. Unpaired surrogates within
   5214      * the text range count as one code point each.
   5215      *
   5216      * @param seq the char sequence
   5217      * @param beginIndex the index to the first {@code char} of
   5218      * the text range.
   5219      * @param endIndex the index after the last {@code char} of
   5220      * the text range.
   5221      * @return the number of Unicode code points in the specified text
   5222      * range
   5223      * @exception NullPointerException if {@code seq} is null.
   5224      * @exception IndexOutOfBoundsException if the
   5225      * {@code beginIndex} is negative, or {@code endIndex}
   5226      * is larger than the length of the given sequence, or
   5227      * {@code beginIndex} is larger than {@code endIndex}.
   5228      * @since  1.5
   5229      */
   5230     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
   5231         int length = seq.length();
   5232         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
   5233             throw new IndexOutOfBoundsException();
   5234         }
   5235         int n = endIndex - beginIndex;
   5236         for (int i = beginIndex; i < endIndex; ) {
   5237             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
   5238                 isLowSurrogate(seq.charAt(i))) {
   5239                 n--;
   5240                 i++;
   5241             }
   5242         }
   5243         return n;
   5244     }
   5245 
   5246     /**
   5247      * Returns the number of Unicode code points in a subarray of the
   5248      * {@code char} array argument. The {@code offset}
   5249      * argument is the index of the first {@code char} of the
   5250      * subarray and the {@code count} argument specifies the
   5251      * length of the subarray in {@code char}s. Unpaired
   5252      * surrogates within the subarray count as one code point each.
   5253      *
   5254      * @param a the {@code char} array
   5255      * @param offset the index of the first {@code char} in the
   5256      * given {@code char} array
   5257      * @param count the length of the subarray in {@code char}s
   5258      * @return the number of Unicode code points in the specified subarray
   5259      * @exception NullPointerException if {@code a} is null.
   5260      * @exception IndexOutOfBoundsException if {@code offset} or
   5261      * {@code count} is negative, or if {@code offset +
   5262      * count} is larger than the length of the given array.
   5263      * @since  1.5
   5264      */
   5265     public static int codePointCount(char[] a, int offset, int count) {
   5266         if (count > a.length - offset || offset < 0 || count < 0) {
   5267             throw new IndexOutOfBoundsException();
   5268         }
   5269         return codePointCountImpl(a, offset, count);
   5270     }
   5271 
   5272     static int codePointCountImpl(char[] a, int offset, int count) {
   5273         int endIndex = offset + count;
   5274         int n = count;
   5275         for (int i = offset; i < endIndex; ) {
   5276             if (isHighSurrogate(a[i++]) && i < endIndex &&
   5277                 isLowSurrogate(a[i])) {
   5278                 n--;
   5279                 i++;
   5280             }
   5281         }
   5282         return n;
   5283     }
   5284 
   5285     /**
   5286      * Returns the index within the given char sequence that is offset
   5287      * from the given {@code index} by {@code codePointOffset}
   5288      * code points. Unpaired surrogates within the text range given by
   5289      * {@code index} and {@code codePointOffset} count as
   5290      * one code point each.
   5291      *
   5292      * @param seq the char sequence
   5293      * @param index the index to be offset
   5294      * @param codePointOffset the offset in code points
   5295      * @return the index within the char sequence
   5296      * @exception NullPointerException if {@code seq} is null.
   5297      * @exception IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
   5299      *   or if {@code codePointOffset} is positive and the
   5300      *   subsequence starting with {@code index} has fewer than
   5301      *   {@code codePointOffset} code points, or if
   5302      *   {@code codePointOffset} is negative and the subsequence
   5303      *   before {@code index} has fewer than the absolute value
   5304      *   of {@code codePointOffset} code points.
   5305      * @since 1.5
   5306      */
   5307     public static int offsetByCodePoints(CharSequence seq, int index,
   5308                                          int codePointOffset) {
   5309         int length = seq.length();
   5310         if (index < 0 || index > length) {
   5311             throw new IndexOutOfBoundsException();
   5312         }
   5313 
   5314         int x = index;
   5315         if (codePointOffset >= 0) {
   5316             int i;
   5317             for (i = 0; x < length && i < codePointOffset; i++) {
   5318                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
   5319                     isLowSurrogate(seq.charAt(x))) {
   5320                     x++;
   5321                 }
   5322             }
   5323             if (i < codePointOffset) {
   5324                 throw new IndexOutOfBoundsException();
   5325             }
   5326         } else {
   5327             int i;
   5328             for (i = codePointOffset; x > 0 && i < 0; i++) {
   5329                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
   5330                     isHighSurrogate(seq.charAt(x-1))) {
   5331                     x--;
   5332                 }
   5333             }
   5334             if (i < 0) {
   5335                 throw new IndexOutOfBoundsException();
   5336             }
   5337         }
   5338         return x;
   5339     }
   5340 
   5341     /**
   5342      * Returns the index within the given {@code char} subarray
   5343      * that is offset from the given {@code index} by
   5344      * {@code codePointOffset} code points. The
   5345      * {@code start} and {@code count} arguments specify a
   5346      * subarray of the {@code char} array. Unpaired surrogates
   5347      * within the text range given by {@code index} and
   5348      * {@code codePointOffset} count as one code point each.
   5349      *
   5350      * @param a the {@code char} array
   5351      * @param start the index of the first {@code char} of the
   5352      * subarray
   5353      * @param count the length of the subarray in {@code char}s
   5354      * @param index the index to be offset
   5355      * @param codePointOffset the offset in code points
   5356      * @return the index within the subarray
   5357      * @exception NullPointerException if {@code a} is null.
   5358      * @exception IndexOutOfBoundsException
   5359      *   if {@code start} or {@code count} is negative,
   5360      *   or if {@code start + count} is larger than the length of
   5361      *   the given array,
   5362      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
   5364      *   or if {@code codePointOffset} is positive and the text range
   5365      *   starting with {@code index} and ending with {@code start + count - 1}
   5366      *   has fewer than {@code codePointOffset} code
   5367      *   points,
   5368      *   or if {@code codePointOffset} is negative and the text range
   5369      *   starting with {@code start} and ending with {@code index - 1}
   5370      *   has fewer than the absolute value of
   5371      *   {@code codePointOffset} code points.
   5372      * @since 1.5
   5373      */
   5374     public static int offsetByCodePoints(char[] a, int start, int count,
   5375                                          int index, int codePointOffset) {
   5376         if (count > a.length-start || start < 0 || count < 0
   5377             || index < start || index > start+count) {
   5378             throw new IndexOutOfBoundsException();
   5379         }
   5380         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
   5381     }
   5382 
   5383     static int offsetByCodePointsImpl(char[]a, int start, int count,
   5384                                       int index, int codePointOffset) {
   5385         int x = index;
   5386         if (codePointOffset >= 0) {
   5387             int limit = start + count;
   5388             int i;
   5389             for (i = 0; x < limit && i < codePointOffset; i++) {
   5390                 if (isHighSurrogate(a[x++]) && x < limit &&
   5391                     isLowSurrogate(a[x])) {
   5392                     x++;
   5393                 }
   5394             }
   5395             if (i < codePointOffset) {
   5396                 throw new IndexOutOfBoundsException();
   5397             }
   5398         } else {
   5399             int i;
   5400             for (i = codePointOffset; x > start && i < 0; i++) {
   5401                 if (isLowSurrogate(a[--x]) && x > start &&
   5402                     isHighSurrogate(a[x-1])) {
   5403                     x--;
   5404                 }
   5405             }
   5406             if (i < 0) {
   5407                 throw new IndexOutOfBoundsException();
   5408             }
   5409         }
   5410         return x;
   5411     }
   5412 
   5413     /**
   5414      * Determines if the specified character is a lowercase character.
   5415      * <p>
   5416      * A character is lowercase if its general category type, provided
   5417      * by {@code Character.getType(ch)}, is
   5418      * {@code LOWERCASE_LETTER}, or it has contributory property
   5419      * Other_Lowercase as defined by the Unicode Standard.
   5420      * <p>
   5421      * The following are examples of lowercase characters:
   5422      * <blockquote><pre>
   5423      * a b c d e f g h i j k l m n o p q r s t u v w x y z
   5424      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   5425      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   5426      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   5427      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   5428      * </pre></blockquote>
   5429      * <p> Many other Unicode characters are lowercase too.
   5430      *
   5431      * <p><b>Note:</b> This method cannot handle <a
   5432      * href="#supplementary"> supplementary characters</a>. To support
   5433      * all Unicode characters, including supplementary characters, use
   5434      * the {@link #isLowerCase(int)} method.
   5435      *
   5436      * @param   ch   the character to be tested.
   5437      * @return  {@code true} if the character is lowercase;
   5438      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
   5440      * @see     Character#isTitleCase(char)
   5441      * @see     Character#toLowerCase(char)
   5442      * @see     Character#getType(char)
   5443      */
   5444     public static boolean isLowerCase(char ch) {
   5445         return isLowerCase((int)ch);
   5446     }
   5447 
   5448     /**
   5449      * Determines if the specified character (Unicode code point) is a
   5450      * lowercase character.
   5451      * <p>
   5452      * A character is lowercase if its general category type, provided
   5453      * by {@link Character#getType getType(codePoint)}, is
   5454      * {@code LOWERCASE_LETTER}, or it has contributory property
   5455      * Other_Lowercase as defined by the Unicode Standard.
   5456      * <p>
   5457      * The following are examples of lowercase characters:
   5458      * <blockquote><pre>
   5459      * a b c d e f g h i j k l m n o p q r s t u v w x y z
   5460      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   5461      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   5462      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   5463      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   5464      * </pre></blockquote>
   5465      * <p> Many other Unicode characters are lowercase too.
   5466      *
   5467      * @param   codePoint the character (Unicode code point) to be tested.
   5468      * @return  {@code true} if the character is lowercase;
   5469      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
   5471      * @see     Character#isTitleCase(int)
   5472      * @see     Character#toLowerCase(int)
   5473      * @see     Character#getType(int)
   5474      * @since   1.5
   5475      */
   5476     public static boolean isLowerCase(int codePoint) {
   5477         return isLowerCaseImpl(codePoint);
   5478     }
   5479 
    /**
     * Native implementation that {@link #isLowerCase(int)} delegates to.
     * The classification logic lives in native code that is not part of
     * this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isLowerCase(int)}.
     * @return the value that {@link #isLowerCase(int)} hands back to its caller.
     */
    @FastNative
    static native boolean isLowerCaseImpl(int codePoint);
   5482 
   5483     /**
   5484      * Determines if the specified character is an uppercase character.
   5485      * <p>
   5486      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
   5488      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
   5489      * <p>
   5490      * The following are examples of uppercase characters:
   5491      * <blockquote><pre>
   5492      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   5493      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   5494      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   5495      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   5496      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   5497      * </pre></blockquote>
   5498      * <p> Many other Unicode characters are uppercase too.
   5499      *
   5500      * <p><b>Note:</b> This method cannot handle <a
   5501      * href="#supplementary"> supplementary characters</a>. To support
   5502      * all Unicode characters, including supplementary characters, use
   5503      * the {@link #isUpperCase(int)} method.
   5504      *
   5505      * @param   ch   the character to be tested.
   5506      * @return  {@code true} if the character is uppercase;
   5507      *          {@code false} otherwise.
   5508      * @see     Character#isLowerCase(char)
   5509      * @see     Character#isTitleCase(char)
   5510      * @see     Character#toUpperCase(char)
   5511      * @see     Character#getType(char)
   5512      * @since   1.0
   5513      */
   5514     public static boolean isUpperCase(char ch) {
   5515         return isUpperCase((int)ch);
   5516     }
   5517 
   5518     /**
   5519      * Determines if the specified character (Unicode code point) is an uppercase character.
   5520      * <p>
   5521      * A character is uppercase if its general category type, provided by
   5522      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
   5523      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
   5524      * <p>
   5525      * The following are examples of uppercase characters:
   5526      * <blockquote><pre>
   5527      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   5528      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   5529      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   5530      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   5531      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   5532      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
   5534      *
   5535      * @param   codePoint the character (Unicode code point) to be tested.
   5536      * @return  {@code true} if the character is uppercase;
   5537      *          {@code false} otherwise.
   5538      * @see     Character#isLowerCase(int)
   5539      * @see     Character#isTitleCase(int)
   5540      * @see     Character#toUpperCase(int)
   5541      * @see     Character#getType(int)
   5542      * @since   1.5
   5543      */
   5544     public static boolean isUpperCase(int codePoint) {
   5545         return isUpperCaseImpl(codePoint);
   5546     }
   5547 
    /**
     * Native implementation that {@link #isUpperCase(int)} delegates to.
     * The classification logic lives in native code that is not part of
     * this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isUpperCase(int)}.
     * @return the value that {@link #isUpperCase(int)} hands back to its caller.
     */
    @FastNative
    static native boolean isUpperCaseImpl(int codePoint);
   5550 
   5551 
   5552     /**
   5553      * Determines if the specified character is a titlecase character.
   5554      * <p>
   5555      * A character is a titlecase character if its general
   5556      * category type, provided by {@code Character.getType(ch)},
   5557      * is {@code TITLECASE_LETTER}.
   5558      * <p>
   5559      * Some characters look like pairs of Latin letters. For example, there
   5560      * is an uppercase letter that looks like "LJ" and has a corresponding
   5561      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   5562      * is the appropriate form to use when rendering a word in lowercase
   5563      * with initial capitals, as for a book title.
   5564      * <p>
   5565      * These are some of the Unicode characters for which this method returns
   5566      * {@code true}:
   5567      * <ul>
   5568      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
   5569      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
   5570      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
   5571      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
   5572      * </ul>
   5573      * <p> Many other Unicode characters are titlecase too.
   5574      *
   5575      * <p><b>Note:</b> This method cannot handle <a
   5576      * href="#supplementary"> supplementary characters</a>. To support
   5577      * all Unicode characters, including supplementary characters, use
   5578      * the {@link #isTitleCase(int)} method.
   5579      *
   5580      * @param   ch   the character to be tested.
   5581      * @return  {@code true} if the character is titlecase;
   5582      *          {@code false} otherwise.
   5583      * @see     Character#isLowerCase(char)
   5584      * @see     Character#isUpperCase(char)
   5585      * @see     Character#toTitleCase(char)
   5586      * @see     Character#getType(char)
   5587      * @since   1.0.2
   5588      */
   5589     public static boolean isTitleCase(char ch) {
   5590         return isTitleCase((int)ch);
   5591     }
   5592 
   5593     /**
   5594      * Determines if the specified character (Unicode code point) is a titlecase character.
   5595      * <p>
   5596      * A character is a titlecase character if its general
   5597      * category type, provided by {@link Character#getType(int) getType(codePoint)},
   5598      * is {@code TITLECASE_LETTER}.
   5599      * <p>
   5600      * Some characters look like pairs of Latin letters. For example, there
   5601      * is an uppercase letter that looks like "LJ" and has a corresponding
   5602      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   5603      * is the appropriate form to use when rendering a word in lowercase
   5604      * with initial capitals, as for a book title.
   5605      * <p>
   5606      * These are some of the Unicode characters for which this method returns
   5607      * {@code true}:
   5608      * <ul>
   5609      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
   5610      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
   5611      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
   5612      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
   5613      * </ul>
     * <p> Many other Unicode characters are titlecase too.
   5615      *
   5616      * @param   codePoint the character (Unicode code point) to be tested.
   5617      * @return  {@code true} if the character is titlecase;
   5618      *          {@code false} otherwise.
   5619      * @see     Character#isLowerCase(int)
   5620      * @see     Character#isUpperCase(int)
   5621      * @see     Character#toTitleCase(int)
   5622      * @see     Character#getType(int)
   5623      * @since   1.5
   5624      */
   5625     public static boolean isTitleCase(int codePoint) {
   5626         return isTitleCaseImpl(codePoint);
   5627     }
   5628 
    /**
     * Native implementation that {@link #isTitleCase(int)} delegates to.
     * The classification logic lives in native code that is not part of
     * this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isTitleCase(int)}.
     * @return the value that {@link #isTitleCase(int)} hands back to its caller.
     */
    @FastNative
    static native boolean isTitleCaseImpl(int codePoint);
   5631 
   5632     /**
   5633      * Determines if the specified character is a digit.
   5634      * <p>
   5635      * A character is a digit if its general category type, provided
   5636      * by {@code Character.getType(ch)}, is
   5637      * {@code DECIMAL_DIGIT_NUMBER}.
   5638      * <p>
   5639      * Some Unicode character ranges that contain digits:
   5640      * <ul>
   5641      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
   5642      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
   5643      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
   5644      *     Arabic-Indic digits
   5645      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
   5646      *     Extended Arabic-Indic digits
   5647      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
   5648      *     Devanagari digits
   5649      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
   5650      *     Fullwidth digits
   5651      * </ul>
   5652      *
   5653      * Many other character ranges contain digits as well.
   5654      *
   5655      * <p><b>Note:</b> This method cannot handle <a
   5656      * href="#supplementary"> supplementary characters</a>. To support
   5657      * all Unicode characters, including supplementary characters, use
   5658      * the {@link #isDigit(int)} method.
   5659      *
   5660      * @param   ch   the character to be tested.
   5661      * @return  {@code true} if the character is a digit;
   5662      *          {@code false} otherwise.
   5663      * @see     Character#digit(char, int)
   5664      * @see     Character#forDigit(int, int)
   5665      * @see     Character#getType(char)
   5666      */
   5667     public static boolean isDigit(char ch) {
   5668         return isDigit((int)ch);
   5669     }
   5670 
   5671     /**
   5672      * Determines if the specified character (Unicode code point) is a digit.
   5673      * <p>
   5674      * A character is a digit if its general category type, provided
   5675      * by {@link Character#getType(int) getType(codePoint)}, is
   5676      * {@code DECIMAL_DIGIT_NUMBER}.
   5677      * <p>
   5678      * Some Unicode character ranges that contain digits:
   5679      * <ul>
   5680      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
   5681      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
   5682      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
   5683      *     Arabic-Indic digits
   5684      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
   5685      *     Extended Arabic-Indic digits
   5686      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
   5687      *     Devanagari digits
   5688      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
   5689      *     Fullwidth digits
   5690      * </ul>
   5691      *
   5692      * Many other character ranges contain digits as well.
   5693      *
   5694      * @param   codePoint the character (Unicode code point) to be tested.
   5695      * @return  {@code true} if the character is a digit;
   5696      *          {@code false} otherwise.
   5697      * @see     Character#forDigit(int, int)
   5698      * @see     Character#getType(int)
   5699      * @since   1.5
   5700      */
   5701     public static boolean isDigit(int codePoint) {
   5702         return isDigitImpl(codePoint);
   5703     }
   5704 
    /**
     * Native implementation that {@link #isDigit(int)} delegates to.
     * The classification logic lives in native code that is not part of
     * this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isDigit(int)}.
     * @return the value that {@link #isDigit(int)} hands back to its caller.
     */
    @FastNative
    static native boolean isDigitImpl(int codePoint);
   5707 
   5708     /**
   5709      * Determines if a character is defined in Unicode.
   5710      * <p>
   5711      * A character is defined if at least one of the following is true:
   5712      * <ul>
   5713      * <li>It has an entry in the UnicodeData file.
   5714      * <li>It has a value in a range defined by the UnicodeData file.
   5715      * </ul>
   5716      *
   5717      * <p><b>Note:</b> This method cannot handle <a
   5718      * href="#supplementary"> supplementary characters</a>. To support
   5719      * all Unicode characters, including supplementary characters, use
   5720      * the {@link #isDefined(int)} method.
   5721      *
   5722      * @param   ch   the character to be tested
   5723      * @return  {@code true} if the character has a defined meaning
   5724      *          in Unicode; {@code false} otherwise.
   5725      * @see     Character#isDigit(char)
   5726      * @see     Character#isLetter(char)
   5727      * @see     Character#isLetterOrDigit(char)
   5728      * @see     Character#isLowerCase(char)
   5729      * @see     Character#isTitleCase(char)
   5730      * @see     Character#isUpperCase(char)
   5731      * @since   1.0.2
   5732      */
   5733     public static boolean isDefined(char ch) {
   5734         return isDefined((int)ch);
   5735     }
   5736 
   5737     /**
   5738      * Determines if a character (Unicode code point) is defined in Unicode.
   5739      * <p>
   5740      * A character is defined if at least one of the following is true:
   5741      * <ul>
   5742      * <li>It has an entry in the UnicodeData file.
   5743      * <li>It has a value in a range defined by the UnicodeData file.
   5744      * </ul>
   5745      *
   5746      * @param   codePoint the character (Unicode code point) to be tested.
   5747      * @return  {@code true} if the character has a defined meaning
   5748      *          in Unicode; {@code false} otherwise.
   5749      * @see     Character#isDigit(int)
   5750      * @see     Character#isLetter(int)
   5751      * @see     Character#isLetterOrDigit(int)
   5752      * @see     Character#isLowerCase(int)
   5753      * @see     Character#isTitleCase(int)
   5754      * @see     Character#isUpperCase(int)
   5755      * @since   1.5
   5756      */
   5757     public static boolean isDefined(int codePoint) {
   5758         return isDefinedImpl(codePoint);
   5759     }
   5760 
    /**
     * Native implementation that {@link #isDefined(int)} delegates to.
     * The lookup logic lives in native code that is not part of this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isDefined(int)}.
     * @return the value that {@link #isDefined(int)} hands back to its caller.
     */
    @FastNative
    static native boolean isDefinedImpl(int codePoint);
   5763 
   5764     /**
   5765      * Determines if the specified character is a letter.
   5766      * <p>
   5767      * A character is considered to be a letter if its general
   5768      * category type, provided by {@code Character.getType(ch)},
   5769      * is any of the following:
   5770      * <ul>
   5771      * <li> {@code UPPERCASE_LETTER}
   5772      * <li> {@code LOWERCASE_LETTER}
   5773      * <li> {@code TITLECASE_LETTER}
   5774      * <li> {@code MODIFIER_LETTER}
   5775      * <li> {@code OTHER_LETTER}
   5776      * </ul>
   5777      *
   5778      * Not all letters have case. Many characters are
   5779      * letters but are neither uppercase nor lowercase nor titlecase.
   5780      *
   5781      * <p><b>Note:</b> This method cannot handle <a
   5782      * href="#supplementary"> supplementary characters</a>. To support
   5783      * all Unicode characters, including supplementary characters, use
   5784      * the {@link #isLetter(int)} method.
   5785      *
   5786      * @param   ch   the character to be tested.
   5787      * @return  {@code true} if the character is a letter;
   5788      *          {@code false} otherwise.
   5789      * @see     Character#isDigit(char)
   5790      * @see     Character#isJavaIdentifierStart(char)
   5791      * @see     Character#isJavaLetter(char)
   5792      * @see     Character#isJavaLetterOrDigit(char)
   5793      * @see     Character#isLetterOrDigit(char)
   5794      * @see     Character#isLowerCase(char)
   5795      * @see     Character#isTitleCase(char)
   5796      * @see     Character#isUnicodeIdentifierStart(char)
   5797      * @see     Character#isUpperCase(char)
   5798      */
   5799     public static boolean isLetter(char ch) {
   5800         return isLetter((int)ch);
   5801     }
   5802 
   5803     /**
   5804      * Determines if the specified character (Unicode code point) is a letter.
   5805      * <p>
   5806      * A character is considered to be a letter if its general
   5807      * category type, provided by {@link Character#getType(int) getType(codePoint)},
   5808      * is any of the following:
   5809      * <ul>
   5810      * <li> {@code UPPERCASE_LETTER}
   5811      * <li> {@code LOWERCASE_LETTER}
   5812      * <li> {@code TITLECASE_LETTER}
   5813      * <li> {@code MODIFIER_LETTER}
   5814      * <li> {@code OTHER_LETTER}
   5815      * </ul>
   5816      *
   5817      * Not all letters have case. Many characters are
   5818      * letters but are neither uppercase nor lowercase nor titlecase.
   5819      *
   5820      * @param   codePoint the character (Unicode code point) to be tested.
   5821      * @return  {@code true} if the character is a letter;
   5822      *          {@code false} otherwise.
   5823      * @see     Character#isDigit(int)
   5824      * @see     Character#isJavaIdentifierStart(int)
   5825      * @see     Character#isLetterOrDigit(int)
   5826      * @see     Character#isLowerCase(int)
   5827      * @see     Character#isTitleCase(int)
   5828      * @see     Character#isUnicodeIdentifierStart(int)
   5829      * @see     Character#isUpperCase(int)
   5830      * @since   1.5
   5831      */
   5832     public static boolean isLetter(int codePoint) {
   5833         return isLetterImpl(codePoint);
   5834     }
   5835 
    /**
     * Native implementation that {@link #isLetter(int)} delegates to.
     * The classification logic lives in native code that is not part of
     * this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isLetter(int)}.
     * @return the value that {@link #isLetter(int)} hands back to its caller.
     */
    @FastNative
    static native boolean isLetterImpl(int codePoint);
   5838 
   5839     /**
   5840      * Determines if the specified character is a letter or digit.
   5841      * <p>
   5842      * A character is considered to be a letter or digit if either
   5843      * {@code Character.isLetter(char ch)} or
   5844      * {@code Character.isDigit(char ch)} returns
   5845      * {@code true} for the character.
   5846      *
   5847      * <p><b>Note:</b> This method cannot handle <a
   5848      * href="#supplementary"> supplementary characters</a>. To support
   5849      * all Unicode characters, including supplementary characters, use
   5850      * the {@link #isLetterOrDigit(int)} method.
   5851      *
   5852      * @param   ch   the character to be tested.
   5853      * @return  {@code true} if the character is a letter or digit;
   5854      *          {@code false} otherwise.
   5855      * @see     Character#isDigit(char)
   5856      * @see     Character#isJavaIdentifierPart(char)
   5857      * @see     Character#isJavaLetter(char)
   5858      * @see     Character#isJavaLetterOrDigit(char)
   5859      * @see     Character#isLetter(char)
   5860      * @see     Character#isUnicodeIdentifierPart(char)
   5861      * @since   1.0.2
   5862      */
   5863     public static boolean isLetterOrDigit(char ch) {
   5864         return isLetterOrDigit((int)ch);
   5865     }
   5866 
   5867     /**
   5868      * Determines if the specified character (Unicode code point) is a letter or digit.
   5869      * <p>
   5870      * A character is considered to be a letter or digit if either
   5871      * {@link #isLetter(int) isLetter(codePoint)} or
   5872      * {@link #isDigit(int) isDigit(codePoint)} returns
   5873      * {@code true} for the character.
   5874      *
   5875      * @param   codePoint the character (Unicode code point) to be tested.
   5876      * @return  {@code true} if the character is a letter or digit;
   5877      *          {@code false} otherwise.
   5878      * @see     Character#isDigit(int)
   5879      * @see     Character#isJavaIdentifierPart(int)
   5880      * @see     Character#isLetter(int)
   5881      * @see     Character#isUnicodeIdentifierPart(int)
   5882      * @since   1.5
   5883      */
   5884     public static boolean isLetterOrDigit(int codePoint) {
   5885         return isLetterOrDigitImpl(codePoint);
   5886     }
   5887 
    /**
     * Native implementation that {@link #isLetterOrDigit(int)} delegates to.
     * The classification logic lives in native code that is not part of
     * this file.
     *
     * @param codePoint the character (Unicode code point) to be tested,
     *        exactly as received by {@link #isLetterOrDigit(int)}.
     * @return the value that {@link #isLetterOrDigit(int)} hands back to its
     *         caller.
     */
    @FastNative
    static native boolean isLetterOrDigitImpl(int codePoint);
   5890 
   5891     /**
   5892      * Determines if the specified character is permissible as the first
   5893      * character in a Java identifier.
   5894      * <p>
   5895      * A character may start a Java identifier if and only if
   5896      * one of the following is true:
   5897      * <ul>
   5898      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   5899      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
   5900      * <li> {@code ch} is a currency symbol (such as {@code '$'})
   5901      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
   5902      * </ul>
   5903      *
   5904      * @param   ch the character to be tested.
   5905      * @return  {@code true} if the character may start a Java
   5906      *          identifier; {@code false} otherwise.
   5907      * @see     Character#isJavaLetterOrDigit(char)
   5908      * @see     Character#isJavaIdentifierStart(char)
   5909      * @see     Character#isJavaIdentifierPart(char)
   5910      * @see     Character#isLetter(char)
   5911      * @see     Character#isLetterOrDigit(char)
   5912      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
   5914      * @deprecated Replaced by isJavaIdentifierStart(char).
   5915      */
   5916     @Deprecated
   5917     public static boolean isJavaLetter(char ch) {
   5918         return isJavaIdentifierStart(ch);
   5919     }
   5920 
   5921     /**
   5922      * Determines if the specified character may be part of a Java
   5923      * identifier as other than the first character.
   5924      * <p>
   5925      * A character may be part of a Java identifier if and only if any
   5926      * of the following are true:
   5927      * <ul>
   5928      * <li>  it is a letter
   5929      * <li>  it is a currency symbol (such as {@code '$'})
   5930      * <li>  it is a connecting punctuation character (such as {@code '_'})
   5931      * <li>  it is a digit
   5932      * <li>  it is a numeric letter (such as a Roman numeral character)
   5933      * <li>  it is a combining mark
   5934      * <li>  it is a non-spacing mark
   5935      * <li> {@code isIdentifierIgnorable} returns
   5936      * {@code true} for the character.
   5937      * </ul>
   5938      *
   5939      * @param   ch the character to be tested.
   5940      * @return  {@code true} if the character may be part of a
   5941      *          Java identifier; {@code false} otherwise.
   5942      * @see     Character#isJavaLetter(char)
   5943      * @see     Character#isJavaIdentifierStart(char)
   5944      * @see     Character#isJavaIdentifierPart(char)
   5945      * @see     Character#isLetter(char)
   5946      * @see     Character#isLetterOrDigit(char)
   5947      * @see     Character#isUnicodeIdentifierPart(char)
   5948      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
   5950      * @deprecated Replaced by isJavaIdentifierPart(char).
   5951      */
   5952     @Deprecated
   5953     public static boolean isJavaLetterOrDigit(char ch) {
   5954         return isJavaIdentifierPart(ch);
   5955     }
   5956 
   5957     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
   5959      * <p>
   5960      * A character is considered to be alphabetic if its general category type,
   5961      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
   5962      * the following:
   5963      * <ul>
   5964      * <li> <code>UPPERCASE_LETTER</code>
   5965      * <li> <code>LOWERCASE_LETTER</code>
   5966      * <li> <code>TITLECASE_LETTER</code>
   5967      * <li> <code>MODIFIER_LETTER</code>
   5968      * <li> <code>OTHER_LETTER</code>
   5969      * <li> <code>LETTER_NUMBER</code>
   5970      * </ul>
   5971      * or it has contributory property Other_Alphabetic as defined by the
   5972      * Unicode Standard.
   5973      *
   5974      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
   5977      * @since   1.7
   5978      */
   5979     public static boolean isAlphabetic(int codePoint) {
   5980         return isAlphabeticImpl(codePoint);
   5981     }
   5982 
    // Native counterpart of isAlphabetic(int), which forwards its code point
    // here unchanged. The body is supplied by native code, not by this file.
    @FastNative
    static native boolean isAlphabeticImpl(int codePoint);
   5985 
   5986 
   5987     /**
   5988      * Determines if the specified character (Unicode code point) is a CJKV
   5989      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
   5990      * the Unicode Standard.
   5991      *
   5992      * @param   codePoint the character (Unicode code point) to be tested.
   5993      * @return  <code>true</code> if the character is a Unicode ideograph
   5994      *          character, <code>false</code> otherwise.
   5995      * @since   1.7
   5996      */
   5997     public static boolean isIdeographic(int codePoint) {
   5998         return isIdeographicImpl(codePoint);
   5999     }
    // Native counterpart of isIdeographic(int), which forwards its code point
    // here unchanged. The body is supplied by native code, not by this file.
    @FastNative
    static native boolean isIdeographicImpl(int codePoint);
   6002 
   6003     /**
   6004      * Determines if the specified character is
   6005      * permissible as the first character in a Java identifier.
   6006      * <p>
   6007      * A character may start a Java identifier if and only if
   6008      * one of the following conditions is true:
   6009      * <ul>
   6010      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   6011      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
   6012      * <li> {@code ch} is a currency symbol (such as {@code '$'})
   6013      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
   6014      * </ul>
   6015      *
   6016      * <p><b>Note:</b> This method cannot handle <a
   6017      * href="#supplementary"> supplementary characters</a>. To support
   6018      * all Unicode characters, including supplementary characters, use
   6019      * the {@link #isJavaIdentifierStart(int)} method.
   6020      *
   6021      * @param   ch the character to be tested.
   6022      * @return  {@code true} if the character may start a Java identifier;
   6023      *          {@code false} otherwise.
   6024      * @see     Character#isJavaIdentifierPart(char)
   6025      * @see     Character#isLetter(char)
   6026      * @see     Character#isUnicodeIdentifierStart(char)
   6027      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6028      * @since   1.1
   6029      */
   6030     public static boolean isJavaIdentifierStart(char ch) {
   6031         return isJavaIdentifierStart((int)ch);
   6032     }
   6033 
   6034     /**
   6035      * Determines if the character (Unicode code point) is
   6036      * permissible as the first character in a Java identifier.
   6037      * <p>
   6038      * A character may start a Java identifier if and only if
   6039      * one of the following conditions is true:
   6040      * <ul>
   6041      * <li> {@link #isLetter(int) isLetter(codePoint)}
   6042      *      returns {@code true}
   6043      * <li> {@link #getType(int) getType(codePoint)}
   6044      *      returns {@code LETTER_NUMBER}
   6045      * <li> the referenced character is a currency symbol (such as {@code '$'})
   6046      * <li> the referenced character is a connecting punctuation character
   6047      *      (such as {@code '_'}).
   6048      * </ul>
   6049      *
   6050      * @param   codePoint the character (Unicode code point) to be tested.
   6051      * @return  {@code true} if the character may start a Java identifier;
   6052      *          {@code false} otherwise.
   6053      * @see     Character#isJavaIdentifierPart(int)
   6054      * @see     Character#isLetter(int)
   6055      * @see     Character#isUnicodeIdentifierStart(int)
   6056      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6057      * @since   1.5
   6058      */
   6059     public static boolean isJavaIdentifierStart(int codePoint) {
   6060         // Use precomputed bitmasks to optimize the ASCII range.
   6061         if (codePoint < 64) {
   6062             return (codePoint == '$'); // There's only one character in this range.
   6063         } else if (codePoint < 128) {
   6064             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   6065         }
   6066         return ((1 << getType(codePoint))
   6067                 & ((1 << UPPERCASE_LETTER)
   6068                    | (1 << LOWERCASE_LETTER)
   6069                    | (1  << TITLECASE_LETTER)
   6070                    | (1  << MODIFIER_LETTER)
   6071                    | (1  << OTHER_LETTER)
   6072                    | (1  << CURRENCY_SYMBOL)
   6073                    | (1  << CONNECTOR_PUNCTUATION)
   6074                    | (1  << LETTER_NUMBER))) != 0;
   6075     }
   6076 
   6077     /**
   6078      * Determines if the specified character may be part of a Java
   6079      * identifier as other than the first character.
   6080      * <p>
   6081      * A character may be part of a Java identifier if any of the following
   6082      * are true:
   6083      * <ul>
   6084      * <li>  it is a letter
   6085      * <li>  it is a currency symbol (such as {@code '$'})
   6086      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6087      * <li>  it is a digit
   6088      * <li>  it is a numeric letter (such as a Roman numeral character)
   6089      * <li>  it is a combining mark
   6090      * <li>  it is a non-spacing mark
   6091      * <li> {@code isIdentifierIgnorable} returns
   6092      * {@code true} for the character
   6093      * </ul>
   6094      *
   6095      * <p><b>Note:</b> This method cannot handle <a
   6096      * href="#supplementary"> supplementary characters</a>. To support
   6097      * all Unicode characters, including supplementary characters, use
   6098      * the {@link #isJavaIdentifierPart(int)} method.
   6099      *
   6100      * @param   ch      the character to be tested.
   6101      * @return {@code true} if the character may be part of a
   6102      *          Java identifier; {@code false} otherwise.
   6103      * @see     Character#isIdentifierIgnorable(char)
   6104      * @see     Character#isJavaIdentifierStart(char)
   6105      * @see     Character#isLetterOrDigit(char)
   6106      * @see     Character#isUnicodeIdentifierPart(char)
   6107      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6108      * @since   1.1
   6109      */
   6110     public static boolean isJavaIdentifierPart(char ch) {
   6111         return isJavaIdentifierPart((int)ch);
   6112     }
   6113 
   6114     /**
   6115      * Determines if the character (Unicode code point) may be part of a Java
   6116      * identifier as other than the first character.
   6117      * <p>
   6118      * A character may be part of a Java identifier if any of the following
   6119      * are true:
   6120      * <ul>
   6121      * <li>  it is a letter
   6122      * <li>  it is a currency symbol (such as {@code '$'})
   6123      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6124      * <li>  it is a digit
   6125      * <li>  it is a numeric letter (such as a Roman numeral character)
   6126      * <li>  it is a combining mark
   6127      * <li>  it is a non-spacing mark
   6128      * <li> {@link #isIdentifierIgnorable(int)
   6129      * isIdentifierIgnorable(codePoint)} returns {@code true} for
   6130      * the character
   6131      * </ul>
   6132      *
   6133      * @param   codePoint the character (Unicode code point) to be tested.
   6134      * @return {@code true} if the character may be part of a
   6135      *          Java identifier; {@code false} otherwise.
   6136      * @see     Character#isIdentifierIgnorable(int)
   6137      * @see     Character#isJavaIdentifierStart(int)
   6138      * @see     Character#isLetterOrDigit(int)
   6139      * @see     Character#isUnicodeIdentifierPart(int)
   6140      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
   6141      * @since   1.5
   6142      */
   6143     public static boolean isJavaIdentifierPart(int codePoint) {
   6144         // Use precomputed bitmasks to optimize the ASCII range.
   6145         if (codePoint < 64) {
   6146             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
   6147         } else if (codePoint < 128) {
   6148             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   6149         }
   6150         return ((1 << getType(codePoint))
   6151                 & ((1 << UPPERCASE_LETTER)
   6152                    | (1 << LOWERCASE_LETTER)
   6153                    | (1 << TITLECASE_LETTER)
   6154                    | (1 << MODIFIER_LETTER)
   6155                    | (1 << OTHER_LETTER)
   6156                    | (1 << CURRENCY_SYMBOL)
   6157                    | (1 << CONNECTOR_PUNCTUATION)
   6158                    | (1 << DECIMAL_DIGIT_NUMBER)
   6159                    | (1 << LETTER_NUMBER)
   6160                    | (1 << FORMAT)
   6161                    | (1 << COMBINING_SPACING_MARK)
   6162                    | (1 << NON_SPACING_MARK))) != 0
   6163                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
   6164                 || (codePoint >= 0x7f && codePoint <= 0x9f);
   6165     }
   6166 
   6167     /**
   6168      * Determines if the specified character is permissible as the
   6169      * first character in a Unicode identifier.
   6170      * <p>
   6171      * A character may start a Unicode identifier if and only if
   6172      * one of the following conditions is true:
   6173      * <ul>
   6174      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
   6175      * <li> {@link #getType(char) getType(ch)} returns
   6176      *      {@code LETTER_NUMBER}.
   6177      * </ul>
   6178      *
   6179      * <p><b>Note:</b> This method cannot handle <a
   6180      * href="#supplementary"> supplementary characters</a>. To support
   6181      * all Unicode characters, including supplementary characters, use
   6182      * the {@link #isUnicodeIdentifierStart(int)} method.
   6183      *
   6184      * @param   ch      the character to be tested.
   6185      * @return  {@code true} if the character may start a Unicode
   6186      *          identifier; {@code false} otherwise.
   6187      * @see     Character#isJavaIdentifierStart(char)
   6188      * @see     Character#isLetter(char)
   6189      * @see     Character#isUnicodeIdentifierPart(char)
   6190      * @since   1.1
   6191      */
   6192     public static boolean isUnicodeIdentifierStart(char ch) {
   6193         return isUnicodeIdentifierStart((int)ch);
   6194     }
   6195 
   6196     /**
   6197      * Determines if the specified character (Unicode code point) is permissible as the
   6198      * first character in a Unicode identifier.
   6199      * <p>
   6200      * A character may start a Unicode identifier if and only if
   6201      * one of the following conditions is true:
   6202      * <ul>
   6203      * <li> {@link #isLetter(int) isLetter(codePoint)}
   6204      *      returns {@code true}
   6205      * <li> {@link #getType(int) getType(codePoint)}
   6206      *      returns {@code LETTER_NUMBER}.
   6207      * </ul>
   6208      * @param   codePoint the character (Unicode code point) to be tested.
   6209      * @return  {@code true} if the character may start a Unicode
   6210      *          identifier; {@code false} otherwise.
   6211      * @see     Character#isJavaIdentifierStart(int)
   6212      * @see     Character#isLetter(int)
   6213      * @see     Character#isUnicodeIdentifierPart(int)
   6214      * @since   1.5
   6215      */
   6216     public static boolean isUnicodeIdentifierStart(int codePoint) {
   6217         return isUnicodeIdentifierStartImpl(codePoint);
   6218     }
   6219 
    // Native counterpart of isUnicodeIdentifierStart(int), which forwards its
    // code point here unchanged. The body is supplied by native code, not by
    // this file.
    @FastNative
    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
   6222 
   6223     /**
   6224      * Determines if the specified character may be part of a Unicode
   6225      * identifier as other than the first character.
   6226      * <p>
   6227      * A character may be part of a Unicode identifier if and only if
   6228      * one of the following statements is true:
   6229      * <ul>
   6230      * <li>  it is a letter
   6231      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6232      * <li>  it is a digit
   6233      * <li>  it is a numeric letter (such as a Roman numeral character)
   6234      * <li>  it is a combining mark
   6235      * <li>  it is a non-spacing mark
   6236      * <li> {@code isIdentifierIgnorable} returns
   6237      * {@code true} for this character.
   6238      * </ul>
   6239      *
   6240      * <p><b>Note:</b> This method cannot handle <a
   6241      * href="#supplementary"> supplementary characters</a>. To support
   6242      * all Unicode characters, including supplementary characters, use
   6243      * the {@link #isUnicodeIdentifierPart(int)} method.
   6244      *
   6245      * @param   ch      the character to be tested.
   6246      * @return  {@code true} if the character may be part of a
   6247      *          Unicode identifier; {@code false} otherwise.
   6248      * @see     Character#isIdentifierIgnorable(char)
   6249      * @see     Character#isJavaIdentifierPart(char)
   6250      * @see     Character#isLetterOrDigit(char)
   6251      * @see     Character#isUnicodeIdentifierStart(char)
   6252      * @since   1.1
   6253      */
   6254     public static boolean isUnicodeIdentifierPart(char ch) {
   6255         return isUnicodeIdentifierPart((int)ch);
   6256     }
   6257 
   6258     /**
   6259      * Determines if the specified character (Unicode code point) may be part of a Unicode
   6260      * identifier as other than the first character.
   6261      * <p>
   6262      * A character may be part of a Unicode identifier if and only if
   6263      * one of the following statements is true:
   6264      * <ul>
   6265      * <li>  it is a letter
   6266      * <li>  it is a connecting punctuation character (such as {@code '_'})
   6267      * <li>  it is a digit
   6268      * <li>  it is a numeric letter (such as a Roman numeral character)
   6269      * <li>  it is a combining mark
   6270      * <li>  it is a non-spacing mark
   6271      * <li> {@code isIdentifierIgnorable} returns
   6272      * {@code true} for this character.
   6273      * </ul>
   6274      * @param   codePoint the character (Unicode code point) to be tested.
   6275      * @return  {@code true} if the character may be part of a
   6276      *          Unicode identifier; {@code false} otherwise.
   6277      * @see     Character#isIdentifierIgnorable(int)
   6278      * @see     Character#isJavaIdentifierPart(int)
   6279      * @see     Character#isLetterOrDigit(int)
   6280      * @see     Character#isUnicodeIdentifierStart(int)
   6281      * @since   1.5
   6282      */
   6283     public static boolean isUnicodeIdentifierPart(int codePoint) {
   6284         return isUnicodeIdentifierPartImpl(codePoint);
   6285     }
   6286 
    // Native counterpart of isUnicodeIdentifierPart(int), which forwards its
    // code point here unchanged. The body is supplied by native code, not by
    // this file.
    @FastNative
    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
   6289 
   6290     /**
   6291      * Determines if the specified character should be regarded as
   6292      * an ignorable character in a Java identifier or a Unicode identifier.
   6293      * <p>
   6294      * The following Unicode characters are ignorable in a Java identifier
   6295      * or a Unicode identifier:
   6296      * <ul>
   6297      * <li>ISO control characters that are not whitespace
   6298      * <ul>
   6299      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
   6300      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
   6301      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
   6302      * </ul>
   6303      *
   6304      * <li>all characters that have the {@code FORMAT} general
   6305      * category value
   6306      * </ul>
   6307      *
   6308      * <p><b>Note:</b> This method cannot handle <a
   6309      * href="#supplementary"> supplementary characters</a>. To support
   6310      * all Unicode characters, including supplementary characters, use
   6311      * the {@link #isIdentifierIgnorable(int)} method.
   6312      *
   6313      * @param   ch      the character to be tested.
   6314      * @return  {@code true} if the character is an ignorable control
   6315      *          character that may be part of a Java or Unicode identifier;
   6316      *           {@code false} otherwise.
   6317      * @see     Character#isJavaIdentifierPart(char)
   6318      * @see     Character#isUnicodeIdentifierPart(char)
   6319      * @since   1.1
   6320      */
   6321     public static boolean isIdentifierIgnorable(char ch) {
   6322         return isIdentifierIgnorable((int)ch);
   6323     }
   6324 
   6325     /**
   6326      * Determines if the specified character (Unicode code point) should be regarded as
   6327      * an ignorable character in a Java identifier or a Unicode identifier.
   6328      * <p>
   6329      * The following Unicode characters are ignorable in a Java identifier
   6330      * or a Unicode identifier:
   6331      * <ul>
   6332      * <li>ISO control characters that are not whitespace
   6333      * <ul>
   6334      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
   6335      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
   6336      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
   6337      * </ul>
   6338      *
   6339      * <li>all characters that have the {@code FORMAT} general
   6340      * category value
   6341      * </ul>
   6342      *
   6343      * @param   codePoint the character (Unicode code point) to be tested.
   6344      * @return  {@code true} if the character is an ignorable control
   6345      *          character that may be part of a Java or Unicode identifier;
   6346      *          {@code false} otherwise.
   6347      * @see     Character#isJavaIdentifierPart(int)
   6348      * @see     Character#isUnicodeIdentifierPart(int)
   6349      * @since   1.5
   6350      */
   6351     public static boolean isIdentifierIgnorable(int codePoint) {
   6352         return isIdentifierIgnorableImpl(codePoint);
   6353     }
   6354 
    // Native counterpart of isIdentifierIgnorable(int), which forwards its
    // code point here unchanged. The body is supplied by native code, not by
    // this file.
    @FastNative
    static native boolean isIdentifierIgnorableImpl(int codePoint);
   6357 
   6358     /**
   6359      * Converts the character argument to lowercase using case
   6360      * mapping information from the UnicodeData file.
   6361      * <p>
   6362      * Note that
   6363      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
   6364      * does not always return {@code true} for some ranges of
   6365      * characters, particularly those that are symbols or ideographs.
   6366      *
   6367      * <p>In general, {@link String#toLowerCase()} should be used to map
   6368      * characters to lowercase. {@code String} case mapping methods
   6369      * have several benefits over {@code Character} case mapping methods.
   6370      * {@code String} case mapping methods can perform locale-sensitive
   6371      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6372      * the {@code Character} case mapping methods cannot.
   6373      *
   6374      * <p><b>Note:</b> This method cannot handle <a
   6375      * href="#supplementary"> supplementary characters</a>. To support
   6376      * all Unicode characters, including supplementary characters, use
   6377      * the {@link #toLowerCase(int)} method.
   6378      *
   6379      * @param   ch   the character to be converted.
   6380      * @return  the lowercase equivalent of the character, if any;
   6381      *          otherwise, the character itself.
   6382      * @see     Character#isLowerCase(char)
   6383      * @see     String#toLowerCase()
   6384      */
   6385     public static char toLowerCase(char ch) {
   6386         return (char)toLowerCase((int)ch);
   6387     }
   6388 
   6389     /**
   6390      * Converts the character (Unicode code point) argument to
   6391      * lowercase using case mapping information from the UnicodeData
   6392      * file.
   6393      *
   6394      * <p> Note that
   6395      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
   6396      * does not always return {@code true} for some ranges of
   6397      * characters, particularly those that are symbols or ideographs.
   6398      *
   6399      * <p>In general, {@link String#toLowerCase()} should be used to map
   6400      * characters to lowercase. {@code String} case mapping methods
   6401      * have several benefits over {@code Character} case mapping methods.
   6402      * {@code String} case mapping methods can perform locale-sensitive
   6403      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6404      * the {@code Character} case mapping methods cannot.
   6405      *
   6406      * @param   codePoint   the character (Unicode code point) to be converted.
   6407      * @return  the lowercase equivalent of the character (Unicode code
   6408      *          point), if any; otherwise, the character itself.
   6409      * @see     Character#isLowerCase(int)
   6410      * @see     String#toLowerCase()
   6411      *
   6412      * @since   1.5
   6413      */
   6414     public static int toLowerCase(int codePoint) {
   6415         if (codePoint >= 'A' && codePoint <= 'Z') {
   6416             return codePoint + ('a' - 'A');
   6417         }
   6418 
   6419         // All ASCII codepoints except the ones above remain unchanged.
   6420         if (codePoint < 0x80) {
   6421             return codePoint;
   6422         }
   6423 
   6424         return toLowerCaseImpl(codePoint);
   6425     }
   6426 
    // Native lower-case mapping. toLowerCase(int) resolves every value below
    // 0x80 itself and calls this only for the remaining code points. The body
    // is supplied by native code, not by this file.
    @FastNative
    static native int toLowerCaseImpl(int codePoint);
   6429 
   6430     /**
   6431      * Converts the character argument to uppercase using case mapping
   6432      * information from the UnicodeData file.
   6433      * <p>
   6434      * Note that
   6435      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
   6436      * does not always return {@code true} for some ranges of
   6437      * characters, particularly those that are symbols or ideographs.
   6438      *
   6439      * <p>In general, {@link String#toUpperCase()} should be used to map
   6440      * characters to uppercase. {@code String} case mapping methods
   6441      * have several benefits over {@code Character} case mapping methods.
   6442      * {@code String} case mapping methods can perform locale-sensitive
   6443      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6444      * the {@code Character} case mapping methods cannot.
   6445      *
   6446      * <p><b>Note:</b> This method cannot handle <a
   6447      * href="#supplementary"> supplementary characters</a>. To support
   6448      * all Unicode characters, including supplementary characters, use
   6449      * the {@link #toUpperCase(int)} method.
   6450      *
   6451      * @param   ch   the character to be converted.
   6452      * @return  the uppercase equivalent of the character, if any;
   6453      *          otherwise, the character itself.
   6454      * @see     Character#isUpperCase(char)
   6455      * @see     String#toUpperCase()
   6456      */
   6457     public static char toUpperCase(char ch) {
   6458         return (char)toUpperCase((int)ch);
   6459     }
   6460 
   6461     /**
   6462      * Converts the character (Unicode code point) argument to
   6463      * uppercase using case mapping information from the UnicodeData
   6464      * file.
   6465      *
   6466      * <p>Note that
   6467      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
   6468      * does not always return {@code true} for some ranges of
   6469      * characters, particularly those that are symbols or ideographs.
   6470      *
   6471      * <p>In general, {@link String#toUpperCase()} should be used to map
   6472      * characters to uppercase. {@code String} case mapping methods
   6473      * have several benefits over {@code Character} case mapping methods.
   6474      * {@code String} case mapping methods can perform locale-sensitive
   6475      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
   6476      * the {@code Character} case mapping methods cannot.
   6477      *
   6478      * @param   codePoint   the character (Unicode code point) to be converted.
   6479      * @return  the uppercase equivalent of the character, if any;
   6480      *          otherwise, the character itself.
   6481      * @see     Character#isUpperCase(int)
   6482      * @see     String#toUpperCase()
   6483      *
   6484      * @since   1.5
   6485      */
   6486     public static int toUpperCase(int codePoint) {
   6487         if (codePoint >= 'a' && codePoint <= 'z') {
   6488             return codePoint - ('a' - 'A');
   6489         }
   6490 
   6491         // All ASCII codepoints except the ones above remain unchanged.
   6492         if (codePoint < 0x80) {
   6493             return codePoint;
   6494         }
   6495 
   6496         return toUpperCaseImpl(codePoint);
   6497     }
   6498 
    // Native upper-case mapping. toUpperCase(int) resolves every value below
    // 0x80 itself and calls this only for the remaining code points. The body
    // is supplied by native code, not by this file.
    @FastNative
    static native int toUpperCaseImpl(int codePoint);
   6501 
   6502     /**
   6503      * Converts the character argument to titlecase using case mapping
   6504      * information from the UnicodeData file. If a character has no
   6505      * explicit titlecase mapping and is not itself a titlecase char
   6506      * according to UnicodeData, then the uppercase mapping is
   6507      * returned as an equivalent titlecase mapping. If the
   6508      * {@code char} argument is already a titlecase
   6509      * {@code char}, the same {@code char} value will be
   6510      * returned.
   6511      * <p>
   6512      * Note that
   6513      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
   6514      * does not always return {@code true} for some ranges of
   6515      * characters.
   6516      *
   6517      * <p><b>Note:</b> This method cannot handle <a
   6518      * href="#supplementary"> supplementary characters</a>. To support
   6519      * all Unicode characters, including supplementary characters, use
   6520      * the {@link #toTitleCase(int)} method.
   6521      *
   6522      * @param   ch   the character to be converted.
   6523      * @return  the titlecase equivalent of the character, if any;
   6524      *          otherwise, the character itself.
   6525      * @see     Character#isTitleCase(char)
   6526      * @see     Character#toLowerCase(char)
   6527      * @see     Character#toUpperCase(char)
   6528      * @since   1.0.2
   6529      */
   6530     public static char toTitleCase(char ch) {
   6531         return (char)toTitleCase((int)ch);
   6532     }
   6533 
   6534     /**
   6535      * Converts the character (Unicode code point) argument to titlecase using case mapping
   6536      * information from the UnicodeData file. If a character has no
   6537      * explicit titlecase mapping and is not itself a titlecase char
   6538      * according to UnicodeData, then the uppercase mapping is
   6539      * returned as an equivalent titlecase mapping. If the
   6540      * character argument is already a titlecase
   6541      * character, the same character value will be
   6542      * returned.
   6543      *
   6544      * <p>Note that
   6545      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
   6546      * does not always return {@code true} for some ranges of
   6547      * characters.
   6548      *
   6549      * @param   codePoint   the character (Unicode code point) to be converted.
   6550      * @return  the titlecase equivalent of the character, if any;
   6551      *          otherwise, the character itself.
   6552      * @see     Character#isTitleCase(int)
   6553      * @see     Character#toLowerCase(int)
   6554      * @see     Character#toUpperCase(int)
   6555      * @since   1.5
   6556      */
   6557     public static int toTitleCase(int codePoint) {
   6558         return toTitleCaseImpl(codePoint);
   6559     }
   6560 
    // Native title-case mapping. toTitleCase(int) forwards every code point
    // here unchanged. The body is supplied by native code, not by this file.
    @FastNative
    static native int toTitleCaseImpl(int codePoint);
   6563 
   6564     /**
   6565      * Returns the numeric value of the character {@code ch} in the
   6566      * specified radix.
   6567      * <p>
   6568      * If the radix is not in the range {@code MIN_RADIX} &le;
   6569      * {@code radix} &le; {@code MAX_RADIX} or if the
   6570      * value of {@code ch} is not a valid digit in the specified
   6571      * radix, {@code -1} is returned. A character is a valid digit
   6572      * if at least one of the following is true:
   6573      * <ul>
   6574      * <li>The method {@code isDigit} is {@code true} of the character
   6575      *     and the Unicode decimal digit value of the character (or its
   6576      *     single-character decomposition) is less than the specified radix.
   6577      *     In this case the decimal digit value is returned.
   6578      * <li>The character is one of the uppercase Latin letters
   6579      *     {@code 'A'} through {@code 'Z'} and its code is less than
   6580      *     {@code radix + 'A' - 10}.
   6581      *     In this case, {@code ch - 'A' + 10}
   6582      *     is returned.
   6583      * <li>The character is one of the lowercase Latin letters
   6584      *     {@code 'a'} through {@code 'z'} and its code is less than
   6585      *     {@code radix + 'a' - 10}.
   6586      *     In this case, {@code ch - 'a' + 10}
   6587      *     is returned.
   6588      * <li>The character is one of the fullwidth uppercase Latin letters A
   6589      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
   6590      *     and its code is less than
   6591      *     {@code radix + '\u005CuFF21' - 10}.
   6592      *     In this case, {@code ch - '\u005CuFF21' + 10}
   6593      *     is returned.
   6594      * <li>The character is one of the fullwidth lowercase Latin letters a
   6595      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
   6596      *     and its code is less than
   6597      *     {@code radix + '\u005CuFF41' - 10}.
   6598      *     In this case, {@code ch - '\u005CuFF41' + 10}
   6599      *     is returned.
   6600      * </ul>
   6601      *
   6602      * <p><b>Note:</b> This method cannot handle <a
   6603      * href="#supplementary"> supplementary characters</a>. To support
   6604      * all Unicode characters, including supplementary characters, use
   6605      * the {@link #digit(int, int)} method.
   6606      *
   6607      * @param   ch      the character to be converted.
   6608      * @param   radix   the radix.
   6609      * @return  the numeric value represented by the character in the
   6610      *          specified radix.
   6611      * @see     Character#forDigit(int, int)
   6612      * @see     Character#isDigit(char)
   6613      */
   6614     public static int digit(char ch, int radix) {
   6615         return digit((int)ch, radix);
   6616     }
   6617 
   6618     /**
   6619      * Returns the numeric value of the specified character (Unicode
   6620      * code point) in the specified radix.
   6621      *
   6622      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
   6623      * {@code radix} &le; {@code MAX_RADIX} or if the
   6624      * character is not a valid digit in the specified
   6625      * radix, {@code -1} is returned. A character is a valid digit
   6626      * if at least one of the following is true:
   6627      * <ul>
   6628      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
   6629      *     and the Unicode decimal digit value of the character (or its
   6630      *     single-character decomposition) is less than the specified radix.
   6631      *     In this case the decimal digit value is returned.
   6632      * <li>The character is one of the uppercase Latin letters
   6633      *     {@code 'A'} through {@code 'Z'} and its code is less than
   6634      *     {@code radix + 'A' - 10}.
   6635      *     In this case, {@code codePoint - 'A' + 10}
   6636      *     is returned.
   6637      * <li>The character is one of the lowercase Latin letters
   6638      *     {@code 'a'} through {@code 'z'} and its code is less than
   6639      *     {@code radix + 'a' - 10}.
   6640      *     In this case, {@code codePoint - 'a' + 10}
   6641      *     is returned.
   6642      * <li>The character is one of the fullwidth uppercase Latin letters A
   6643      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
   6644      *     and its code is less than
   6645      *     {@code radix + '\u005CuFF21' - 10}.
   6646      *     In this case,
   6647      *     {@code codePoint - '\u005CuFF21' + 10}
   6648      *     is returned.
   6649      * <li>The character is one of the fullwidth lowercase Latin letters a
   6650      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
   6651      *     and its code is less than
   6652      *     {@code radix + '\u005CuFF41'- 10}.
   6653      *     In this case,
   6654      *     {@code codePoint - '\u005CuFF41' + 10}
   6655      *     is returned.
   6656      * </ul>
   6657      *
   6658      * @param   codePoint the character (Unicode code point) to be converted.
   6659      * @param   radix   the radix.
   6660      * @return  the numeric value represented by the character in the
   6661      *          specified radix.
   6662      * @see     Character#forDigit(int, int)
   6663      * @see     Character#isDigit(int)
   6664      * @since   1.5
   6665      */
   6666     public static int digit(int codePoint, int radix) {
   6667         if (radix < MIN_RADIX || radix > MAX_RADIX) {
   6668             return -1;
   6669         }
   6670         if (codePoint < 128) {
   6671             // Optimized for ASCII
   6672             int result = -1;
   6673             if ('0' <= codePoint && codePoint <= '9') {
   6674                 result = codePoint - '0';
   6675             } else if ('a' <= codePoint && codePoint <= 'z') {
   6676                 result = 10 + (codePoint - 'a');
   6677             } else if ('A' <= codePoint && codePoint <= 'Z') {
   6678                 result = 10 + (codePoint - 'A');
   6679             }
   6680             return result < radix ? result : -1;
   6681         }
   6682         return digitImpl(codePoint, radix);
   6683     }
   6684 
    // Native backend invoked by digit(int, int) for code points of 128 and above, after the
    // radix has been range-checked.
    @FastNative
    native static int digitImpl(int codePoint, int radix);
   6687 
   6688     /**
   6689      * Returns the {@code int} value that the specified Unicode
   6690      * character represents. For example, the character
   6691      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
   6692      * an int with a value of 50.
   6693      * <p>
   6694      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
   6695      * {@code '\u005Cu005A'}), lowercase
   6696      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
   6697      * full width variant ({@code '\u005CuFF21'} through
   6698      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
   6699      * {@code '\u005CuFF5A'}) forms have numeric values from 10
   6700      * through 35. This is independent of the Unicode specification,
   6701      * which does not assign numeric values to these {@code char}
   6702      * values.
   6703      * <p>
   6704      * If the character does not have a numeric value, then -1 is returned.
   6705      * If the character has a numeric value that cannot be represented as a
   6706      * nonnegative integer (for example, a fractional value), then -2
   6707      * is returned.
   6708      *
   6709      * <p><b>Note:</b> This method cannot handle <a
   6710      * href="#supplementary"> supplementary characters</a>. To support
   6711      * all Unicode characters, including supplementary characters, use
   6712      * the {@link #getNumericValue(int)} method.
   6713      *
   6714      * @param   ch      the character to be converted.
   6715      * @return  the numeric value of the character, as a nonnegative {@code int}
   6716      *           value; -2 if the character has a numeric value that is not a
   6717      *          nonnegative integer; -1 if the character has no numeric value.
   6718      * @see     Character#forDigit(int, int)
   6719      * @see     Character#isDigit(char)
   6720      * @since   1.1
   6721      */
   6722     public static int getNumericValue(char ch) {
   6723         return getNumericValue((int)ch);
   6724     }
   6725 
   6726     /**
   6727      * Returns the {@code int} value that the specified
   6728      * character (Unicode code point) represents. For example, the character
   6729      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
   6730      * an {@code int} with a value of 50.
   6731      * <p>
   6732      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
   6733      * {@code '\u005Cu005A'}), lowercase
   6734      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
   6735      * full width variant ({@code '\u005CuFF21'} through
   6736      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
   6737      * {@code '\u005CuFF5A'}) forms have numeric values from 10
   6738      * through 35. This is independent of the Unicode specification,
   6739      * which does not assign numeric values to these {@code char}
   6740      * values.
   6741      * <p>
   6742      * If the character does not have a numeric value, then -1 is returned.
   6743      * If the character has a numeric value that cannot be represented as a
   6744      * nonnegative integer (for example, a fractional value), then -2
   6745      * is returned.
   6746      *
   6747      * @param   codePoint the character (Unicode code point) to be converted.
   6748      * @return  the numeric value of the character, as a nonnegative {@code int}
   6749      *          value; -2 if the character has a numeric value that is not a
   6750      *          nonnegative integer; -1 if the character has no numeric value.
   6751      * @see     Character#forDigit(int, int)
   6752      * @see     Character#isDigit(int)
   6753      * @since   1.5
   6754      */
   6755     public static int getNumericValue(int codePoint) {
   6756         // This is both an optimization and papers over differences between Java and ICU.
   6757         if (codePoint < 128) {
   6758             if (codePoint >= '0' && codePoint <= '9') {
   6759                 return codePoint - '0';
   6760             }
   6761             if (codePoint >= 'a' && codePoint <= 'z') {
   6762                 return codePoint - ('a' - 10);
   6763             }
   6764             if (codePoint >= 'A' && codePoint <= 'Z') {
   6765                 return codePoint - ('A' - 10);
   6766             }
   6767             return -1;
   6768         }
   6769         // Full-width uppercase A-Z.
   6770         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
   6771             return codePoint - 0xff17;
   6772         }
   6773         // Full-width lowercase a-z.
   6774         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
   6775             return codePoint - 0xff37;
   6776         }
   6777         return getNumericValueImpl(codePoint);
   6778     }
   6779 
    // Native backend invoked by getNumericValue(int) for code points that are neither ASCII
    // nor full-width Latin letters.
    @FastNative
    native static int getNumericValueImpl(int codePoint);
   6782 
   6783     /**
   6784      * Determines if the specified character is ISO-LATIN-1 white space.
   6785      * This method returns {@code true} for the following five
   6786      * characters only:
   6787      * <table summary="truechars">
   6788      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
   6789      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
   6790      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
   6791      *     <td>{@code NEW LINE}</td></tr>
   6792      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
   6793      *     <td>{@code FORM FEED}</td></tr>
   6794      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
   6795      *     <td>{@code CARRIAGE RETURN}</td></tr>
   6796      * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
   6797      *     <td>{@code SPACE}</td></tr>
   6798      * </table>
   6799      *
   6800      * @param      ch   the character to be tested.
   6801      * @return     {@code true} if the character is ISO-LATIN-1 white
   6802      *             space; {@code false} otherwise.
   6803      * @see        Character#isSpaceChar(char)
   6804      * @see        Character#isWhitespace(char)
   6805      * @deprecated Replaced by isWhitespace(char).
   6806      */
   6807     @Deprecated
   6808     public static boolean isSpace(char ch) {
   6809         return (ch <= 0x0020) &&
   6810             (((((1L << 0x0009) |
   6811             (1L << 0x000A) |
   6812             (1L << 0x000C) |
   6813             (1L << 0x000D) |
   6814             (1L << 0x0020)) >> ch) & 1L) != 0);
   6815     }
   6816 
   6817 
   6818     /**
   6819      * Determines if the specified character is a Unicode space character.
   6820      * A character is considered to be a space character if and only if
   6821      * it is specified to be a space character by the Unicode Standard. This
   6822      * method returns true if the character's general category type is any of
   6823      * the following:
   6824      * <ul>
   6825      * <li> {@code SPACE_SEPARATOR}
   6826      * <li> {@code LINE_SEPARATOR}
   6827      * <li> {@code PARAGRAPH_SEPARATOR}
   6828      * </ul>
   6829      *
   6830      * <p><b>Note:</b> This method cannot handle <a
   6831      * href="#supplementary"> supplementary characters</a>. To support
   6832      * all Unicode characters, including supplementary characters, use
   6833      * the {@link #isSpaceChar(int)} method.
   6834      *
   6835      * @param   ch      the character to be tested.
   6836      * @return  {@code true} if the character is a space character;
   6837      *          {@code false} otherwise.
   6838      * @see     Character#isWhitespace(char)
   6839      * @since   1.1
   6840      */
   6841     public static boolean isSpaceChar(char ch) {
   6842         return isSpaceChar((int)ch);
   6843     }
   6844 
   6845     /**
   6846      * Determines if the specified character (Unicode code point) is a
   6847      * Unicode space character.  A character is considered to be a
   6848      * space character if and only if it is specified to be a space
   6849      * character by the Unicode Standard. This method returns true if
   6850      * the character's general category type is any of the following:
   6851      *
   6852      * <ul>
   6853      * <li> {@link #SPACE_SEPARATOR}
   6854      * <li> {@link #LINE_SEPARATOR}
   6855      * <li> {@link #PARAGRAPH_SEPARATOR}
   6856      * </ul>
   6857      *
   6858      * @param   codePoint the character (Unicode code point) to be tested.
   6859      * @return  {@code true} if the character is a space character;
   6860      *          {@code false} otherwise.
   6861      * @see     Character#isWhitespace(int)
   6862      * @since   1.5
   6863      */
   6864     public static boolean isSpaceChar(int codePoint) {
   6865         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   6866         // SPACE or NO-BREAK SPACE?
   6867         if (codePoint == 0x20 || codePoint == 0xa0) {
   6868             return true;
   6869         }
   6870         if (codePoint < 0x1000) {
   6871             return false;
   6872         }
   6873         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   6874         if (codePoint == 0x1680 || codePoint == 0x180e) {
   6875             return true;
   6876         }
   6877         if (codePoint < 0x2000) {
   6878             return false;
   6879         }
   6880         if (codePoint <= 0xffff) {
   6881             // Other whitespace from General Punctuation...
   6882             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
   6883                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   6884         }
   6885         // Let icu4c worry about non-BMP code points.
   6886         return isSpaceCharImpl(codePoint);
   6887     }
   6888 
    // Native backend invoked by isSpaceChar(int) only for code points above 0xffff.
    @FastNative
    static native boolean isSpaceCharImpl(int codePoint);
   6891 
   6892     /**
   6893      * Determines if the specified character is white space according to Java.
   6894      * A character is a Java whitespace character if and only if it satisfies
   6895      * one of the following criteria:
   6896      * <ul>
   6897      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
   6898      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
   6899      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
   6900      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
   6901      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
   6902      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
   6903      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
   6904      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
   6905      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
   6906      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
   6907      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
   6908      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
   6909      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
   6910      * </ul>
   6911      *
   6912      * <p><b>Note:</b> This method cannot handle <a
   6913      * href="#supplementary"> supplementary characters</a>. To support
   6914      * all Unicode characters, including supplementary characters, use
   6915      * the {@link #isWhitespace(int)} method.
   6916      *
   6917      * @param   ch the character to be tested.
   6918      * @return  {@code true} if the character is a Java whitespace
   6919      *          character; {@code false} otherwise.
   6920      * @see     Character#isSpaceChar(char)
   6921      * @since   1.1
   6922      */
   6923     public static boolean isWhitespace(char ch) {
   6924         return isWhitespace((int)ch);
   6925     }
   6926 
   6927     /**
   6928      * Determines if the specified character (Unicode code point) is
   6929      * white space according to Java.  A character is a Java
   6930      * whitespace character if and only if it satisfies one of the
   6931      * following criteria:
   6932      * <ul>
   6933      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
   6934      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
   6935      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
   6936      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
   6937      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
   6938      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
   6939      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
   6940      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
   6941      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
   6942      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
   6943      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
   6944      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
   6945      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
   6946      * </ul>
   6947      * <p>
   6948      *
   6949      * @param   codePoint the character (Unicode code point) to be tested.
   6950      * @return  {@code true} if the character is a Java whitespace
   6951      *          character; {@code false} otherwise.
   6952      * @see     Character#isSpaceChar(int)
   6953      * @since   1.5
   6954      */
   6955     public static boolean isWhitespace(int codePoint) {
   6956         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   6957         // Any ASCII whitespace character?
   6958         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
   6959             return true;
   6960         }
   6961         if (codePoint < 0x1000) {
   6962             return false;
   6963         }
   6964         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   6965         if (codePoint == 0x1680 || codePoint == 0x180e) {
   6966             return true;
   6967         }
   6968         if (codePoint < 0x2000) {
   6969             return false;
   6970         }
   6971         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
   6972         if (codePoint == 0x2007 || codePoint == 0x202f) {
   6973             return false;
   6974         }
   6975         if (codePoint <= 0xffff) {
   6976             // Other whitespace from General Punctuation...
   6977             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
   6978                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   6979         }
   6980         // Let icu4c worry about non-BMP code points.
   6981         return isWhitespaceImpl(codePoint);
   6982     }
   6983 
    // Native backend invoked by isWhitespace(int) only for code points above 0xffff.
    @FastNative
    native static boolean isWhitespaceImpl(int codePoint);
   6986 
   6987     /**
   6988      * Determines if the specified character is an ISO control
   6989      * character.  A character is considered to be an ISO control
   6990      * character if its code is in the range {@code '\u005Cu0000'}
   6991      * through {@code '\u005Cu001F'} or in the range
   6992      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
   6993      *
   6994      * <p><b>Note:</b> This method cannot handle <a
   6995      * href="#supplementary"> supplementary characters</a>. To support
   6996      * all Unicode characters, including supplementary characters, use
   6997      * the {@link #isISOControl(int)} method.
   6998      *
   6999      * @param   ch      the character to be tested.
   7000      * @return  {@code true} if the character is an ISO control character;
   7001      *          {@code false} otherwise.
   7002      *
   7003      * @see     Character#isSpaceChar(char)
   7004      * @see     Character#isWhitespace(char)
   7005      * @since   1.1
   7006      */
   7007     public static boolean isISOControl(char ch) {
   7008         return isISOControl((int)ch);
   7009     }
   7010 
   7011     /**
   7012      * Determines if the referenced character (Unicode code point) is an ISO control
   7013      * character.  A character is considered to be an ISO control
   7014      * character if its code is in the range {@code '\u005Cu0000'}
   7015      * through {@code '\u005Cu001F'} or in the range
   7016      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
   7017      *
   7018      * @param   codePoint the character (Unicode code point) to be tested.
   7019      * @return  {@code true} if the character is an ISO control character;
   7020      *          {@code false} otherwise.
   7021      * @see     Character#isSpaceChar(int)
   7022      * @see     Character#isWhitespace(int)
   7023      * @since   1.5
   7024      */
   7025     public static boolean isISOControl(int codePoint) {
   7026         // Optimized form of:
   7027         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
   7028         //     (codePoint >= 0x7F && codePoint <= 0x9F);
   7029         return codePoint <= 0x9F &&
   7030             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
   7031     }
   7032 
   7033     /**
   7034      * Returns a value indicating a character's general category.
   7035      *
   7036      * <p><b>Note:</b> This method cannot handle <a
   7037      * href="#supplementary"> supplementary characters</a>. To support
   7038      * all Unicode characters, including supplementary characters, use
   7039      * the {@link #getType(int)} method.
   7040      *
   7041      * @param   ch      the character to be tested.
   7042      * @return  a value of type {@code int} representing the
   7043      *          character's general category.
   7044      * @see     Character#COMBINING_SPACING_MARK
   7045      * @see     Character#CONNECTOR_PUNCTUATION
   7046      * @see     Character#CONTROL
   7047      * @see     Character#CURRENCY_SYMBOL
   7048      * @see     Character#DASH_PUNCTUATION
   7049      * @see     Character#DECIMAL_DIGIT_NUMBER
   7050      * @see     Character#ENCLOSING_MARK
   7051      * @see     Character#END_PUNCTUATION
   7052      * @see     Character#FINAL_QUOTE_PUNCTUATION
   7053      * @see     Character#FORMAT
   7054      * @see     Character#INITIAL_QUOTE_PUNCTUATION
   7055      * @see     Character#LETTER_NUMBER
   7056      * @see     Character#LINE_SEPARATOR
   7057      * @see     Character#LOWERCASE_LETTER
   7058      * @see     Character#MATH_SYMBOL
   7059      * @see     Character#MODIFIER_LETTER
   7060      * @see     Character#MODIFIER_SYMBOL
   7061      * @see     Character#NON_SPACING_MARK
   7062      * @see     Character#OTHER_LETTER
   7063      * @see     Character#OTHER_NUMBER
   7064      * @see     Character#OTHER_PUNCTUATION
   7065      * @see     Character#OTHER_SYMBOL
   7066      * @see     Character#PARAGRAPH_SEPARATOR
   7067      * @see     Character#PRIVATE_USE
   7068      * @see     Character#SPACE_SEPARATOR
   7069      * @see     Character#START_PUNCTUATION
   7070      * @see     Character#SURROGATE
   7071      * @see     Character#TITLECASE_LETTER
   7072      * @see     Character#UNASSIGNED
   7073      * @see     Character#UPPERCASE_LETTER
   7074      * @since   1.1
   7075      */
   7076     public static int getType(char ch) {
   7077         return getType((int)ch);
   7078     }
   7079 
   7080     /**
   7081      * Returns a value indicating a character's general category.
   7082      *
   7083      * @param   codePoint the character (Unicode code point) to be tested.
   7084      * @return  a value of type {@code int} representing the
   7085      *          character's general category.
   7086      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
   7087      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
   7088      * @see     Character#CONTROL CONTROL
   7089      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
   7090      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
   7091      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
   7092      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
   7093      * @see     Character#END_PUNCTUATION END_PUNCTUATION
   7094      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
   7095      * @see     Character#FORMAT FORMAT
   7096      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
   7097      * @see     Character#LETTER_NUMBER LETTER_NUMBER
   7098      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
   7099      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
   7100      * @see     Character#MATH_SYMBOL MATH_SYMBOL
   7101      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
   7102      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
   7103      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
   7104      * @see     Character#OTHER_LETTER OTHER_LETTER
   7105      * @see     Character#OTHER_NUMBER OTHER_NUMBER
   7106      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
   7107      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
   7108      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
   7109      * @see     Character#PRIVATE_USE PRIVATE_USE
   7110      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
   7111      * @see     Character#START_PUNCTUATION START_PUNCTUATION
   7112      * @see     Character#SURROGATE SURROGATE
   7113      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
   7114      * @see     Character#UNASSIGNED UNASSIGNED
   7115      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
   7116      * @since   1.5
   7117      */
   7118     public static int getType(int codePoint) {
   7119         int type = getTypeImpl(codePoint);
   7120         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
   7121         if (type <= Character.FORMAT) {
   7122             return type;
   7123         }
   7124         return (type + 1);
   7125     }
   7126 
    // Native backend invoked by getType(int); the caller remaps the returned value because
    // the raw values are not RI-compatible.
    @FastNative
    static native int getTypeImpl(int codePoint);
   7129 
   7130     /**
   7131      * Determines the character representation for a specific digit in
   7132      * the specified radix. If the value of {@code radix} is not a
   7133      * valid radix, or the value of {@code digit} is not a valid
   7134      * digit in the specified radix, the null character
   7135      * ({@code '\u005Cu0000'}) is returned.
   7136      * <p>
   7137      * The {@code radix} argument is valid if it is greater than or
   7138      * equal to {@code MIN_RADIX} and less than or equal to
   7139      * {@code MAX_RADIX}. The {@code digit} argument is valid if
   7140      * {@code 0 <= digit < radix}.
   7141      * <p>
   7142      * If the digit is less than 10, then
   7143      * {@code '0' + digit} is returned. Otherwise, the value
   7144      * {@code 'a' + digit - 10} is returned.
   7145      *
   7146      * @param   digit   the number to convert to a character.
   7147      * @param   radix   the radix.
   7148      * @return  the {@code char} representation of the specified digit
   7149      *          in the specified radix.
   7150      * @see     Character#MIN_RADIX
   7151      * @see     Character#MAX_RADIX
   7152      * @see     Character#digit(char, int)
   7153      */
   7154     public static char forDigit(int digit, int radix) {
   7155         if ((digit >= radix) || (digit < 0)) {
   7156             return '\0';
   7157         }
   7158         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
   7159             return '\0';
   7160         }
   7161         if (digit < 10) {
   7162             return (char)('0' + digit);
   7163         }
   7164         return (char)('a' - 10 + digit);
   7165     }
   7166 
   7167     /**
   7168      * Returns the Unicode directionality property for the given
   7169      * character.  Character directionality is used to calculate the
   7170      * visual ordering of text. The directionality value of undefined
   7171      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
   7172      *
   7173      * <p><b>Note:</b> This method cannot handle <a
   7174      * href="#supplementary"> supplementary characters</a>. To support
   7175      * all Unicode characters, including supplementary characters, use
   7176      * the {@link #getDirectionality(int)} method.
   7177      *
   7178      * @param  ch {@code char} for which the directionality property
   7179      *            is requested.
   7180      * @return the directionality property of the {@code char} value.
   7181      *
   7182      * @see Character#DIRECTIONALITY_UNDEFINED
   7183      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
   7184      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
   7185      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   7186      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
   7187      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   7188      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   7189      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
   7190      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   7191      * @see Character#DIRECTIONALITY_NONSPACING_MARK
   7192      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
   7193      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
   7194      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
   7195      * @see Character#DIRECTIONALITY_WHITESPACE
   7196      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
   7197      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   7198      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   7199      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   7200      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   7201      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   7202      * @since 1.4
   7203      */
   7204     public static byte getDirectionality(char ch) {
   7205         return getDirectionality((int)ch);
   7206     }
   7207 
   7208     /**
   7209      * Returns the Unicode directionality property for the given
   7210      * character (Unicode code point).  Character directionality is
   7211      * used to calculate the visual ordering of text. The
   7212      * directionality value of an undefined character is {@link
   7213      * #DIRECTIONALITY_UNDEFINED}.
   7214      *
   7215      * @param   codePoint the character (Unicode code point) for which
   7216      *          the directionality property is requested.
   7217      * @return the directionality property of the character.
   7218      *
   7219      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
   7220      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
   7221      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
   7222      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   7223      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
   7224      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   7225      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   7226      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
   7227      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   7228      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
   7229      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
   7230      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
   7231      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
   7232      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
   7233      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
   7234      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   7235      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   7236      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   7237      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   7238      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   7239      * @since    1.5
   7240      */
   7241     public static byte getDirectionality(int codePoint) {
   7242         if (getType(codePoint) == Character.UNASSIGNED) {
   7243             return Character.DIRECTIONALITY_UNDEFINED;
   7244         }
   7245 
   7246         byte directionality = getDirectionalityImpl(codePoint);
   7247         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
   7248             return DIRECTIONALITY[directionality];
   7249         }
   7250         return Character.DIRECTIONALITY_UNDEFINED;
   7251     }
   7252 
   7253     @FastNative
   7254     native static byte getDirectionalityImpl(int codePoint);
   7255     /**
   7256      * Determines whether the character is mirrored according to the
   7257      * Unicode specification.  Mirrored characters should have their
   7258      * glyphs horizontally mirrored when displayed in text that is
   7259      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
   7260      * PARENTHESIS is semantically defined to be an <i>opening
   7261      * parenthesis</i>.  This will appear as a "(" in text that is
   7262      * left-to-right but as a ")" in text that is right-to-left.
   7263      *
   7264      * <p><b>Note:</b> This method cannot handle <a
   7265      * href="#supplementary"> supplementary characters</a>. To support
   7266      * all Unicode characters, including supplementary characters, use
   7267      * the {@link #isMirrored(int)} method.
   7268      *
   7269      * @param  ch {@code char} for which the mirrored property is requested
   7270      * @return {@code true} if the char is mirrored, {@code false}
   7271      *         if the {@code char} is not mirrored or is not defined.
   7272      * @since 1.4
   7273      */
   7274     public static boolean isMirrored(char ch) {
   7275         return isMirrored((int)ch);
   7276     }
   7277 
   7278     /**
   7279      * Determines whether the specified character (Unicode code point)
   7280      * is mirrored according to the Unicode specification.  Mirrored
   7281      * characters should have their glyphs horizontally mirrored when
   7282      * displayed in text that is right-to-left.  For example,
   7283      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
   7284      * defined to be an <i>opening parenthesis</i>.  This will appear
   7285      * as a "(" in text that is left-to-right but as a ")" in text
   7286      * that is right-to-left.
   7287      *
   7288      * @param   codePoint the character (Unicode code point) to be tested.
   7289      * @return  {@code true} if the character is mirrored, {@code false}
   7290      *          if the character is not mirrored or is not defined.
   7291      * @since   1.5
   7292      */
   7293     public static boolean isMirrored(int codePoint) {
   7294         return isMirroredImpl(codePoint);
   7295     }
   7296 
   7297     @FastNative
   7298     native static boolean isMirroredImpl(int codePoint);
   7299     /**
   7300      * Compares two {@code Character} objects numerically.
   7301      *
   7302      * @param   anotherCharacter   the {@code Character} to be compared.
   7303      *
   7304      * @return  the value {@code 0} if the argument {@code Character}
   7305      *          is equal to this {@code Character}; a value less than
   7306      *          {@code 0} if this {@code Character} is numerically less
   7307      *          than the {@code Character} argument; and a value greater than
   7308      *          {@code 0} if this {@code Character} is numerically greater
   7309      *          than the {@code Character} argument (unsigned comparison).
   7310      *          Note that this is strictly a numerical comparison; it is not
   7311      *          locale-dependent.
   7312      * @since   1.2
   7313      */
   7314     public int compareTo(Character anotherCharacter) {
   7315         return compare(this.value, anotherCharacter.value);
   7316     }
   7317 
   7318     /**
   7319      * Compares two {@code char} values numerically.
   7320      * The value returned is identical to what would be returned by:
   7321      * <pre>
   7322      *    Character.valueOf(x).compareTo(Character.valueOf(y))
   7323      * </pre>
   7324      *
   7325      * @param  x the first {@code char} to compare
   7326      * @param  y the second {@code char} to compare
   7327      * @return the value {@code 0} if {@code x == y};
   7328      *         a value less than {@code 0} if {@code x < y}; and
   7329      *         a value greater than {@code 0} if {@code x > y}
   7330      * @since 1.7
   7331      */
   7332     public static int compare(char x, char y) {
   7333         return x - y;
   7334     }
   7335 
   7336     /**
   7337      * The number of bits used to represent a {@code char} value in unsigned
   7338      * binary form, constant {@code 16}.
   7339      *
   7340      * @since 1.5
   7341      */
   7342     public static final int SIZE = 16; // width of a char in bits; BYTES is derived from this value
   7343 
   7344     /**
   7345      * The number of bytes used to represent a {@code char} value in unsigned
   7346      * binary form.
   7347      *
   7348      * @since 1.8
   7349      */
   7350     public static final int BYTES = SIZE / Byte.SIZE; // bits per char divided by bits per byte
   7351 
   7352     /**
   7353      * Returns the value obtained by reversing the order of the bytes in the
   7354      * specified {@code char} value.
   7355      *
   7356      * @param ch The {@code char} of which to reverse the byte order.
   7357      * @return the value obtained by reversing (or, equivalently, swapping)
   7358      *     the bytes in the specified {@code char} value.
   7359      * @since 1.5
   7360      */
   7361     public static char reverseBytes(char ch) {
   7362         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
   7363     }
   7364 
   7365     /**
   7366      * Returns the Unicode name of the specified character
   7367      * {@code codePoint}, or null if the code point is
   7368      * {@link #UNASSIGNED unassigned}.
   7369      * <p>
   7370      * Note: if the specified character is not assigned a name by
   7371      * the <i>UnicodeData</i> file (part of the Unicode Character
   7372      * Database maintained by the Unicode Consortium), the returned
   7373      * name is the same as the result of the expression:
   7374      *
   7375      * <blockquote>{@code
   7376      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
   7377      *     + " "
   7378      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7379      *
   7380      * }</blockquote>
   7381      *
   7382      * @param  codePoint the character (Unicode code point)
   7383      *
   7384      * @return the Unicode name of the specified character, or null if
   7385      *         the code point is unassigned.
   7386      *
   7387      * @exception IllegalArgumentException if the specified
   7388      *            {@code codePoint} is not a valid Unicode
   7389      *            code point.
   7390      *
   7391      * @since 1.7
   7392      */
   7393     public static String getName(int codePoint) {
   7394         if (!isValidCodePoint(codePoint)) {
   7395             throw new IllegalArgumentException();
   7396         }
   7397         String name = getNameImpl(codePoint);
   7398         if (name != null)
   7399             return name;
   7400         if (getType(codePoint) == UNASSIGNED)
   7401             return null;
   7402         UnicodeBlock block = UnicodeBlock.of(codePoint);
   7403         if (block != null)
   7404             return block.toString().replace('_', ' ') + " "
   7405                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7406         // should never come here
   7407         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
   7408     }
   7409 
   7410     private static native String getNameImpl(int codePoint);
   7411 }
   7412