Home | History | Annotate | Download | only in lang
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  */
     17 
     18 package java.lang;
     19 
     20 import java.io.Serializable;
     21 
     22 /**
     23  * The wrapper for the primitive type {@code char}. This class also provides a
     24  * number of utility methods for working with characters.
     25  *
     26  * <p>Character data is kept up to date as Unicode evolves.
     27  * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
     28  * the {@code Locale} documentation for details of the Unicode versions implemented by current
     29  * and historical Android releases.
     30  *
     31  * <p>The Unicode specification, character tables, and other information are available at
     32  * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
     33  *
     34  * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
     35  * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
     36  * is the code point range U+0000 to U+FFFF. Characters above the BMP are
     37  * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
     38  * encoding and {@code char} pairs are used to represent code points in the
     39  * supplementary range. A pair of {@code char} values that represent a
     40  * supplementary character are made up of a <i>high surrogate</i> with a value
     41  * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
     42  * 0xDC00 to 0xDFFF.
     43  * <p>
     44  * On the Java platform a {@code char} value represents either a single BMP code
     45  * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
     46  * is used to represent all Unicode code points.
     47  *
     48  * <a name="unicode_categories"><h3>Unicode categories</h3></a>
     49  * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
     50  * grouped semantically to provide a convenient overview. This table is also useful in
     51  * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
     52  * <span class="datatable">
     53  * <style type="text/css">
     54  * .datatable td { padding-right: 20px; }
     55  * </style>
     56  * <p><table>
     57  * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
     58  * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
     59  * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
     60  * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
     61  * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
     62  * <tr> <td><br></td> </tr>
     63  * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
     64  * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
     65  * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
     66  * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
     67  * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
     68  * <tr> <td><br></td> </tr>
     69  * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
     70  * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
     71  * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
     72  * <tr> <td><br></td> </tr>
     73  * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
     74  * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
     75  * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
     76  * <tr> <td><br></td> </tr>
     77  * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
     78  * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
     79  * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
     80  * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
     81  * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
     82  * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
     83  * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
     84  * <tr> <td><br></td> </tr>
     85  * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
     86  * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
     87  * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
     88  * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
     89  * <tr> <td><br></td> </tr>
     90  * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
     91  * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
     92  * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
     93  * </table>
     94  * </span>
     95  *
     96  * @since 1.0
     97  */
     98 public final class Character implements Serializable, Comparable<Character> {
     99     private static final long serialVersionUID = 3786198910865385080L;
    100 
    101     private final char value;
    102 
    103     /**
    104      * The minimum {@code Character} value.
    105      */
    106     public static final char MIN_VALUE = '\u0000';
    107 
    108     /**
    109      * The maximum {@code Character} value.
    110      */
    111     public static final char MAX_VALUE = '\uffff';
    112 
    113     /**
    114      * The minimum radix used for conversions between characters and integers.
    115      */
    116     public static final int MIN_RADIX = 2;
    117 
    118     /**
    119      * The maximum radix used for conversions between characters and integers.
    120      */
    121     public static final int MAX_RADIX = 36;
    122 
    123     /**
    124      * The {@link Class} object that represents the primitive type {@code char}.
    125      */
    126     @SuppressWarnings("unchecked")
    127     public static final Class<Character> TYPE
    128             = (Class<Character>) char[].class.getComponentType();
    129 
    130     // Note: This can't be set to "char.class", since *that* is
    131     // defined to be "java.lang.Character.TYPE";
    132 
    133     /**
    134      * Unicode category constant Cn.
    135      */
    136     public static final byte UNASSIGNED = 0;
    137 
    138     /**
    139      * Unicode category constant Lu.
    140      */
    141     public static final byte UPPERCASE_LETTER = 1;
    142 
    143     /**
    144      * Unicode category constant Ll.
    145      */
    146     public static final byte LOWERCASE_LETTER = 2;
    147 
    148     /**
    149      * Unicode category constant Lt.
    150      */
    151     public static final byte TITLECASE_LETTER = 3;
    152 
    153     /**
    154      * Unicode category constant Lm.
    155      */
    156     public static final byte MODIFIER_LETTER = 4;
    157 
    158     /**
    159      * Unicode category constant Lo.
    160      */
    161     public static final byte OTHER_LETTER = 5;
    162 
    163     /**
    164      * Unicode category constant Mn.
    165      */
    166     public static final byte NON_SPACING_MARK = 6;
    167 
    168     /**
    169      * Unicode category constant Me.
    170      */
    171     public static final byte ENCLOSING_MARK = 7;
    172 
    173     /**
    174      * Unicode category constant Mc.
    175      */
    176     public static final byte COMBINING_SPACING_MARK = 8;
    177 
    178     /**
    179      * Unicode category constant Nd.
    180      */
    181     public static final byte DECIMAL_DIGIT_NUMBER = 9;
    182 
    183     /**
    184      * Unicode category constant Nl.
    185      */
    186     public static final byte LETTER_NUMBER = 10;
    187 
    188     /**
    189      * Unicode category constant No.
    190      */
    191     public static final byte OTHER_NUMBER = 11;
    192 
    193     /**
    194      * Unicode category constant Zs.
    195      */
    196     public static final byte SPACE_SEPARATOR = 12;
    197 
    198     /**
    199      * Unicode category constant Zl.
    200      */
    201     public static final byte LINE_SEPARATOR = 13;
    202 
    203     /**
    204      * Unicode category constant Zp.
    205      */
    206     public static final byte PARAGRAPH_SEPARATOR = 14;
    207 
    208     /**
    209      * Unicode category constant Cc.
    210      */
    211     public static final byte CONTROL = 15;
    212 
    213     /**
    214      * Unicode category constant Cf.
    215      */
    216     public static final byte FORMAT = 16;
    217 
    218     /**
    219      * Unicode category constant Co.
             *
             * <p>Note that the category constant values are not contiguous at this
             * point: {@link #FORMAT} is 16 and this constant is 18.
    220      */
    221     public static final byte PRIVATE_USE = 18;
    222 
    223     /**
    224      * Unicode category constant Cs.
    225      */
    226     public static final byte SURROGATE = 19;
    227 
    228     /**
    229      * Unicode category constant Pd.
    230      */
    231     public static final byte DASH_PUNCTUATION = 20;
    232 
    233     /**
    234      * Unicode category constant Ps.
    235      */
    236     public static final byte START_PUNCTUATION = 21;
    237 
    238     /**
    239      * Unicode category constant Pe.
    240      */
    241     public static final byte END_PUNCTUATION = 22;
    242 
    243     /**
    244      * Unicode category constant Pc.
    245      */
    246     public static final byte CONNECTOR_PUNCTUATION = 23;
    247 
    248     /**
    249      * Unicode category constant Po.
    250      */
    251     public static final byte OTHER_PUNCTUATION = 24;
    252 
    253     /**
    254      * Unicode category constant Sm.
    255      */
    256     public static final byte MATH_SYMBOL = 25;
    257 
    258     /**
    259      * Unicode category constant Sc.
    260      */
    261     public static final byte CURRENCY_SYMBOL = 26;
    262 
    263     /**
    264      * Unicode category constant Sk.
    265      */
    266     public static final byte MODIFIER_SYMBOL = 27;
    267 
    268     /**
    269      * Unicode category constant So.
    270      */
    271     public static final byte OTHER_SYMBOL = 28;
    272 
    273     /**
    274      * Unicode category constant Pi.
    275      *
    276      * @since 1.4
    277      */
    278     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
    279 
    280     /**
    281      * Unicode category constant Pf.
    282      *
    283      * @since 1.4
    284      */
    285     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
    286 
    287     /**
    288      * Unicode bidirectional constant.
    289      *
    290      * @since 1.4
    291      */
    292     public static final byte DIRECTIONALITY_UNDEFINED = -1;
    293 
    294     /**
    295      * Unicode bidirectional constant L.
    296      *
    297      * @since 1.4
    298      */
    299     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
    300 
    301     /**
    302      * Unicode bidirectional constant R.
    303      *
    304      * @since 1.4
    305      */
    306     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
    307 
    308     /**
    309      * Unicode bidirectional constant AL.
    310      *
    311      * @since 1.4
    312      */
    313     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
    314 
    315     /**
    316      * Unicode bidirectional constant EN.
    317      *
    318      * @since 1.4
    319      */
    320     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
    321 
    322     /**
    323      * Unicode bidirectional constant ES.
    324      *
    325      * @since 1.4
    326      */
    327     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
    328 
    329     /**
    330      * Unicode bidirectional constant ET.
    331      *
    332      * @since 1.4
    333      */
    334     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
    335 
    336     /**
    337      * Unicode bidirectional constant AN.
    338      *
    339      * @since 1.4
    340      */
    341     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
    342 
    343     /**
    344      * Unicode bidirectional constant CS.
    345      *
    346      * @since 1.4
    347      */
    348     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
    349 
    350     /**
    351      * Unicode bidirectional constant NSM.
    352      *
    353      * @since 1.4
    354      */
    355     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
    356 
    357     /**
    358      * Unicode bidirectional constant BN.
    359      *
    360      * @since 1.4
    361      */
    362     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
    363 
    364     /**
    365      * Unicode bidirectional constant B.
    366      *
    367      * @since 1.4
    368      */
    369     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
    370 
    371     /**
    372      * Unicode bidirectional constant S.
    373      *
    374      * @since 1.4
    375      */
    376     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
    377 
    378     /**
    379      * Unicode bidirectional constant WS.
    380      *
    381      * @since 1.4
    382      */
    383     public static final byte DIRECTIONALITY_WHITESPACE = 12;
    384 
    385     /**
    386      * Unicode bidirectional constant ON.
    387      *
    388      * @since 1.4
    389      */
    390     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
    391 
    392     /**
    393      * Unicode bidirectional constant LRE.
    394      *
    395      * @since 1.4
    396      */
    397     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
    398 
    399     /**
    400      * Unicode bidirectional constant LRO.
    401      *
    402      * @since 1.4
    403      */
    404     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
    405 
    406     /**
    407      * Unicode bidirectional constant RLE.
    408      *
    409      * @since 1.4
    410      */
    411     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
    412 
    413     /**
    414      * Unicode bidirectional constant RLO.
    415      *
    416      * @since 1.4
    417      */
    418     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
    419 
    420     /**
    421      * Unicode bidirectional constant PDF.
    422      *
    423      * @since 1.4
    424      */
    425     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
    426 
    427     /**
    428      * The minimum value of a high surrogate or leading surrogate unit in UTF-16
    429      * encoding, {@code '\uD800'}.
    430      *
    431      * @since 1.5
    432      */
    433     public static final char MIN_HIGH_SURROGATE = '\uD800';
    434 
    435     /**
    436      * The maximum value of a high surrogate or leading surrogate unit in UTF-16
    437      * encoding, {@code '\uDBFF'}.
    438      *
    439      * @since 1.5
    440      */
    441     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
    442 
    443     /**
    444      * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
    445      * encoding, {@code '\uDC00'}.
    446      *
    447      * @since 1.5
    448      */
    449     public static final char MIN_LOW_SURROGATE = '\uDC00';
    450 
    451     /**
    452      * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
    453      * encoding, {@code '\uDFFF'}.
    454      *
    455      * @since 1.5
    456      */
    457     public static final char MAX_LOW_SURROGATE = '\uDFFF';
    458 
    459     /**
    460      * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
    461      *
    462      * @since 1.5
    463      */
    464     public static final char MIN_SURROGATE = '\uD800';
    465 
    466     /**
    467      * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
    468      *
    469      * @since 1.5
    470      */
    471     public static final char MAX_SURROGATE = '\uDFFF';
    472 
    473     /**
    474      * The minimum value of a supplementary code point, {@code U+010000}.
    475      *
    476      * @since 1.5
    477      */
    478     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
    479 
    480     /**
    481      * The minimum code point value, {@code U+0000}.
    482      *
    483      * @since 1.5
    484      */
    485     public static final int MIN_CODE_POINT = 0x000000;
    486 
    487     /**
    488      * The maximum code point value, {@code U+10FFFF}.
    489      *
    490      * @since 1.5
    491      */
    492     public static final int MAX_CODE_POINT = 0x10FFFF;
    493 
    494     /**
    495      * The number of bits required to represent a {@code Character} value
    496      * in unsigned form.
    497      *
    498      * @since 1.5
    499      */
    500     public static final int SIZE = 16;
    501 
    502     // BEGIN android-removed
    503     // Unicode 3.0.1 (same as Unicode 3.0.0)
    504     // private static final String bidiKeys = ...
    505 
    506     // private static final char[] bidiValues = ...
    507 
    508     // private static final char[] mirrored = ...
    509 
    510     // Unicode 3.0.1 (same as Unicode 3.0.0)
    511     // private static final String typeKeys = ...
    512 
    513     // private static final char[] typeValues = ...
    514 
    515     // private static final int[] typeValuesCache = ...
    516 
    517     // Unicode 3.0.1 (same as Unicode 3.0.0)
    518     // private static final String uppercaseKeys = ...
    519 
    520     // private static final char[] uppercaseValues = ...
    521 
    522     // private static final int[] uppercaseValuesCache = ...
    523 
    524     // private static final String lowercaseKeys = ...
    525 
    526     // private static final char[] lowercaseValues = ...
    527 
    528     // private static final int[] lowercaseValuesCache = ...
    529 
    530     // private static final String digitKeys = ...
    531 
    532     // private static final char[] digitValues = ...
    533 
    534     // private static final char[] typeTags = ...
    535     // END android-removed
    536 
    537     // BEGIN android-note
    538     // put this in a helper class so that it's only initialized on demand?
    539     // END android-note
            // Lookup table holding 19 of the public DIRECTIONALITY_* constants
            // (indices 0..18); DIRECTIONALITY_UNDEFINED (-1) is not an entry.
            // The entries are NOT ordered by their own numeric values: for example
            // DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC (value 2) sits at index 13, and
            // DIRECTIONALITY_NONSPACING_MARK (value 8) sits at index 17. Do not
            // reorder the initializer.
            // NOTE(review): the ordering suggests the index is a direction code
            // from another source (presumably the native Unicode library) that is
            // translated into the Java constants -- confirm against the code that
            // reads this table, which is outside this excerpt.
    540     private static final byte[] DIRECTIONALITY = new byte[] {
    541             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
    542             DIRECTIONALITY_EUROPEAN_NUMBER,
    543             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
    544             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
    545             DIRECTIONALITY_ARABIC_NUMBER,
    546             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
    547             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
    548             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
    549             DIRECTIONALITY_OTHER_NEUTRALS,
    550             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
    551             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
    552             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
    553             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
    554             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
    555             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
    556             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    557 
    558     // BEGIN android-removed
    559     // private static final int ISJAVASTART = 1;
    560 
    561     // private static final int ISJAVAPART = 2;
    562 
    563     // Unicode 3.0.1 (same as Unicode 3.0.0)
    564     // private static final String titlecaseKeys = ...
    565 
    566     // private static final char[] titlecaseValues = ...
    567 
    568     // Unicode 3.0.0 (NOT the same as Unicode 3.0.1)
    569     // private static final String numericKeys = ...
    570 
    571     // private static final char[] numericValues = ...
    572     // END android-removed
    573 
    574     /*
    575      * Represents a subset of the Unicode character set.
    576      */
    577     public static class Subset {
    578         String name;
    579 
    580         /**
    581          * Constructs a new {@code Subset}.
    582          *
    583          * @param string
    584          *            this subset's name.
    585          */
    586         protected Subset(String string) {
    587             if (string == null) {
    588                 throw new NullPointerException();
    589             }
    590             name = string;
    591         }
    592 
    593         /**
    594          * Compares this character subset with the specified object. Uses
    595          * {@link java.lang.Object#equals(Object)} to do the comparison.
    596          *
    597          * @param object
    598          *            the object to compare this character subset with.
    599          * @return {@code true} if {@code object} is this subset, that is, if
    600          *         {@code object == this}; {@code false} otherwise.
    601          */
    602         @Override
    603         public final boolean equals(Object object) {
    604             return super.equals(object);
    605         }
    606 
    607         /**
    608          * Returns the integer hash code for this character subset.
    609          *
    610          * @return this subset's hash code, which is the hash code computed by
    611          *         {@link java.lang.Object#hashCode()}.
    612          */
    613         @Override
    614         public final int hashCode() {
    615             return super.hashCode();
    616         }
    617 
    618         /**
    619          * Returns the string representation of this subset.
    620          *
    621          * @return this subset's name.
    622          */
    623         @Override
    624         public final String toString() {
    625             return name;
    626         }
    627     }
    628 
    629     /**
    630      * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
    631      * specification.
    632      *
    633      * @since 1.2
    634      */
    635     public static final class UnicodeBlock extends Subset {
    636         /**
    637          * The &quot;Surrogates Area&quot; Unicode Block.
    638          *
    639          * @deprecated As of Java 5, this block has been replaced by
    640          *             {@link #HIGH_SURROGATES},
    641          *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
    642          *             {@link #LOW_SURROGATES}.
    643          */
    644         @Deprecated
    645         public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
    646         /**
    647          * The &quot;Basic Latin&quot; Unicode Block.
    648          *
    649          * @since 1.2
    650          */
    651         public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
    652         /**
    653          * The &quot;Latin-1 Supplement&quot; Unicode Block.
    654          *
    655          * @since 1.2
    656          */
    657         public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
    658         /**
    659          * The &quot;Latin Extended-A&quot; Unicode Block.
    660          *
    661          * @since 1.2
    662          */
    663         public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
    664         /**
    665          * The &quot;Latin Extended-B&quot; Unicode Block.
    666          *
    667          * @since 1.2
    668          */
    669         public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
    670         /**
    671          * The &quot;IPA Extensions&quot; Unicode Block.
    672          *
    673          * @since 1.2
    674          */
    675         public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
    676         /**
    677          * The &quot;Spacing Modifier Letters&quot; Unicode Block.
    678          *
    679          * @since 1.2
    680          */
    681         public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
    682         /**
    683          * The &quot;Combining Diacritical Marks&quot; Unicode Block.
    684          *
    685          * @since 1.2
    686          */
    687         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
    688         /**
    689          * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
    690          * to as &quot;Greek&quot;.
    691          *
    692          * @since 1.2
    693          */
    694         public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
    695         /**
    696          * The &quot;Cyrillic&quot; Unicode Block.
    697          *
    698          * @since 1.2
    699          */
    700         public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
    701         /**
    702          * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
    703          * referred to as &quot;Cyrillic Supplementary&quot;.
    704          *
    705          * @since 1.5
    706          */
    707         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
    708         /**
    709          * The &quot;Armenian&quot; Unicode Block.
    710          *
    711          * @since 1.2
    712          */
    713         public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
    714         /**
    715          * The &quot;Hebrew&quot; Unicode Block.
    716          *
    717          * @since 1.2
    718          */
    719         public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
    720         /**
    721          * The &quot;Arabic&quot; Unicode Block.
    722          *
    723          * @since 1.2
    724          */
    725         public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
    726         /**
    727          * The &quot;Syriac&quot; Unicode Block.
    728          *
    729          * @since 1.4
    730          */
    731         public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
    732         /**
    733          * The &quot;Thaana&quot; Unicode Block.
    734          *
    735          * @since 1.4
    736          */
    737         public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
    738         /**
    739          * The &quot;Devanagari&quot; Unicode Block.
    740          *
    741          * @since 1.2
    742          */
    743         public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
    744         /**
    745          * The &quot;Bengali&quot; Unicode Block.
    746          *
    747          * @since 1.2
    748          */
    749         public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
    750         /**
    751          * The &quot;Gurmukhi&quot; Unicode Block.
    752          *
    753          * @since 1.2
    754          */
    755         public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
    756         /**
    757          * The &quot;Gujarati&quot; Unicode Block.
    758          *
    759          * @since 1.2
    760          */
    761         public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
    762         /**
    763          * The &quot;Oriya&quot; Unicode Block.
    764          *
    765          * @since 1.2
    766          */
    767         public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
    768         /**
    769          * The &quot;Tamil&quot; Unicode Block.
    770          *
    771          * @since 1.2
    772          */
    773         public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
    774         /**
    775          * The &quot;Telugu&quot; Unicode Block.
    776          *
    777          * @since 1.2
    778          */
    779         public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
    780         /**
    781          * The &quot;Kannada&quot; Unicode Block.
    782          *
    783          * @since 1.2
    784          */
    785         public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
    786         /**
    787          * The &quot;Malayalam&quot; Unicode Block.
    788          *
    789          * @since 1.2
    790          */
    791         public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
    792         /**
    793          * The &quot;Sinhala&quot; Unicode Block.
    794          *
    795          * @since 1.4
    796          */
    797         public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
    798         /**
    799          * The &quot;Thai&quot; Unicode Block.
    800          *
    801          * @since 1.2
    802          */
    803         public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
    804         /**
    805          * The &quot;Lao&quot; Unicode Block.
    806          *
    807          * @since 1.2
    808          */
    809         public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
    810         /**
    811          * The &quot;Tibetan&quot; Unicode Block.
    812          *
    813          * @since 1.2
    814          */
    815         public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
    816         /**
    817          * The &quot;Myanmar&quot; Unicode Block.
    818          *
    819          * @since 1.4
    820          */
    821         public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
    822         /**
    823          * The &quot;Georgian&quot; Unicode Block.
    824          *
    825          * @since 1.2
    826          */
    827         public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
    828         /**
    829          * The &quot;Hangul Jamo&quot; Unicode Block.
    830          *
    831          * @since 1.2
    832          */
    833         public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
    834         /**
    835          * The &quot;Ethiopic&quot; Unicode Block.
    836          *
    837          * @since 1.4
    838          */
    839         public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
    840         /**
    841          * The &quot;Cherokee&quot; Unicode Block.
    842          *
    843          * @since 1.4
    844          */
    845         public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
    846         /**
    847          * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
    848          *
    849          * @since 1.4
    850          */
    851         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
    852         /**
    853          * The &quot;Ogham&quot; Unicode Block.
    854          *
    855          * @since 1.4
    856          */
    857         public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
    858         /**
    859          * The &quot;Runic&quot; Unicode Block.
    860          *
    861          * @since 1.4
    862          */
    863         public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
    864         /**
    865          * The &quot;Tagalog&quot; Unicode Block.
    866          *
    867          * @since 1.5
    868          */
    869         public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
    870         /**
    871          * The &quot;Hanunoo&quot; Unicode Block.
    872          *
    873          * @since 1.5
    874          */
    875         public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
    876         /**
    877          * The &quot;Buhid&quot; Unicode Block.
    878          *
    879          * @since 1.5
    880          */
    881         public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
    882         /**
    883          * The &quot;Tagbanwa&quot; Unicode Block.
    884          *
    885          * @since 1.5
    886          */
    887         public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
    888         /**
    889          * The &quot;Khmer&quot; Unicode Block.
    890          *
    891          * @since 1.4
    892          */
    893         public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
    894         /**
    895          * The &quot;Mongolian&quot; Unicode Block.
    896          *
    897          * @since 1.4
    898          */
    899         public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
    900         /**
    901          * The &quot;Limbu&quot; Unicode Block.
    902          *
    903          * @since 1.5
    904          */
    905         public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
    906         /**
    907          * The &quot;Tai Le&quot; Unicode Block.
    908          *
    909          * @since 1.5
    910          */
    911         public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
    912         /**
    913          * The &quot;Khmer Symbols&quot; Unicode Block.
    914          *
    915          * @since 1.5
    916          */
    917         public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
    918         /**
    919          * The &quot;Phonetic Extensions&quot; Unicode Block.
    920          *
    921          * @since 1.5
    922          */
    923         public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
    924         /**
    925          * The &quot;Latin Extended Additional&quot; Unicode Block.
    926          *
    927          * @since 1.2
    928          */
    929         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
    930         /**
    931          * The &quot;Greek Extended&quot; Unicode Block.
    932          *
    933          * @since 1.2
    934          */
    935         public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
    936         /**
    937          * The &quot;General Punctuation&quot; Unicode Block.
    938          *
    939          * @since 1.2
    940          */
    941         public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
    942         /**
    943          * The &quot;Superscripts and Subscripts&quot; Unicode Block.
    944          *
    945          * @since 1.2
    946          */
    947         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
    948         /**
    949          * The &quot;Currency Symbols&quot; Unicode Block.
    950          *
    951          * @since 1.2
    952          */
    953         public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
    954         /**
    955          * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
    956          * Block. Previously referred to as &quot;Combining Marks for
    957          * Symbols&quot;.
    958          *
    959          * @since 1.2
    960          */
    961         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
    962         /**
    963          * The &quot;Letterlike Symbols&quot; Unicode Block.
    964          *
    965          * @since 1.2
    966          */
    967         public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
    968         /**
    969          * The &quot;Number Forms&quot; Unicode Block.
    970          *
    971          * @since 1.2
    972          */
    973         public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
    974         /**
    975          * The &quot;Arrows&quot; Unicode Block.
    976          *
    977          * @since 1.2
    978          */
    979         public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
    980         /**
    981          * The &quot;Mathematical Operators&quot; Unicode Block.
    982          *
    983          * @since 1.2
    984          */
    985         public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
    986         /**
    987          * The &quot;Miscellaneous Technical&quot; Unicode Block.
    988          *
    989          * @since 1.2
    990          */
    991         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
    992         /**
    993          * The &quot;Control Pictures&quot; Unicode Block.
    994          *
    995          * @since 1.2
    996          */
    997         public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
    998         /**
    999          * The &quot;Optical Character Recognition&quot; Unicode Block.
   1000          *
   1001          * @since 1.2
   1002          */
   1003         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
   1004         /**
   1005          * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
   1006          *
   1007          * @since 1.2
   1008          */
   1009         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
   1010         /**
   1011          * The &quot;Box Drawing&quot; Unicode Block.
   1012          *
   1013          * @since 1.2
   1014          */
   1015         public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
   1016         /**
   1017          * The &quot;Block Elements&quot; Unicode Block.
   1018          *
   1019          * @since 1.2
   1020          */
   1021         public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
   1022         /**
   1023          * The &quot;Geometric Shapes&quot; Unicode Block.
   1024          *
   1025          * @since 1.2
   1026          */
   1027         public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
   1028         /**
   1029          * The &quot;Miscellaneous Symbols&quot; Unicode Block.
   1030          *
   1031          * @since 1.2
   1032          */
   1033         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
   1034         /**
   1035          * The &quot;Dingbats&quot; Unicode Block.
   1036          *
   1037          * @since 1.2
   1038          */
   1039         public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
   1040         /**
   1041          * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
   1042          *
   1043          * @since 1.5
   1044          */
   1045         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
   1046         /**
   1047          * The &quot;Supplemental Arrows-A&quot; Unicode Block.
   1048          *
   1049          * @since 1.5
   1050          */
   1051         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
   1052         /**
   1053          * The &quot;Braille Patterns&quot; Unicode Block.
   1054          *
   1055          * @since 1.4
   1056          */
   1057         public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
   1058         /**
   1059          * The &quot;Supplemental Arrows-B&quot; Unicode Block.
   1060          *
   1061          * @since 1.5
   1062          */
   1063         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
   1064         /**
   1065          * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
   1066          *
   1067          * @since 1.5
   1068          */
   1069         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
   1070         /**
   1071          * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
   1072          *
   1073          * @since 1.5
   1074          */
   1075         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
   1076         /**
   1077          * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
   1078          *
   1079          * @since 1.5
   1080          */
   1081         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
   1082         /**
   1083          * The &quot;CJK Radicals Supplement&quot; Unicode Block.
   1084          *
   1085          * @since 1.4
   1086          */
   1087         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
   1088         /**
   1089          * The &quot;Kangxi Radicals&quot; Unicode Block.
   1090          *
   1091          * @since 1.4
   1092          */
   1093         public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
   1094         /**
   1095          * The &quot;Ideographic Description Characters&quot; Unicode Block.
   1096          *
   1097          * @since 1.4
   1098          */
   1099         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
   1100         /**
   1101          * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
   1102          *
   1103          * @since 1.2
   1104          */
   1105         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
   1106         /**
   1107          * The &quot;Hiragana&quot; Unicode Block.
   1108          *
   1109          * @since 1.2
   1110          */
   1111         public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
   1112         /**
   1113          * The &quot;Katakana&quot; Unicode Block.
   1114          *
   1115          * @since 1.2
   1116          */
   1117         public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
   1118         /**
   1119          * The &quot;Bopomofo&quot; Unicode Block.
   1120          *
   1121          * @since 1.2
   1122          */
   1123         public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
   1124         /**
   1125          * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
   1126          *
   1127          * @since 1.2
   1128          */
   1129         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
   1130         /**
   1131          * The &quot;Kanbun&quot; Unicode Block.
   1132          *
   1133          * @since 1.2
   1134          */
   1135         public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
   1136         /**
   1137          * The &quot;Bopomofo Extended&quot; Unicode Block.
   1138          *
   1139          * @since 1.4
   1140          */
   1141         public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
   1142         /**
   1143          * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
   1144          *
   1145          * @since 1.5
   1146          */
   1147         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
   1148         /**
   1149          * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
   1150          *
   1151          * @since 1.2
   1152          */
   1153         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
   1154         /**
   1155          * The &quot;CJK Compatibility&quot; Unicode Block.
   1156          *
   1157          * @since 1.2
   1158          */
   1159         public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
   1160         /**
   1161          * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
   1162          *
   1163          * @since 1.4
   1164          */
   1165         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
   1166         /**
   1167          * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
   1168          *
   1169          * @since 1.5
   1170          */
   1171         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
   1172         /**
   1173          * The &quot;CJK Unified Ideographs&quot; Unicode Block.
   1174          *
   1175          * @since 1.2
   1176          */
   1177         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
   1178         /**
   1179          * The &quot;Yi Syllables&quot; Unicode Block.
   1180          *
   1181          * @since 1.4
   1182          */
   1183         public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
   1184         /**
   1185          * The &quot;Yi Radicals&quot; Unicode Block.
   1186          *
   1187          * @since 1.4
   1188          */
   1189         public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
   1190         /**
   1191          * The &quot;Hangul Syllables&quot; Unicode Block.
   1192          *
   1193          * @since 1.2
   1194          */
   1195         public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
   1196         /**
   1197          * The &quot;High Surrogates&quot; Unicode Block. This block represents
   1198          * code point values in the high surrogate range 0xD800 to 0xDB7F.
                 *
                 * @since 1.5
   1199          */
   1200         public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
   1201         /**
   1202          * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
   1203          * represents code point values in the high surrogate range 0xDB80 to
   1204          * 0xDBFF.
                 *
                 * @since 1.5
   1205          */
   1206         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
   1207         /**
   1208          * The &quot;Low Surrogates&quot; Unicode Block. This block represents
   1209          * code point values in the low surrogate range 0xDC00 to 0xDFFF.
                 *
                 * @since 1.5
   1210          */
   1211         public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
   1212         /**
   1213          * The &quot;Private Use Area&quot; Unicode Block.
   1214          *
   1215          * @since 1.2
   1216          */
   1217         public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
   1218         /**
   1219          * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
   1220          *
   1221          * @since 1.2
   1222          */
   1223         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
   1224         /**
   1225          * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
   1226          *
   1227          * @since 1.2
   1228          */
   1229         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
   1230         /**
   1231          * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
   1232          *
   1233          * @since 1.2
   1234          */
   1235         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
   1236         /**
   1237          * The &quot;Variation Selectors&quot; Unicode Block.
   1238          *
   1239          * @since 1.5
   1240          */
   1241         public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
   1242         /**
   1243          * The &quot;Combining Half Marks&quot; Unicode Block.
   1244          *
   1245          * @since 1.2
   1246          */
   1247         public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
   1248         /**
   1249          * The &quot;CJK Compatibility Forms&quot; Unicode Block.
   1250          *
   1251          * @since 1.2
   1252          */
   1253         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
   1254         /**
   1255          * The &quot;Small Form Variants&quot; Unicode Block.
   1256          *
   1257          * @since 1.2
   1258          */
   1259         public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
   1260         /**
   1261          * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
   1262          *
   1263          * @since 1.2
   1264          */
   1265         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
   1266         /**
   1267          * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
   1268          *
   1269          * @since 1.2
   1270          */
   1271         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
   1272         /**
   1273          * The &quot;Specials&quot; Unicode Block.
   1274          *
   1275          * @since 1.2
   1276          */
   1277         public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
   1278         /**
   1279          * The &quot;Linear B Syllabary&quot; Unicode Block.
   1280          *
   1281          * @since 1.5
   1282          */
   1283         public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
   1284         /**
   1285          * The &quot;Linear B Ideograms&quot; Unicode Block.
   1286          *
   1287          * @since 1.5
   1288          */
   1289         public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
   1290         /**
   1291          * The &quot;Aegean Numbers&quot; Unicode Block.
   1292          *
   1293          * @since 1.5
   1294          */
   1295         public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
   1296         /**
   1297          * The &quot;Old Italic&quot; Unicode Block.
   1298          *
   1299          * @since 1.5
   1300          */
   1301         public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
   1302         /**
   1303          * The &quot;Gothic&quot; Unicode Block.
   1304          *
   1305          * @since 1.5
   1306          */
   1307         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
   1308         /**
   1309          * The &quot;Ugaritic&quot; Unicode Block.
   1310          *
   1311          * @since 1.5
   1312          */
   1313         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
   1314         /**
   1315          * The &quot;Deseret&quot; Unicode Block.
   1316          *
   1317          * @since 1.5
   1318          */
   1319         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
   1320         /**
   1321          * The &quot;Shavian&quot; Unicode Block.
   1322          *
   1323          * @since 1.5
   1324          */
   1325         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
   1326         /**
   1327          * The &quot;Osmanya&quot; Unicode Block.
   1328          *
   1329          * @since 1.5
   1330          */
   1331         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
   1332         /**
   1333          * The &quot;Cypriot Syllabary&quot; Unicode Block.
   1334          *
   1335          * @since 1.5
   1336          */
   1337         public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
   1338         /**
   1339          * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
   1340          *
   1341          * @since 1.5
   1342          */
   1343         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
   1344         /**
   1345          * The &quot;Musical Symbols&quot; Unicode Block.
   1346          *
   1347          * @since 1.5
   1348          */
   1349         public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
   1350         /**
   1351          * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
   1352          *
   1353          * @since 1.5
   1354          */
   1355         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
   1356         /**
   1357          * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
   1358          *
   1359          * @since 1.5
   1360          */
   1361         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
   1362         /**
   1363          * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
   1364          *
   1365          * @since 1.5
   1366          */
   1367         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
   1368         /**
   1369          * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
   1370          *
   1371          * @since 1.5
   1372          */
   1373         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
   1374         /**
   1375          * The &quot;Tags&quot; Unicode Block.
   1376          *
   1377          * @since 1.5
   1378          */
   1379         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
   1380         /**
   1381          * The &quot;Variation Selectors Supplement&quot; Unicode Block.
   1382          *
   1383          * @since 1.5
   1384          */
   1385         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
   1386         /**
   1387          * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
   1388          *
   1389          * @since 1.5
   1390          */
   1391         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
   1392         /**
   1393          * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
   1394          *
   1395          * @since 1.5
   1396          */
   1397         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
   1398 
   1399         /*
   1400          * All of the UnicodeBlocks with valid ranges in ascending order.
   1401          */
   1402         private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
   1403             null,
   1404             UnicodeBlock.BASIC_LATIN,
   1405             UnicodeBlock.LATIN_1_SUPPLEMENT,
   1406             UnicodeBlock.LATIN_EXTENDED_A,
   1407             UnicodeBlock.LATIN_EXTENDED_B,
   1408             UnicodeBlock.IPA_EXTENSIONS,
   1409             UnicodeBlock.SPACING_MODIFIER_LETTERS,
   1410             UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
   1411             UnicodeBlock.GREEK,
   1412             UnicodeBlock.CYRILLIC,
   1413             UnicodeBlock.ARMENIAN,
   1414             UnicodeBlock.HEBREW,
   1415             UnicodeBlock.ARABIC,
   1416             UnicodeBlock.SYRIAC,
   1417             UnicodeBlock.THAANA,
   1418             UnicodeBlock.DEVANAGARI,
   1419             UnicodeBlock.BENGALI,
   1420             UnicodeBlock.GURMUKHI,
   1421             UnicodeBlock.GUJARATI,
   1422             UnicodeBlock.ORIYA,
   1423             UnicodeBlock.TAMIL,
   1424             UnicodeBlock.TELUGU,
   1425             UnicodeBlock.KANNADA,
   1426             UnicodeBlock.MALAYALAM,
   1427             UnicodeBlock.SINHALA,
   1428             UnicodeBlock.THAI,
   1429             UnicodeBlock.LAO,
   1430             UnicodeBlock.TIBETAN,
   1431             UnicodeBlock.MYANMAR,
   1432             UnicodeBlock.GEORGIAN,
   1433             UnicodeBlock.HANGUL_JAMO,
   1434             UnicodeBlock.ETHIOPIC,
   1435             UnicodeBlock.CHEROKEE,
   1436             UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
   1437             UnicodeBlock.OGHAM,
   1438             UnicodeBlock.RUNIC,
   1439             UnicodeBlock.KHMER,
   1440             UnicodeBlock.MONGOLIAN,
   1441             UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
   1442             UnicodeBlock.GREEK_EXTENDED,
   1443             UnicodeBlock.GENERAL_PUNCTUATION,
   1444             UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
   1445             UnicodeBlock.CURRENCY_SYMBOLS,
   1446             UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
   1447             UnicodeBlock.LETTERLIKE_SYMBOLS,
   1448             UnicodeBlock.NUMBER_FORMS,
   1449             UnicodeBlock.ARROWS,
   1450             UnicodeBlock.MATHEMATICAL_OPERATORS,
   1451             UnicodeBlock.MISCELLANEOUS_TECHNICAL,
   1452             UnicodeBlock.CONTROL_PICTURES,
   1453             UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
   1454             UnicodeBlock.ENCLOSED_ALPHANUMERICS,
   1455             UnicodeBlock.BOX_DRAWING,
   1456             UnicodeBlock.BLOCK_ELEMENTS,
   1457             UnicodeBlock.GEOMETRIC_SHAPES,
   1458             UnicodeBlock.MISCELLANEOUS_SYMBOLS,
   1459             UnicodeBlock.DINGBATS,
   1460             UnicodeBlock.BRAILLE_PATTERNS,
   1461             UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
   1462             UnicodeBlock.KANGXI_RADICALS,
   1463             UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
   1464             UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
   1465             UnicodeBlock.HIRAGANA,
   1466             UnicodeBlock.KATAKANA,
   1467             UnicodeBlock.BOPOMOFO,
   1468             UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
   1469             UnicodeBlock.KANBUN,
   1470             UnicodeBlock.BOPOMOFO_EXTENDED,
   1471             UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
   1472             UnicodeBlock.CJK_COMPATIBILITY,
   1473             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
   1474             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
   1475             UnicodeBlock.YI_SYLLABLES,
   1476             UnicodeBlock.YI_RADICALS,
   1477             UnicodeBlock.HANGUL_SYLLABLES,
   1478             UnicodeBlock.HIGH_SURROGATES,
   1479             UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
   1480             UnicodeBlock.LOW_SURROGATES,
   1481             UnicodeBlock.PRIVATE_USE_AREA,
   1482             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
   1483             UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
   1484             UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
   1485             UnicodeBlock.COMBINING_HALF_MARKS,
   1486             UnicodeBlock.CJK_COMPATIBILITY_FORMS,
   1487             UnicodeBlock.SMALL_FORM_VARIANTS,
   1488             UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
   1489             UnicodeBlock.SPECIALS,
   1490             UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
   1491             UnicodeBlock.OLD_ITALIC,
   1492             UnicodeBlock.GOTHIC,
   1493             UnicodeBlock.DESERET,
   1494             UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
   1495             UnicodeBlock.MUSICAL_SYMBOLS,
   1496             UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
   1497             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
   1498             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
   1499             UnicodeBlock.TAGS,
   1500             UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
   1501             UnicodeBlock.TAGALOG,
   1502             UnicodeBlock.HANUNOO,
   1503             UnicodeBlock.BUHID,
   1504             UnicodeBlock.TAGBANWA,
   1505             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
   1506             UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
   1507             UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
   1508             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
   1509             UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
   1510             UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
   1511             UnicodeBlock.VARIATION_SELECTORS,
   1512             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
   1513             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
   1514             UnicodeBlock.LIMBU,
   1515             UnicodeBlock.TAI_LE,
   1516             UnicodeBlock.KHMER_SYMBOLS,
   1517             UnicodeBlock.PHONETIC_EXTENSIONS,
   1518             UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
   1519             UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
   1520             UnicodeBlock.LINEAR_B_SYLLABARY,
   1521             UnicodeBlock.LINEAR_B_IDEOGRAMS,
   1522             UnicodeBlock.AEGEAN_NUMBERS,
   1523             UnicodeBlock.UGARITIC,
   1524             UnicodeBlock.SHAVIAN,
   1525             UnicodeBlock.OSMANYA,
   1526             UnicodeBlock.CYPRIOT_SYLLABARY,
   1527             UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
   1528             UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
   1529         };
   1530 
   1531         /**
   1532          * Retrieves the constant that corresponds to the specified block name.
   1533          * The block names are defined by the Unicode 4.0.1 specification in the
   1534          * {@code Blocks-4.0.1.txt} file.
   1535          * <p>
   1536          * Block names may be one of the following:
   1537          * <ul>
   1538          * <li>Canonical block name, as defined by the Unicode specification;
   1539          * case-insensitive.</li>
   1540          * <li>Canonical block name without any spaces, as defined by the
   1541          * Unicode specification; case-insensitive.</li>
   1542          * <li>{@code UnicodeBlock} constant identifier. This is determined by
   1543          * uppercasing the canonical name and replacing all spaces and hyphens
   1544          * with underscores.</li>
   1545          * </ul>
   1546          *
   1547          * @param blockName
   1548          *            the name of the block to retrieve.
   1549          * @return the UnicodeBlock constant corresponding to {@code blockName}.
   1550          * @throws NullPointerException
   1551          *             if {@code blockName} is {@code null}.
   1552          * @throws IllegalArgumentException
   1553          *             if {@code blockName} is not a valid block name.
   1554          * @since 1.5
   1555          */
   1556         public static final UnicodeBlock forName(String blockName) {
   1557             if (blockName == null) {
   1558                 throw new NullPointerException();
   1559             }
   1560             int block = forNameImpl(blockName);
   1561             if (block == -1) {
   1562                 if(blockName.equals("SURROGATES_AREA")) {
   1563                     return SURROGATES_AREA;
   1564                 } else if(blockName.equalsIgnoreCase("greek")) {
   1565                     return GREEK;
   1566                 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
   1567                         blockName.equals("Combining Marks for Symbols") ||
   1568                         blockName.equals("CombiningMarksforSymbols")) {
   1569                     return COMBINING_MARKS_FOR_SYMBOLS;
   1570                 }
   1571                 throw new IllegalArgumentException();
   1572             }
   1573             return BLOCKS[block];
   1574         }
   1575 
   1576         /**
   1577          * Gets the constant for the Unicode block that contains the specified
   1578          * character.
   1579          *
   1580          * @param c
   1581          *            the character for which to get the {@code UnicodeBlock}
   1582          *            constant.
   1583          * @return the {@code UnicodeBlock} constant for the block that contains
   1584          *         {@code c}, or {@code null} if {@code c} does not belong to
   1585          *         any defined block.
   1586          */
   1587         public static UnicodeBlock of(char c) {
   1588             return of((int) c);
   1589         }
   1590 
   1591         /**
   1592          * Gets the constant for the Unicode block that contains the specified
   1593          * Unicode code point.
   1594          *
   1595          * @param codePoint
   1596          *            the Unicode code point for which to get the
   1597          *            {@code UnicodeBlock} constant.
   1598          * @return the {@code UnicodeBlock} constant for the block that contains
   1599          *         {@code codePoint}, or {@code null} if {@code codePoint} does
   1600          *         not belong to any defined block.
   1601          * @throws IllegalArgumentException
   1602          *             if {@code codePoint} is not a valid Unicode code point.
   1603          * @since 1.5
   1604          */
   1605         public static UnicodeBlock of(int codePoint) {
   1606             if (!isValidCodePoint(codePoint)) {
   1607                 throw new IllegalArgumentException();
   1608             }
   1609             int block = ofImpl(codePoint);
   1610             if(block == -1 || block >= BLOCKS.length) {
   1611                 return null;
   1612             }
   1613             return BLOCKS[block];
   1614         }
   1615 
        /**
         * Creates a block constant with the given name.
         * <p>
         * Only {@code blockName} is used: it is forwarded to the superclass
         * constructor. The {@code start} and {@code end} arguments are accepted
         * but neither stored nor read by this constructor.
         *
         * @param blockName
         *            the name passed on to the superclass.
         * @param start
         *            unused here.
         * @param end
         *            unused here.
         */
        private UnicodeBlock(String blockName, int start, int end) {
            super(blockName);
        }
   1619     }
   1620 
    /**
     * Native block lookup by name, used by {@code UnicodeBlock.forName}.
     * That caller treats {@code -1} as "no match" and any other result as an
     * index into {@code UnicodeBlock.BLOCKS}.
     */
    private static native int forNameImpl(String blockName);
   1622 
    /**
     * Native block lookup by code point, used by {@code UnicodeBlock.of(int)}.
     * That caller treats {@code -1}, and any index at or beyond the length of
     * {@code UnicodeBlock.BLOCKS}, as "no block"; other results index that array.
     */
    private static native int ofImpl(int codePoint);
   1624 
    /**
     * Constructs a new {@code Character} wrapping the specified primitive
     * {@code char} value. {@link #valueOf(char)} is the alternative that can
     * return a cached instance instead of allocating.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
   1635 
   1636     /**
   1637      * Gets the primitive value of this character.
   1638      *
   1639      * @return this object's primitive value.
   1640      */
   1641     public char charValue() {
   1642         return value;
   1643     }
   1644 
   1645     /**
   1646      * Compares this object to the specified character object to determine their
   1647      * relative order.
   1648      *
   1649      * @param c
   1650      *            the character object to compare this object to.
   1651      * @return {@code 0} if the value of this character and the value of
   1652      *         {@code c} are equal; a positive value if the value of this
   1653      *         character is greater than the value of {@code c}; a negative
   1654      *         value if the value of this character is less than the value of
   1655      *         {@code c}.
   1656      * @see java.lang.Comparable
   1657      * @since 1.2
   1658      */
   1659     public int compareTo(Character c) {
   1660         return value - c.value;
   1661     }
   1662 
   1663     /**
   1664      * Returns a {@code Character} instance for the {@code char} value passed.
   1665      * <p>
   1666      * If it is not necessary to get a new {@code Character} instance, it is
   1667      * recommended to use this method instead of the constructor, since it
   1668      * maintains a cache of instances which may result in better performance.
   1669      *
   1670      * @param c
   1671      *            the char value for which to get a {@code Character} instance.
   1672      * @return the {@code Character} instance for {@code c}.
   1673      * @since 1.5
   1674      */
   1675     public static Character valueOf(char c) {
   1676         return c < 128 ? SMALL_VALUES[c] : new Character(c);
   1677     }
   1678 
   1679     /**
   1680      * A cache of instances used by {@link #valueOf(char)} and auto-boxing
   1681      */
   1682     private static final Character[] SMALL_VALUES = new Character[128];
   1683 
   1684     static {
   1685         for(int i = 0; i < 128; i++) {
   1686             SMALL_VALUES[i] = new Character((char) i);
   1687         }
   1688     }
   1689     /**
   1690      * Indicates whether {@code codePoint} is a valid Unicode code point.
   1691      *
   1692      * @param codePoint
   1693      *            the code point to test.
   1694      * @return {@code true} if {@code codePoint} is a valid Unicode code point;
   1695      *         {@code false} otherwise.
   1696      * @since 1.5
   1697      */
   1698     public static boolean isValidCodePoint(int codePoint) {
   1699         return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
   1700     }
   1701 
   1702     /**
   1703      * Indicates whether {@code codePoint} is within the supplementary code
   1704      * point range.
   1705      *
   1706      * @param codePoint
   1707      *            the code point to test.
   1708      * @return {@code true} if {@code codePoint} is within the supplementary
   1709      *         code point range; {@code false} otherwise.
   1710      * @since 1.5
   1711      */
   1712     public static boolean isSupplementaryCodePoint(int codePoint) {
   1713         return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
   1714     }
   1715 
   1716     /**
   1717      * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
   1718      * that is used for representing supplementary characters in UTF-16
   1719      * encoding.
   1720      *
   1721      * @param ch
   1722      *            the character to test.
   1723      * @return {@code true} if {@code ch} is a high-surrogate code unit;
   1724      *         {@code false} otherwise.
   1725      * @see #isLowSurrogate(char)
   1726      * @since 1.5
   1727      */
   1728     public static boolean isHighSurrogate(char ch) {
   1729         return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
   1730     }
   1731 
   1732     /**
   1733      * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
   1734      * that is used for representing supplementary characters in UTF-16
   1735      * encoding.
   1736      *
   1737      * @param ch
   1738      *            the character to test.
   1739      * @return {@code true} if {@code ch} is a low-surrogate code unit;
   1740      *         {@code false} otherwise.
   1741      * @see #isHighSurrogate(char)
   1742      * @since 1.5
   1743      */
   1744     public static boolean isLowSurrogate(char ch) {
   1745         return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
   1746     }
   1747 
   1748     /**
   1749      * Indicates whether the specified character pair is a valid surrogate pair.
   1750      *
   1751      * @param high
   1752      *            the high surrogate unit to test.
   1753      * @param low
   1754      *            the low surrogate unit to test.
   1755      * @return {@code true} if {@code high} is a high-surrogate code unit and
   1756      *         {@code low} is a low-surrogate code unit; {@code false}
   1757      *         otherwise.
   1758      * @see #isHighSurrogate(char)
   1759      * @see #isLowSurrogate(char)
   1760      * @since 1.5
   1761      */
   1762     public static boolean isSurrogatePair(char high, char low) {
   1763         return (isHighSurrogate(high) && isLowSurrogate(low));
   1764     }
   1765 
   1766     /**
   1767      * Calculates the number of {@code char} values required to represent the
   1768      * specified Unicode code point. This method checks if the {@code codePoint}
   1769      * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
   1770      * returned, otherwise {@code 1}. To test if the code point is valid, use
   1771      * the {@link #isValidCodePoint(int)} method.
   1772      *
   1773      * @param codePoint
   1774      *            the code point for which to calculate the number of required
   1775      *            chars.
   1776      * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
   1777      * @see #isValidCodePoint(int)
   1778      * @see #isSupplementaryCodePoint(int)
   1779      * @since 1.5
   1780      */
   1781     public static int charCount(int codePoint) {
   1782         return (codePoint >= 0x10000 ? 2 : 1);
   1783     }
   1784 
   1785     /**
   1786      * Converts a surrogate pair into a Unicode code point. This method assumes
   1787      * that the pair are valid surrogates. If the pair are <i>not</i> valid
   1788      * surrogates, then the result is indeterminate. The
   1789      * {@link #isSurrogatePair(char, char)} method should be used prior to this
   1790      * method to validate the pair.
   1791      *
   1792      * @param high
   1793      *            the high surrogate unit.
   1794      * @param low
   1795      *            the low surrogate unit.
   1796      * @return the Unicode code point corresponding to the surrogate unit pair.
   1797      * @see #isSurrogatePair(char, char)
   1798      * @since 1.5
   1799      */
   1800     public static int toCodePoint(char high, char low) {
   1801         // See RFC 2781, Section 2.2
   1802         // http://www.ietf.org/rfc/rfc2781.txt
   1803         int h = (high & 0x3FF) << 10;
   1804         int l = low & 0x3FF;
   1805         return (h | l) + 0x10000;
   1806     }
   1807 
   1808     /**
   1809      * Returns the code point at {@code index} in the specified sequence of
   1810      * character units. If the unit at {@code index} is a high-surrogate unit,
   1811      * {@code index + 1} is less than the length of the sequence and the unit at
   1812      * {@code index + 1} is a low-surrogate unit, then the supplementary code
   1813      * point represented by the pair is returned; otherwise the {@code char}
   1814      * value at {@code index} is returned.
   1815      *
   1816      * @param seq
   1817      *            the source sequence of {@code char} units.
   1818      * @param index
   1819      *            the position in {@code seq} from which to retrieve the code
   1820      *            point.
   1821      * @return the Unicode code point or {@code char} value at {@code index} in
   1822      *         {@code seq}.
   1823      * @throws NullPointerException
   1824      *             if {@code seq} is {@code null}.
   1825      * @throws IndexOutOfBoundsException
   1826      *             if the {@code index} is negative or greater than or equal to
   1827      *             the length of {@code seq}.
   1828      * @since 1.5
   1829      */
   1830     public static int codePointAt(CharSequence seq, int index) {
   1831         if (seq == null) {
   1832             throw new NullPointerException();
   1833         }
   1834         int len = seq.length();
   1835         if (index < 0 || index >= len) {
   1836             throw new IndexOutOfBoundsException();
   1837         }
   1838 
   1839         char high = seq.charAt(index++);
   1840         if (index >= len) {
   1841             return high;
   1842         }
   1843         char low = seq.charAt(index);
   1844         if (isSurrogatePair(high, low)) {
   1845             return toCodePoint(high, low);
   1846         }
   1847         return high;
   1848     }
   1849 
   1850     /**
   1851      * Returns the code point at {@code index} in the specified array of
   1852      * character units. If the unit at {@code index} is a high-surrogate unit,
   1853      * {@code index + 1} is less than the length of the array and the unit at
   1854      * {@code index + 1} is a low-surrogate unit, then the supplementary code
   1855      * point represented by the pair is returned; otherwise the {@code char}
   1856      * value at {@code index} is returned.
   1857      *
   1858      * @param seq
   1859      *            the source array of {@code char} units.
   1860      * @param index
   1861      *            the position in {@code seq} from which to retrieve the code
   1862      *            point.
   1863      * @return the Unicode code point or {@code char} value at {@code index} in
   1864      *         {@code seq}.
   1865      * @throws NullPointerException
   1866      *             if {@code seq} is {@code null}.
   1867      * @throws IndexOutOfBoundsException
   1868      *             if the {@code index} is negative or greater than or equal to
   1869      *             the length of {@code seq}.
   1870      * @since 1.5
   1871      */
   1872     public static int codePointAt(char[] seq, int index) {
   1873         if (seq == null) {
   1874             throw new NullPointerException();
   1875         }
   1876         int len = seq.length;
   1877         if (index < 0 || index >= len) {
   1878             throw new IndexOutOfBoundsException();
   1879         }
   1880 
   1881         char high = seq[index++];
   1882         if (index >= len) {
   1883             return high;
   1884         }
   1885         char low = seq[index];
   1886         if (isSurrogatePair(high, low)) {
   1887             return toCodePoint(high, low);
   1888         }
   1889         return high;
   1890     }
   1891 
   1892     /**
   1893      * Returns the code point at {@code index} in the specified array of
   1894      * character units, where {@code index} has to be less than {@code limit}.
   1895      * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
   1896      * is less than {@code limit} and the unit at {@code index + 1} is a
   1897      * low-surrogate unit, then the supplementary code point represented by the
   1898      * pair is returned; otherwise the {@code char} value at {@code index} is
   1899      * returned.
   1900      *
   1901      * @param seq
   1902      *            the source array of {@code char} units.
   1903      * @param index
   1904      *            the position in {@code seq} from which to get the code point.
   1905      * @param limit
   1906      *            the index after the last unit in {@code seq} that can be used.
   1907      * @return the Unicode code point or {@code char} value at {@code index} in
   1908      *         {@code seq}.
   1909      * @throws NullPointerException
   1910      *             if {@code seq} is {@code null}.
   1911      * @throws IndexOutOfBoundsException
   1912      *             if {@code index < 0}, {@code index >= limit},
   1913      *             {@code limit < 0} or if {@code limit} is greater than the
   1914      *             length of {@code seq}.
   1915      * @since 1.5
   1916      */
   1917     public static int codePointAt(char[] seq, int index, int limit) {
   1918         if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
   1919             throw new IndexOutOfBoundsException();
   1920         }
   1921 
   1922         char high = seq[index++];
   1923         if (index >= limit) {
   1924             return high;
   1925         }
   1926         char low = seq[index];
   1927         if (isSurrogatePair(high, low)) {
   1928             return toCodePoint(high, low);
   1929         }
   1930         return high;
   1931     }
   1932 
   1933     /**
   1934      * Returns the code point that precedes {@code index} in the specified
   1935      * sequence of character units. If the unit at {@code index - 1} is a
   1936      * low-surrogate unit, {@code index - 2} is not negative and the unit at
   1937      * {@code index - 2} is a high-surrogate unit, then the supplementary code
   1938      * point represented by the pair is returned; otherwise the {@code char}
   1939      * value at {@code index - 1} is returned.
   1940      *
   1941      * @param seq
   1942      *            the source sequence of {@code char} units.
   1943      * @param index
   1944      *            the position in {@code seq} following the code
   1945      *            point that should be returned.
   1946      * @return the Unicode code point or {@code char} value before {@code index}
   1947      *         in {@code seq}.
   1948      * @throws NullPointerException
   1949      *             if {@code seq} is {@code null}.
   1950      * @throws IndexOutOfBoundsException
   1951      *             if the {@code index} is less than 1 or greater than the
   1952      *             length of {@code seq}.
   1953      * @since 1.5
   1954      */
   1955     public static int codePointBefore(CharSequence seq, int index) {
   1956         if (seq == null) {
   1957             throw new NullPointerException();
   1958         }
   1959         int len = seq.length();
   1960         if (index < 1 || index > len) {
   1961             throw new IndexOutOfBoundsException();
   1962         }
   1963 
   1964         char low = seq.charAt(--index);
   1965         if (--index < 0) {
   1966             return low;
   1967         }
   1968         char high = seq.charAt(index);
   1969         if (isSurrogatePair(high, low)) {
   1970             return toCodePoint(high, low);
   1971         }
   1972         return low;
   1973     }
   1974 
   1975     /**
   1976      * Returns the code point that precedes {@code index} in the specified
   1977      * array of character units. If the unit at {@code index - 1} is a
   1978      * low-surrogate unit, {@code index - 2} is not negative and the unit at
   1979      * {@code index - 2} is a high-surrogate unit, then the supplementary code
   1980      * point represented by the pair is returned; otherwise the {@code char}
   1981      * value at {@code index - 1} is returned.
   1982      *
   1983      * @param seq
   1984      *            the source array of {@code char} units.
   1985      * @param index
   1986      *            the position in {@code seq} following the code
   1987      *            point that should be returned.
   1988      * @return the Unicode code point or {@code char} value before {@code index}
   1989      *         in {@code seq}.
   1990      * @throws NullPointerException
   1991      *             if {@code seq} is {@code null}.
   1992      * @throws IndexOutOfBoundsException
   1993      *             if the {@code index} is less than 1 or greater than the
   1994      *             length of {@code seq}.
   1995      * @since 1.5
   1996      */
   1997     public static int codePointBefore(char[] seq, int index) {
   1998         if (seq == null) {
   1999             throw new NullPointerException();
   2000         }
   2001         int len = seq.length;
   2002         if (index < 1 || index > len) {
   2003             throw new IndexOutOfBoundsException();
   2004         }
   2005 
   2006         char low = seq[--index];
   2007         if (--index < 0) {
   2008             return low;
   2009         }
   2010         char high = seq[index];
   2011         if (isSurrogatePair(high, low)) {
   2012             return toCodePoint(high, low);
   2013         }
   2014         return low;
   2015     }
   2016 
   2017     /**
   2018      * Returns the code point that precedes the {@code index} in the specified
   2019      * array of character units and is not less than {@code start}. If the unit
   2020      * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
   2021      * less than {@code start} and the unit at {@code index - 2} is a
   2022      * high-surrogate unit, then the supplementary code point represented by the
   2023      * pair is returned; otherwise the {@code char} value at {@code index - 1}
   2024      * is returned.
   2025      *
   2026      * @param seq
   2027      *            the source array of {@code char} units.
   2028      * @param index
   2029      *            the position in {@code seq} following the code point that
   2030      *            should be returned.
   2031      * @param start
   2032      *            the index of the first element in {@code seq}.
   2033      * @return the Unicode code point or {@code char} value before {@code index}
   2034      *         in {@code seq}.
   2035      * @throws NullPointerException
   2036      *             if {@code seq} is {@code null}.
   2037      * @throws IndexOutOfBoundsException
   2038      *             if the {@code index <= start}, {@code start < 0},
   2039      *             {@code index} is greater than the length of {@code seq}, or
   2040      *             if {@code start} is equal or greater than the length of
   2041      *             {@code seq}.
   2042      * @since 1.5
   2043      */
   2044     public static int codePointBefore(char[] seq, int index, int start) {
   2045         if (seq == null) {
   2046             throw new NullPointerException();
   2047         }
   2048         int len = seq.length;
   2049         if (index <= start || index > len || start < 0 || start >= len) {
   2050             throw new IndexOutOfBoundsException();
   2051         }
   2052 
   2053         char low = seq[--index];
   2054         if (--index < start) {
   2055             return low;
   2056         }
   2057         char high = seq[index];
   2058         if (isSurrogatePair(high, low)) {
   2059             return toCodePoint(high, low);
   2060         }
   2061         return low;
   2062     }
   2063 
   2064     /**
   2065      * Converts the specified Unicode code point into a UTF-16 encoded sequence
   2066      * and copies the value(s) into the char array {@code dst}, starting at
   2067      * index {@code dstIndex}.
   2068      *
   2069      * @param codePoint
   2070      *            the Unicode code point to encode.
   2071      * @param dst
   2072      *            the destination array to copy the encoded value into.
   2073      * @param dstIndex
   2074      *            the index in {@code dst} from where to start copying.
   2075      * @return the number of {@code char} value units copied into {@code dst}.
   2076      * @throws IllegalArgumentException
   2077      *             if {@code codePoint} is not a valid Unicode code point.
   2078      * @throws NullPointerException
   2079      *             if {@code dst} is {@code null}.
   2080      * @throws IndexOutOfBoundsException
   2081      *             if {@code dstIndex} is negative, greater than or equal to
   2082      *             {@code dst.length} or equals {@code dst.length - 1} when
   2083      *             {@code codePoint} is a
   2084      *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
   2085      * @since 1.5
   2086      */
   2087     public static int toChars(int codePoint, char[] dst, int dstIndex) {
   2088         if (!isValidCodePoint(codePoint)) {
   2089             throw new IllegalArgumentException();
   2090         }
   2091         if (dst == null) {
   2092             throw new NullPointerException();
   2093         }
   2094         if (dstIndex < 0 || dstIndex >= dst.length) {
   2095             throw new IndexOutOfBoundsException();
   2096         }
   2097 
   2098         if (isSupplementaryCodePoint(codePoint)) {
   2099             if (dstIndex == dst.length - 1) {
   2100                 throw new IndexOutOfBoundsException();
   2101             }
   2102             // See RFC 2781, Section 2.1
   2103             // http://www.ietf.org/rfc/rfc2781.txt
   2104             int cpPrime = codePoint - 0x10000;
   2105             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
   2106             int low = 0xDC00 | (cpPrime & 0x3FF);
   2107             dst[dstIndex] = (char) high;
   2108             dst[dstIndex + 1] = (char) low;
   2109             return 2;
   2110         }
   2111 
   2112         dst[dstIndex] = (char) codePoint;
   2113         return 1;
   2114     }
   2115 
   2116     /**
   2117      * Converts the specified Unicode code point into a UTF-16 encoded sequence
   2118      * and returns it as a char array.
   2119      *
   2120      * @param codePoint
   2121      *            the Unicode code point to encode.
   2122      * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
   2123      *         {@link #isSupplementaryCodePoint(int) supplementary code point},
   2124      *         then the returned array contains two characters, otherwise it
   2125      *         contains just one character.
   2126      * @throws IllegalArgumentException
   2127      *             if {@code codePoint} is not a valid Unicode code point.
   2128      * @since 1.5
   2129      */
   2130     public static char[] toChars(int codePoint) {
   2131         if (!isValidCodePoint(codePoint)) {
   2132             throw new IllegalArgumentException();
   2133         }
   2134 
   2135         if (isSupplementaryCodePoint(codePoint)) {
   2136             int cpPrime = codePoint - 0x10000;
   2137             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
   2138             int low = 0xDC00 | (cpPrime & 0x3FF);
   2139             return new char[] { (char) high, (char) low };
   2140         }
   2141         return new char[] { (char) codePoint };
   2142     }
   2143 
   2144     /**
   2145      * Counts the number of Unicode code points in the subsequence of the
   2146      * specified character sequence, as delineated by {@code beginIndex} and
   2147      * {@code endIndex}. Any surrogate values with missing pair values will be
   2148      * counted as one code point.
   2149      *
   2150      * @param seq
   2151      *            the {@code CharSequence} to look through.
   2152      * @param beginIndex
   2153      *            the inclusive index to begin counting at.
   2154      * @param endIndex
   2155      *            the exclusive index to stop counting at.
   2156      * @return the number of Unicode code points.
   2157      * @throws NullPointerException
   2158      *             if {@code seq} is {@code null}.
   2159      * @throws IndexOutOfBoundsException
   2160      *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
   2161      *             if {@code endIndex} is greater than the length of {@code seq}.
   2162      * @since 1.5
   2163      */
   2164     public static int codePointCount(CharSequence seq, int beginIndex,
   2165             int endIndex) {
   2166         if (seq == null) {
   2167             throw new NullPointerException();
   2168         }
   2169         int len = seq.length();
   2170         if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
   2171             throw new IndexOutOfBoundsException();
   2172         }
   2173 
   2174         int result = 0;
   2175         for (int i = beginIndex; i < endIndex; i++) {
   2176             char c = seq.charAt(i);
   2177             if (isHighSurrogate(c)) {
   2178                 if (++i < endIndex) {
   2179                     c = seq.charAt(i);
   2180                     if (!isLowSurrogate(c)) {
   2181                         result++;
   2182                     }
   2183                 }
   2184             }
   2185             result++;
   2186         }
   2187         return result;
   2188     }
   2189 
   2190     /**
   2191      * Counts the number of Unicode code points in the subsequence of the
   2192      * specified char array, as delineated by {@code offset} and {@code count}.
   2193      * Any surrogate values with missing pair values will be counted as one code
   2194      * point.
   2195      *
   2196      * @param seq
   2197      *            the char array to look through
   2198      * @param offset
   2199      *            the inclusive index to begin counting at.
   2200      * @param count
   2201      *            the number of {@code char} values to look through in
   2202      *            {@code seq}.
   2203      * @return the number of Unicode code points.
   2204      * @throws NullPointerException
   2205      *             if {@code seq} is {@code null}.
   2206      * @throws IndexOutOfBoundsException
   2207      *             if {@code offset < 0}, {@code count < 0} or if
   2208      *             {@code offset + count} is greater than the length of
   2209      *             {@code seq}.
   2210      * @since 1.5
   2211      */
   2212     public static int codePointCount(char[] seq, int offset, int count) {
   2213         if (seq == null) {
   2214             throw new NullPointerException();
   2215         }
   2216         int len = seq.length;
   2217         int endIndex = offset + count;
   2218         if (offset < 0 || count < 0 || endIndex > len) {
   2219             throw new IndexOutOfBoundsException();
   2220         }
   2221 
   2222         int result = 0;
   2223         for (int i = offset; i < endIndex; i++) {
   2224             char c = seq[i];
   2225             if (isHighSurrogate(c)) {
   2226                 if (++i < endIndex) {
   2227                     c = seq[i];
   2228                     if (!isLowSurrogate(c)) {
   2229                         result++;
   2230                     }
   2231                 }
   2232             }
   2233             result++;
   2234         }
   2235         return result;
   2236     }
   2237 
   2238     /**
   2239      * Determines the index in the specified character sequence that is offset
   2240      * {@code codePointOffset} code points from {@code index}.
   2241      *
   2242      * @param seq
   2243      *            the character sequence to find the index in.
   2244      * @param index
   2245      *            the start index in {@code seq}.
   2246      * @param codePointOffset
   2247      *            the number of code points to look backwards or forwards; may
   2248      *            be a negative or positive value.
   2249      * @return the index in {@code seq} that is {@code codePointOffset} code
   2250      *         points away from {@code index}.
   2251      * @throws NullPointerException
   2252      *             if {@code seq} is {@code null}.
   2253      * @throws IndexOutOfBoundsException
   2254      *             if {@code index < 0}, {@code index} is greater than the
   2255      *             length of {@code seq}, or if there are not enough values in
   2256      *             {@code seq} to skip {@code codePointOffset} code points
   2257      *             forwards or backwards (if {@code codePointOffset} is
   2258      *             negative) from {@code index}.
   2259      * @since 1.5
   2260      */
   2261     public static int offsetByCodePoints(CharSequence seq, int index,
   2262             int codePointOffset) {
   2263         if (seq == null) {
   2264             throw new NullPointerException();
   2265         }
   2266         int len = seq.length();
   2267         if (index < 0 || index > len) {
   2268             throw new IndexOutOfBoundsException();
   2269         }
   2270 
   2271         if (codePointOffset == 0) {
   2272             return index;
   2273         }
   2274 
   2275         if (codePointOffset > 0) {
   2276             int codePoints = codePointOffset;
   2277             int i = index;
   2278             while (codePoints > 0) {
   2279                 codePoints--;
   2280                 if (i >= len) {
   2281                     throw new IndexOutOfBoundsException();
   2282                 }
   2283                 if (isHighSurrogate(seq.charAt(i))) {
   2284                     int next = i + 1;
   2285                     if (next < len && isLowSurrogate(seq.charAt(next))) {
   2286                         i++;
   2287                     }
   2288                 }
   2289                 i++;
   2290             }
   2291             return i;
   2292         }
   2293 
   2294         assert codePointOffset < 0;
   2295         int codePoints = -codePointOffset;
   2296         int i = index;
   2297         while (codePoints > 0) {
   2298             codePoints--;
   2299             i--;
   2300             if (i < 0) {
   2301                 throw new IndexOutOfBoundsException();
   2302             }
   2303             if (isLowSurrogate(seq.charAt(i))) {
   2304                 int prev = i - 1;
   2305                 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
   2306                     i--;
   2307                 }
   2308             }
   2309         }
   2310         return i;
   2311     }
   2312 
   2313     /**
   2314      * Determines the index in a subsequence of the specified character array
   2315      * that is offset {@code codePointOffset} code points from {@code index}.
   2316      * The subsequence is delineated by {@code start} and {@code count}.
   2317      *
   2318      * @param seq
   2319      *            the character array to find the index in.
   2320      * @param start
   2321      *            the inclusive index that marks the beginning of the
   2322      *            subsequence.
   2323      * @param count
   2324      *            the number of {@code char} values to include within the
   2325      *            subsequence.
   2326      * @param index
   2327      *            the start index in the subsequence of the char array.
   2328      * @param codePointOffset
   2329      *            the number of code points to look backwards or forwards; may
   2330      *            be a negative or positive value.
   2331      * @return the index in {@code seq} that is {@code codePointOffset} code
   2332      *         points away from {@code index}.
   2333      * @throws NullPointerException
   2334      *             if {@code seq} is {@code null}.
   2335      * @throws IndexOutOfBoundsException
   2336      *             if {@code start < 0}, {@code count < 0},
   2337      *             {@code index < start}, {@code index > start + count},
   2338      *             {@code start + count} is greater than the length of
   2339      *             {@code seq}, or if there are not enough values in
   2340      *             {@code seq} to skip {@code codePointOffset} code points
   2341      *             forward or backward (if {@code codePointOffset} is
   2342      *             negative) from {@code index}.
   2343      * @since 1.5
   2344      */
   2345     public static int offsetByCodePoints(char[] seq, int start, int count,
   2346             int index, int codePointOffset) {
   2347         if (seq == null) {
   2348             throw new NullPointerException();
   2349         }
   2350         int end = start + count;
   2351         if (start < 0 || count < 0 || end > seq.length || index < start
   2352                 || index > end) {
   2353             throw new IndexOutOfBoundsException();
   2354         }
   2355 
   2356         if (codePointOffset == 0) {
   2357             return index;
   2358         }
   2359 
   2360         if (codePointOffset > 0) {
   2361             int codePoints = codePointOffset;
   2362             int i = index;
   2363             while (codePoints > 0) {
   2364                 codePoints--;
   2365                 if (i >= end) {
   2366                     throw new IndexOutOfBoundsException();
   2367                 }
   2368                 if (isHighSurrogate(seq[i])) {
   2369                     int next = i + 1;
   2370                     if (next < end && isLowSurrogate(seq[next])) {
   2371                         i++;
   2372                     }
   2373                 }
   2374                 i++;
   2375             }
   2376             return i;
   2377         }
   2378 
   2379         assert codePointOffset < 0;
   2380         int codePoints = -codePointOffset;
   2381         int i = index;
   2382         while (codePoints > 0) {
   2383             codePoints--;
   2384             i--;
   2385             if (i < start) {
   2386                 throw new IndexOutOfBoundsException();
   2387             }
   2388             if (isLowSurrogate(seq[i])) {
   2389                 int prev = i - 1;
   2390                 if (prev >= start && isHighSurrogate(seq[prev])) {
   2391                     i--;
   2392                 }
   2393             }
   2394         }
   2395         return i;
   2396     }
   2397 
   2398     /**
   2399      * Convenience method to determine the value of the specified character
   2400      * {@code c} in the supplied radix. The value of {@code radix} must be
   2401      * between MIN_RADIX and MAX_RADIX.
   2402      *
   2403      * @param c
   2404      *            the character to determine the value of.
   2405      * @param radix
   2406      *            the radix.
   2407      * @return the value of {@code c} in {@code radix} if {@code radix} lies
   2408      *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
   2409      */
   2410     public static int digit(char c, int radix) {
   2411         return digit((int) c, radix);
   2412     }
   2413 
   2414     /**
   2415      * Convenience method to determine the value of the character
   2416      * {@code codePoint} in the supplied radix. The value of {@code radix} must
   2417      * be between MIN_RADIX and MAX_RADIX.
   2418      *
   2419      * @param codePoint
   2420      *            the character, including supplementary characters.
   2421      * @param radix
   2422      *            the radix.
   2423      * @return if {@code radix} lies between {@link #MIN_RADIX} and
   2424      *         {@link #MAX_RADIX} then the value of the character in the radix;
   2425      *         -1 otherwise.
   2426      */
   2427     public static int digit(int codePoint, int radix) {
   2428         if (radix < MIN_RADIX || radix > MAX_RADIX) {
   2429             return -1;
   2430         }
   2431         if (codePoint < 128) {
   2432             // Optimized for ASCII
   2433             int result = -1;
   2434             if ('0' <= codePoint && codePoint <= '9') {
   2435                 result = codePoint - '0';
   2436             } else if ('a' <= codePoint && codePoint <= 'z') {
   2437                 result = 10 + (codePoint - 'a');
   2438             } else if ('A' <= codePoint && codePoint <= 'Z') {
   2439                 result = 10 + (codePoint - 'A');
   2440             }
   2441             return result < radix ? result : -1;
   2442         }
   2443         return digitImpl(codePoint, radix);
   2444     }
   2445 
   2446     private static native int digitImpl(int codePoint, int radix);
   2447 
   2448     /**
   2449      * Compares this object with the specified object and indicates if they are
   2450      * equal. In order to be equal, {@code object} must be an instance of
   2451      * {@code Character} and have the same char value as this object.
   2452      *
   2453      * @param object
   2454      *            the object to compare this double with.
   2455      * @return {@code true} if the specified object is equal to this
   2456      *         {@code Character}; {@code false} otherwise.
   2457      */
   2458     @Override
   2459     public boolean equals(Object object) {
   2460         return (object instanceof Character) && (value == ((Character) object).value);
   2461     }
   2462 
   2463     /**
   2464      * Returns the character which represents the specified digit in the
   2465      * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
   2466      * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
   2467      * smaller than {@code radix}. If any of these conditions does not hold, 0
   2468      * is returned.
   2469      *
   2470      * @param digit
   2471      *            the integer value.
   2472      * @param radix
   2473      *            the radix.
   2474      * @return the character which represents the {@code digit} in the
   2475      *         {@code radix}.
   2476      */
   2477     public static char forDigit(int digit, int radix) {
   2478         if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
   2479             if (0 <= digit && digit < radix) {
   2480                 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
   2481             }
   2482         }
   2483         return 0;
   2484     }
   2485 
   2486     /**
   2487      * Gets the numeric value of the specified Unicode character.
   2488      *
   2489      * @param c
   2490      *            the Unicode character to get the numeric value of.
   2491      * @return a non-negative numeric integer value if a numeric value for
   2492      *         {@code c} exists, -1 if there is no numeric value for {@code c},
   2493      *         -2 if the numeric value can not be represented with an integer.
   2494      */
   2495     public static int getNumericValue(char c) {
   2496         return getNumericValue((int) c);
   2497     }
   2498 
   2499     /**
   2500      * Gets the numeric value of the specified Unicode code point. For example,
   2501      * the code point '\u216B' stands for the Roman number XII, which has the
   2502      * numeric value 12.
   2503      *
   2504      * @param codePoint
   2505      *            the Unicode code point to get the numeric value of.
   2506      * @return a non-negative numeric integer value if a numeric value for
   2507      *         {@code codePoint} exists, -1 if there is no numeric value for
   2508      *         {@code codePoint}, -2 if the numeric value can not be
   2509      *         represented with an integer.
   2510      */
   2511     public static int getNumericValue(int codePoint) {
   2512         if (codePoint < 128) {
   2513             // Optimized for ASCII
   2514             if (codePoint >= '0' && codePoint <= '9') {
   2515                 return codePoint - '0';
   2516             }
   2517             if (codePoint >= 'a' && codePoint <= 'z') {
   2518                 return codePoint - ('a' - 10);
   2519             }
   2520             if (codePoint >= 'A' && codePoint <= 'Z') {
   2521                 return codePoint - ('A' - 10);
   2522             }
   2523             return -1;
   2524         }
   2525         return getNumericValueImpl(codePoint);
   2526     }
   2527 
   2528     private static native int getNumericValueImpl(int codePoint);
   2529 
   2530     /**
   2531      * Gets the general Unicode category of the specified character.
   2532      *
   2533      * @param c
   2534      *            the character to get the category of.
   2535      * @return the Unicode category of {@code c}.
   2536      */
   2537     public static int getType(char c) {
   2538         return getType((int) c);
   2539     }
   2540 
   2541     /**
   2542      * Gets the general Unicode category of the specified code point.
   2543      *
   2544      * @param codePoint
   2545      *            the Unicode code point to get the category of.
   2546      * @return the Unicode category of {@code codePoint}.
   2547      */
   2548     public static int getType(int codePoint) {
   2549         int type = getTypeImpl(codePoint);
   2550         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
   2551         if (type <= Character.FORMAT) {
   2552             return type;
   2553         }
   2554         return (type + 1);
   2555     }
   2556 
   2557     private static native int getTypeImpl(int codePoint);
   2558 
   2559     /**
   2560      * Gets the Unicode directionality of the specified character.
   2561      *
   2562      * @param c
   2563      *            the character to get the directionality of.
   2564      * @return the Unicode directionality of {@code c}.
   2565      */
   2566     public static byte getDirectionality(char c) {
   2567         return getDirectionality((int)c);
   2568     }
   2569 
   2570     /**
   2571      * Gets the Unicode directionality of the specified character.
   2572      *
   2573      * @param codePoint
   2574      *            the Unicode code point to get the directionality of.
   2575      * @return the Unicode directionality of {@code codePoint}.
   2576      */
   2577     public static byte getDirectionality(int codePoint) {
   2578         if (getType(codePoint) == Character.UNASSIGNED) {
   2579             return Character.DIRECTIONALITY_UNDEFINED;
   2580         }
   2581 
   2582         byte directionality = getDirectionalityImpl(codePoint);
   2583         if (directionality == -1) {
   2584             return -1;
   2585         }
   2586         return DIRECTIONALITY[directionality];
   2587     }
   2588 
   2589     private static native byte getDirectionalityImpl(int codePoint);
   2590 
   2591     /**
   2592      * Indicates whether the specified character is mirrored.
   2593      *
   2594      * @param c
   2595      *            the character to check.
   2596      * @return {@code true} if {@code c} is mirrored; {@code false}
   2597      *         otherwise.
   2598      */
   2599     public static boolean isMirrored(char c) {
   2600         return isMirrored((int) c);
   2601     }
   2602 
   2603     /**
   2604      * Indicates whether the specified code point is mirrored.
   2605      *
   2606      * @param codePoint
   2607      *            the code point to check.
   2608      * @return {@code true} if {@code codePoint} is mirrored, {@code false}
   2609      *         otherwise.
   2610      */
   2611     public static boolean isMirrored(int codePoint) {
   2612         return isMirroredImpl(codePoint);
   2613     }
   2614 
   2615     private static native boolean isMirroredImpl(int codePoint);
   2616 
   2617     @Override
   2618     public int hashCode() {
   2619         return value;
   2620     }
   2621 
   2622     /**
   2623      * Indicates whether the specified character is defined in the Unicode
   2624      * specification.
   2625      *
   2626      * @param c
   2627      *            the character to check.
   2628      * @return {@code true} if the general Unicode category of the character is
   2629      *         not {@code UNASSIGNED}; {@code false} otherwise.
   2630      */
   2631     public static boolean isDefined(char c) {
   2632         return isDefinedImpl(c);
   2633     }
   2634 
   2635     /**
   2636      * Indicates whether the specified code point is defined in the Unicode
   2637      * specification.
   2638      *
   2639      * @param codePoint
   2640      *            the code point to check.
   2641      * @return {@code true} if the general Unicode category of the code point is
   2642      *         not {@code UNASSIGNED}; {@code false} otherwise.
   2643      */
   2644     public static boolean isDefined(int codePoint) {
   2645         return isDefinedImpl(codePoint);
   2646     }
   2647 
   2648     private static native boolean isDefinedImpl(int codePoint);
   2649 
   2650     /**
   2651      * Indicates whether the specified character is a digit.
   2652      *
   2653      * @param c
   2654      *            the character to check.
   2655      * @return {@code true} if {@code c} is a digit; {@code false}
   2656      *         otherwise.
   2657      */
   2658     public static boolean isDigit(char c) {
   2659         return isDigit((int) c);
   2660     }
   2661 
   2662     /**
   2663      * Indicates whether the specified code point is a digit.
   2664      *
   2665      * @param codePoint
   2666      *            the code point to check.
   2667      * @return {@code true} if {@code codePoint} is a digit; {@code false}
   2668      *         otherwise.
   2669      */
   2670     public static boolean isDigit(int codePoint) {
   2671         // Optimized case for ASCII
   2672         if ('0' <= codePoint && codePoint <= '9') {
   2673             return true;
   2674         }
   2675         if (codePoint < 1632) {
   2676             return false;
   2677         }
   2678         return isDigitImpl(codePoint);
   2679     }
   2680 
   2681     private static native boolean isDigitImpl(int codePoint);
   2682 
   2683     /**
   2684      * Indicates whether the specified character is ignorable in a Java or
   2685      * Unicode identifier.
   2686      *
   2687      * @param c
   2688      *            the character to check.
   2689      * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
   2690      */
   2691     public static boolean isIdentifierIgnorable(char c) {
   2692         return isIdentifierIgnorable((int) c);
   2693     }
   2694 
   2695     /**
   2696      * Indicates whether the specified code point is ignorable in a Java or
   2697      * Unicode identifier.
   2698      *
   2699      * @param codePoint
   2700      *            the code point to check.
   2701      * @return {@code true} if {@code codePoint} is ignorable; {@code false}
   2702      *         otherwise.
   2703      */
   2704     public static boolean isIdentifierIgnorable(int codePoint) {
   2705         if (codePoint < 0x600) {
   2706             return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
   2707                     (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
   2708         }
   2709         return isIdentifierIgnorableImpl(codePoint);
   2710     }
   2711 
   2712     private static native boolean isIdentifierIgnorableImpl(int codePoint);
   2713 
   2714     /**
   2715      * Indicates whether the specified character is an ISO control character.
   2716      *
   2717      * @param c
   2718      *            the character to check.
   2719      * @return {@code true} if {@code c} is an ISO control character;
   2720      *         {@code false} otherwise.
   2721      */
   2722     public static boolean isISOControl(char c) {
   2723         return isISOControl((int) c);
   2724     }
   2725 
   2726     /**
   2727      * Indicates whether the specified code point is an ISO control character.
   2728      *
   2729      * @param c
   2730      *            the code point to check.
   2731      * @return {@code true} if {@code c} is an ISO control character;
   2732      *         {@code false} otherwise.
   2733      */
   2734     public static boolean isISOControl(int c) {
   2735         return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
   2736     }
   2737 
   2738     /**
   2739      * Indicates whether the specified character is a valid part of a Java
   2740      * identifier other than the first character.
   2741      *
   2742      * @param c
   2743      *            the character to check.
   2744      * @return {@code true} if {@code c} is valid as part of a Java identifier;
   2745      *         {@code false} otherwise.
   2746      */
   2747     public static boolean isJavaIdentifierPart(char c) {
   2748         // BEGIN android-changed
   2749         return isJavaIdentifierPart((int) c);
   2750         // END android-changed
   2751     }
   2752 
   2753     /**
   2754      * Indicates whether the specified code point is a valid part of a Java
   2755      * identifier other than the first character.
   2756      *
   2757      * @param codePoint
   2758      *            the code point to check.
   2759      * @return {@code true} if {@code c} is valid as part of a Java identifier;
   2760      *         {@code false} otherwise.
   2761      */
   2762     public static boolean isJavaIdentifierPart(int codePoint) {
   2763         // BEGIN android-changed: use precomputed bitmasks for the ASCII range.
   2764         // Optimized case for ASCII
   2765         if (codePoint < 64) {
   2766             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
   2767         } else if (codePoint < 128) {
   2768             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   2769         }
   2770         int type = getType(codePoint);
   2771         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
   2772                 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
   2773                 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
   2774                 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
   2775                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
   2776                 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
   2777         // END android-changed
   2778     }
   2779 
   2780     /**
   2781      * Indicates whether the specified character is a valid first character for
   2782      * a Java identifier.
   2783      *
   2784      * @param c
   2785      *            the character to check.
   2786      * @return {@code true} if {@code c} is a valid first character of a Java
   2787      *         identifier; {@code false} otherwise.
   2788      */
   2789     public static boolean isJavaIdentifierStart(char c) {
   2790         // BEGIN android-changed
   2791         return isJavaIdentifierStart((int) c);
   2792         // END android-changed
   2793     }
   2794 
   2795     /**
   2796      * Indicates whether the specified code point is a valid first character for
   2797      * a Java identifier.
   2798      *
   2799      * @param codePoint
   2800      *            the code point to check.
   2801      * @return {@code true} if {@code codePoint} is a valid start of a Java
   2802      *         identifier; {@code false} otherwise.
   2803      */
   2804     public static boolean isJavaIdentifierStart(int codePoint) {
   2805         // BEGIN android-changed: use precomputed bitmasks for the ASCII range.
   2806         // Optimized case for ASCII
   2807         if (codePoint < 64) {
   2808             return (codePoint == '$'); // There's only one character in this range.
   2809         } else if (codePoint < 128) {
   2810             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   2811         }
   2812         int type = getType(codePoint);
   2813         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
   2814                 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
   2815         // END android-changed
   2816     }
   2817 
   2818     /**
   2819      * Indicates whether the specified character is a Java letter.
   2820      *
   2821      * @param c
   2822      *            the character to check.
   2823      * @return {@code true} if {@code c} is a Java letter; {@code false}
   2824      *         otherwise.
   2825      * @deprecated Use {@link #isJavaIdentifierStart(char)}
   2826      */
   2827     @Deprecated
   2828     public static boolean isJavaLetter(char c) {
   2829         return isJavaIdentifierStart(c);
   2830     }
   2831 
   2832     /**
   2833      * Indicates whether the specified character is a Java letter or digit
   2834      * character.
   2835      *
   2836      * @param c
   2837      *            the character to check.
   2838      * @return {@code true} if {@code c} is a Java letter or digit;
   2839      *         {@code false} otherwise.
   2840      * @deprecated Use {@link #isJavaIdentifierPart(char)}
   2841      */
   2842     @Deprecated
   2843     public static boolean isJavaLetterOrDigit(char c) {
   2844         return isJavaIdentifierPart(c);
   2845     }
   2846 
   2847     /**
   2848      * Indicates whether the specified character is a letter.
   2849      *
   2850      * @param c
   2851      *            the character to check.
   2852      * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
   2853      */
   2854     public static boolean isLetter(char c) {
   2855         return isLetter((int) c);
   2856     }
   2857 
   2858     /**
   2859      * Indicates whether the specified code point is a letter.
   2860      *
   2861      * @param codePoint
   2862      *            the code point to check.
   2863      * @return {@code true} if {@code codePoint} is a letter; {@code false}
   2864      *         otherwise.
   2865      */
   2866     public static boolean isLetter(int codePoint) {
   2867         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
   2868             return true;
   2869         }
   2870         if (codePoint < 128) {
   2871             return false;
   2872         }
   2873         return isLetterImpl(codePoint);
   2874     }
   2875 
   2876     private static native boolean isLetterImpl(int codePoint);
   2877 
   2878     /**
   2879      * Indicates whether the specified character is a letter or a digit.
   2880      *
   2881      * @param c
   2882      *            the character to check.
   2883      * @return {@code true} if {@code c} is a letter or a digit; {@code false}
   2884      *         otherwise.
   2885      */
   2886     public static boolean isLetterOrDigit(char c) {
   2887         return isLetterOrDigit((int) c);
   2888     }
   2889 
   2890     /**
   2891      * Indicates whether the specified code point is a letter or a digit.
   2892      *
   2893      * @param codePoint
   2894      *            the code point to check.
   2895      * @return {@code true} if {@code codePoint} is a letter or a digit;
   2896      *         {@code false} otherwise.
   2897      */
   2898     public static boolean isLetterOrDigit(int codePoint) {
   2899         // Optimized case for ASCII
   2900         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
   2901             return true;
   2902         }
   2903         if ('0' <= codePoint && codePoint <= '9') {
   2904             return true;
   2905         }
   2906         if (codePoint < 128) {
   2907             return false;
   2908         }
   2909         return isLetterOrDigitImpl(codePoint);
   2910     }
   2911 
   2912     private static native boolean isLetterOrDigitImpl(int codePoint);
   2913 
   2914     /**
   2915      * Indicates whether the specified character is a lower case letter.
   2916      *
   2917      * @param c
   2918      *            the character to check.
   2919      * @return {@code true} if {@code c} is a lower case letter; {@code false}
   2920      *         otherwise.
   2921      */
   2922     public static boolean isLowerCase(char c) {
   2923         return isLowerCase((int) c);
   2924     }
   2925 
   2926     /**
   2927      * Indicates whether the specified code point is a lower case letter.
   2928      *
   2929      * @param codePoint
   2930      *            the code point to check.
   2931      * @return {@code true} if {@code codePoint} is a lower case letter;
   2932      *         {@code false} otherwise.
   2933      */
   2934     public static boolean isLowerCase(int codePoint) {
   2935         // Optimized case for ASCII
   2936         if ('a' <= codePoint && codePoint <= 'z') {
   2937             return true;
   2938         }
   2939         if (codePoint < 128) {
   2940             return false;
   2941         }
   2942         return isLowerCaseImpl(codePoint);
   2943     }
   2944 
   2945     private static native boolean isLowerCaseImpl(int codePoint);
   2946 
   2947     /**
   2948      * Indicates whether the specified character is a Java space.
   2949      *
   2950      * @param c
   2951      *            the character to check.
   2952      * @return {@code true} if {@code c} is a Java space; {@code false}
   2953      *         otherwise.
   2954      * @deprecated Use {@link #isWhitespace(char)}
   2955      */
   2956     @Deprecated
   2957     public static boolean isSpace(char c) {
   2958         return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
   2959     }
   2960 
   2961     /**
   2962      * Indicates whether the specified character is a Unicode space character.
   2963      * That is, if it is a member of one of the Unicode categories Space
   2964      * Separator, Line Separator, or Paragraph Separator.
   2965      *
   2966      * @param c
   2967      *            the character to check.
   2968      * @return {@code true} if {@code c} is a Unicode space character,
   2969      *         {@code false} otherwise.
   2970      */
   2971     public static boolean isSpaceChar(char c) {
   2972         return isSpaceChar((int) c);
   2973     }
   2974 
   2975     /**
   2976      * Indicates whether the specified code point is a Unicode space character.
   2977      * That is, if it is a member of one of the Unicode categories Space
   2978      * Separator, Line Separator, or Paragraph Separator.
   2979      *
   2980      * @param codePoint
   2981      *            the code point to check.
   2982      * @return {@code true} if {@code codePoint} is a Unicode space character,
   2983      *         {@code false} otherwise.
   2984      */
   2985     public static boolean isSpaceChar(int codePoint) {
   2986         if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
   2987             return true;
   2988         }
   2989         if (codePoint < 0x2000) {
   2990             return false;
   2991         }
   2992         if (codePoint <= 0xffff) {
   2993             return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
   2994                     codePoint == 0x202f || codePoint == 0x3000;
   2995         }
   2996         return isSpaceCharImpl(codePoint);
   2997     }
   2998 
   2999     private static native boolean isSpaceCharImpl(int codePoint);
   3000 
    /**
     * Indicates whether the specified character is a titlecase character.
     *
     * <p>The {@code char} is widened to an {@code int} and passed directly to
     * the native check; there is no Java-side fast path.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a titlecase character, {@code false}
     *         otherwise.
     */
    public static boolean isTitleCase(char c) {
        return isTitleCaseImpl(c);
    }

    /**
     * Indicates whether the specified code point is a titlecase character.
     *
     * <p>The code point is passed unchanged to the native check; no range
     * validation is performed here.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a titlecase character,
     *         {@code false} otherwise.
     */
    public static boolean isTitleCase(int codePoint) {
        return isTitleCaseImpl(codePoint);
    }

    // Native implementation shared by both isTitleCase overloads.
    private static native boolean isTitleCaseImpl(int codePoint);
   3026 
    /**
     * Indicates whether the specified character is valid as part of a Unicode
     * identifier other than the first character.
     *
     * <p>The {@code char} is widened to an {@code int} and passed directly to
     * the native check; there is no Java-side fast path.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is valid as part of a Unicode
     *         identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierPart(char c) {
        return isUnicodeIdentifierPartImpl(c);
    }

    /**
     * Indicates whether the specified code point is valid as part of a Unicode
     * identifier other than the first character.
     *
     * <p>The code point is passed unchanged to the native check; no range
     * validation is performed here.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
     *         identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierPart(int codePoint) {
        return isUnicodeIdentifierPartImpl(codePoint);
    }

    // Native implementation shared by both isUnicodeIdentifierPart overloads.
    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
   3054 
    /**
     * Indicates whether the specified character is a valid initial character
     * for a Unicode identifier.
     *
     * <p>The {@code char} is widened to an {@code int} and passed directly to
     * the native check; there is no Java-side fast path.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a valid first character for a
     *         Unicode identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierStart(char c) {
        return isUnicodeIdentifierStartImpl(c);
    }

    /**
     * Indicates whether the specified code point is a valid initial character
     * for a Unicode identifier.
     *
     * <p>The code point is passed unchanged to the native check; no range
     * validation is performed here.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a valid first character for
     *         a Unicode identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        return isUnicodeIdentifierStartImpl(codePoint);
    }

    // Native implementation shared by both isUnicodeIdentifierStart overloads.
    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
   3082 
   3083     /**
   3084      * Indicates whether the specified character is an upper case letter.
   3085      *
   3086      * @param c
   3087      *            the character to check.
   3088      * @return {@code true} if {@code c} is a upper case letter; {@code false}
   3089      *         otherwise.
   3090      */
   3091     public static boolean isUpperCase(char c) {
   3092         return isUpperCase((int) c);
   3093     }
   3094 
   3095     /**
   3096      * Indicates whether the specified code point is an upper case letter.
   3097      *
   3098      * @param codePoint
   3099      *            the code point to check.
   3100      * @return {@code true} if {@code codePoint} is a upper case letter;
   3101      *         {@code false} otherwise.
   3102      */
   3103     public static boolean isUpperCase(int codePoint) {
   3104         // Optimized case for ASCII
   3105         if ('A' <= codePoint && codePoint <= 'Z') {
   3106             return true;
   3107         }
   3108         if (codePoint < 128) {
   3109             return false;
   3110         }
   3111         return isUpperCaseImpl(codePoint);
   3112     }
   3113 
   3114     private static native boolean isUpperCaseImpl(int codePoint);
   3115 
   3116     /**
   3117      * Indicates whether the specified character is a whitespace character in
   3118      * Java.
   3119      *
   3120      * @param c
   3121      *            the character to check.
   3122      * @return {@code true} if the supplied {@code c} is a whitespace character
   3123      *         in Java; {@code false} otherwise.
   3124      */
   3125     public static boolean isWhitespace(char c) {
   3126         return isWhitespace((int) c);
   3127     }
   3128 
   3129     /**
   3130      * Indicates whether the specified code point is a whitespace character in
   3131      * Java.
   3132      *
   3133      * @param codePoint
   3134      *            the code point to check.
   3135      * @return {@code true} if the supplied {@code c} is a whitespace character
   3136      *         in Java; {@code false} otherwise.
   3137      */
   3138     public static boolean isWhitespace(int codePoint) {
   3139         // Optimized case for ASCII
   3140         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
   3141             return true;
   3142         }
   3143         if (codePoint == 0x1680) {
   3144             return true;
   3145         }
   3146         if (codePoint < 0x2000 || codePoint == 0x2007) {
   3147             return false;
   3148         }
   3149         if (codePoint <= 0xffff) {
   3150             return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
   3151                     codePoint == 0x3000;
   3152         }
   3153         return isWhitespaceImpl(codePoint);
   3154     }
   3155 
   3156     private static native boolean isWhitespaceImpl(int codePoint);
   3157 
   3158     /**
   3159      * Reverses the order of the first and second byte in the specified
   3160      * character.
   3161      *
   3162      * @param c
   3163      *            the character to reverse.
   3164      * @return the character with reordered bytes.
   3165      */
   3166     public static char reverseBytes(char c) {
   3167         return (char)((c<<8) | (c>>8));
   3168     }
   3169 
   3170     /**
   3171      * Returns the lower case equivalent for the specified character if the
   3172      * character is an upper case letter. Otherwise, the specified character is
   3173      * returned unchanged.
   3174      *
   3175      * @param c
   3176      *            the character
   3177      * @return if {@code c} is an upper case character then its lower case
   3178      *         counterpart, otherwise just {@code c}.
   3179      */
   3180     public static char toLowerCase(char c) {
   3181         return (char) toLowerCase((int) c);
   3182     }
   3183 
   3184     /**
   3185      * Returns the lower case equivalent for the specified code point if it is
   3186      * an upper case letter. Otherwise, the specified code point is returned
   3187      * unchanged.
   3188      *
   3189      * @param codePoint
   3190      *            the code point to check.
   3191      * @return if {@code codePoint} is an upper case character then its lower
   3192      *         case counterpart, otherwise just {@code codePoint}.
   3193      */
   3194     public static int toLowerCase(int codePoint) {
   3195         // Optimized case for ASCII
   3196         if ('A' <= codePoint && codePoint <= 'Z') {
   3197             return (char) (codePoint + ('a' - 'A'));
   3198         }
   3199         if (codePoint < 192) {
   3200             return codePoint;
   3201         }
   3202         return toLowerCaseImpl(codePoint);
   3203     }
   3204 
   3205     private static native int toLowerCaseImpl(int codePoint);
   3206 
    /**
     * Returns the string form of this instance, produced by passing the
     * {@code value} field to {@link String#valueOf}.
     *
     * <p>NOTE(review): {@code value} is declared outside this section of the
     * file; presumably it is the wrapped {@code char} - confirm against the
     * field declaration.
     *
     * @return the result of {@code String.valueOf(value)}.
     */
    @Override
    public String toString() {
        return String.valueOf(value);
    }
   3211 
   3212     /**
   3213      * Converts the specified character to its string representation.
   3214      *
   3215      * @param value
   3216      *            the character to convert.
   3217      * @return the character converted to a string.
   3218      */
   3219     public static String toString(char value) {
   3220         return String.valueOf(value);
   3221     }
   3222 
    /**
     * Returns the title case equivalent for the specified character if it
     * exists. Otherwise, the specified character is returned unchanged.
     *
     * <p>The {@code char} is widened to an {@code int} for the native call and
     * the {@code int} result is narrowed back to {@code char}.
     *
     * @param c
     *            the character to convert.
     * @return the title case equivalent of {@code c} if it exists, otherwise
     *         {@code c}.
     */
    public static char toTitleCase(char c) {
        return (char) toTitleCaseImpl(c);
    }

    /**
     * Returns the title case equivalent for the specified code point if it
     * exists. Otherwise, the specified code point is returned unchanged.
     *
     * <p>The code point is passed unchanged to the native conversion; no range
     * validation is performed here.
     *
     * @param codePoint
     *            the code point to convert.
     * @return the title case equivalent of {@code codePoint} if it exists,
     *         otherwise {@code codePoint}.
     */
    public static int toTitleCase(int codePoint) {
        return toTitleCaseImpl(codePoint);
    }

    // Native implementation shared by both toTitleCase overloads.
    private static native int toTitleCaseImpl(int codePoint);
   3250 
   3251     /**
   3252      * Returns the upper case equivalent for the specified character if the
   3253      * character is a lower case letter. Otherwise, the specified character is
   3254      * returned unchanged.
   3255      *
   3256      * @param c
   3257      *            the character to convert.
   3258      * @return if {@code c} is a lower case character then its upper case
   3259      *         counterpart, otherwise just {@code c}.
   3260      */
   3261     public static char toUpperCase(char c) {
   3262         return (char) toUpperCase((int) c);
   3263     }
   3264 
   3265     /**
   3266      * Returns the upper case equivalent for the specified code point if the
   3267      * code point is a lower case letter. Otherwise, the specified code point is
   3268      * returned unchanged.
   3269      *
   3270      * @param codePoint
   3271      *            the code point to convert.
   3272      * @return if {@code codePoint} is a lower case character then its upper
   3273      *         case counterpart, otherwise just {@code codePoint}.
   3274      */
   3275     public static int toUpperCase(int codePoint) {
   3276         // Optimized case for ASCII
   3277         if ('a' <= codePoint && codePoint <= 'z') {
   3278             return (char) (codePoint - ('a' - 'A'));
   3279         }
   3280         if (codePoint < 181) {
   3281             return codePoint;
   3282         }
   3283         return toUpperCaseImpl(codePoint);
   3284     }
   3285 
   3286     private static native int toUpperCaseImpl(int codePoint);
   3287 }
   3288