Home | History | Annotate | Download | only in lang
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  */
     17 
     18 package java.lang;
     19 
     20 import java.io.Serializable;
     21 import java.util.Arrays;
     22 
     23 /**
     24  * The wrapper for the primitive type {@code char}. This class also provides a
     25  * number of utility methods for working with characters.
     26  *
     27  * <p>Character data is kept up to date as Unicode evolves.
     28  * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
     29  * the {@code Locale} documentation for details of the Unicode versions implemented by current
     30  * and historical Android releases.
     31  *
     32  * <p>The Unicode specification, character tables, and other information are available at
     33  * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
     34  *
     35  * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
     36  * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
     37  * is the code point range U+0000 to U+FFFF. Characters above the BMP are
     38  * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
     39  * encoding and {@code char} pairs are used to represent code points in the
     40  * supplementary range. A pair of {@code char} values that represent a
     41  * supplementary character are made up of a <i>high surrogate</i> with a value
     42  * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
     43  * 0xDC00 to 0xDFFF.
     44  * <p>
     45  * On the Java platform a {@code char} value represents either a single BMP code
     46  * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
     47  * is used to represent all Unicode code points.
     48  *
     49  * <a name="unicode_categories"><h3>Unicode categories</h3></a>
     50  * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
     51  * grouped semantically to provide a convenient overview. This table is also useful in
     52  * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
     53  * <span class="datatable">
     54  * <style type="text/css">
     55  * .datatable td { padding-right: 20px; }
     56  * </style>
     57  * <p><table>
     58  * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
     59  * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
     60  * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
     61  * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
     62  * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
     63  * <tr> <td><br></td> </tr>
     64  * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
     65  * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
     66  * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
     67  * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
     68  * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
     69  * <tr> <td><br></td> </tr>
     70  * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
     71  * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
     72  * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
     73  * <tr> <td><br></td> </tr>
     74  * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
     75  * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
     76  * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
     77  * <tr> <td><br></td> </tr>
     78  * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
     79  * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
     80  * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
     81  * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
     82  * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
     83  * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
     84  * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
     85  * <tr> <td><br></td> </tr>
     86  * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
     87  * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
     88  * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
     89  * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
     90  * <tr> <td><br></td> </tr>
     91  * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
     92  * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
     93  * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
     94  * </table>
     95  * </span>
     96  *
     97  * @since 1.0
     98  */
     99 @FindBugsSuppressWarnings("DM_NUMBER_CTOR")
    100 public final class Character implements Serializable, Comparable<Character> {
    101     private static final long serialVersionUID = 3786198910865385080L;
    102 
    103     private final char value;
    104 
    105     /**
    106      * The minimum {@code Character} value.
    107      */
    108     public static final char MIN_VALUE = '\u0000';
    109 
    110     /**
    111      * The maximum {@code Character} value.
    112      */
    113     public static final char MAX_VALUE = '\uffff';
    114 
    115     /**
    116      * The minimum radix used for conversions between characters and integers.
    117      */
    118     public static final int MIN_RADIX = 2;
    119 
    120     /**
    121      * The maximum radix used for conversions between characters and integers.
    122      */
    123     public static final int MAX_RADIX = 36;
    124 
    125     /**
    126      * The {@link Class} object that represents the primitive type {@code char}.
    127      */
    128     @SuppressWarnings("unchecked")
    129     public static final Class<Character> TYPE
    130             = (Class<Character>) char[].class.getComponentType();
    131     // Note: Character.TYPE can't be set to "char.class", since *that* is
    132     // defined to be "java.lang.Character.TYPE";
    133 
    134     /**
    135      * Unicode category constant Cn.
    136      */
    137     public static final byte UNASSIGNED = 0;
    138 
    139     /**
    140      * Unicode category constant Lu.
    141      */
    142     public static final byte UPPERCASE_LETTER = 1;
    143 
    144     /**
    145      * Unicode category constant Ll.
    146      */
    147     public static final byte LOWERCASE_LETTER = 2;
    148 
    149     /**
    150      * Unicode category constant Lt.
    151      */
    152     public static final byte TITLECASE_LETTER = 3;
    153 
    154     /**
    155      * Unicode category constant Lm.
    156      */
    157     public static final byte MODIFIER_LETTER = 4;
    158 
    159     /**
    160      * Unicode category constant Lo.
    161      */
    162     public static final byte OTHER_LETTER = 5;
    163 
    164     /**
    165      * Unicode category constant Mn.
    166      */
    167     public static final byte NON_SPACING_MARK = 6;
    168 
    169     /**
    170      * Unicode category constant Me.
    171      */
    172     public static final byte ENCLOSING_MARK = 7;
    173 
    174     /**
    175      * Unicode category constant Mc.
    176      */
    177     public static final byte COMBINING_SPACING_MARK = 8;
    178 
    179     /**
    180      * Unicode category constant Nd.
    181      */
    182     public static final byte DECIMAL_DIGIT_NUMBER = 9;
    183 
    184     /**
    185      * Unicode category constant Nl.
    186      */
    187     public static final byte LETTER_NUMBER = 10;
    188 
    189     /**
    190      * Unicode category constant No.
    191      */
    192     public static final byte OTHER_NUMBER = 11;
    193 
    194     /**
    195      * Unicode category constant Zs.
    196      */
    197     public static final byte SPACE_SEPARATOR = 12;
    198 
    199     /**
    200      * Unicode category constant Zl.
    201      */
    202     public static final byte LINE_SEPARATOR = 13;
    203 
    204     /**
    205      * Unicode category constant Zp.
    206      */
    207     public static final byte PARAGRAPH_SEPARATOR = 14;
    208 
    209     /**
    210      * Unicode category constant Cc.
    211      */
    212     public static final byte CONTROL = 15;
    213 
    214     /**
    215      * Unicode category constant Cf.
    216      */
    217     public static final byte FORMAT = 16;
    218 
    219     /**
    220      * Unicode category constant Co.
    221      */
            // The numbering jumps from FORMAT (16) straight to 18: no category
            // constant in this list has the value 17. NOTE(review): this matches the
            // constant values published for java.lang.Character, so it looks
            // deliberate rather than a typo - confirm before renumbering.
    222     public static final byte PRIVATE_USE = 18;
    223 
    224     /**
    225      * Unicode category constant Cs.
    226      */
    227     public static final byte SURROGATE = 19;
    228 
    229     /**
    230      * Unicode category constant Pd.
    231      */
    232     public static final byte DASH_PUNCTUATION = 20;
    233 
    234     /**
    235      * Unicode category constant Ps.
    236      */
    237     public static final byte START_PUNCTUATION = 21;
    238 
    239     /**
    240      * Unicode category constant Pe.
    241      */
    242     public static final byte END_PUNCTUATION = 22;
    243 
    244     /**
    245      * Unicode category constant Pc.
    246      */
    247     public static final byte CONNECTOR_PUNCTUATION = 23;
    248 
    249     /**
    250      * Unicode category constant Po.
    251      */
    252     public static final byte OTHER_PUNCTUATION = 24;
    253 
    254     /**
    255      * Unicode category constant Sm.
    256      */
    257     public static final byte MATH_SYMBOL = 25;
    258 
    259     /**
    260      * Unicode category constant Sc.
    261      */
    262     public static final byte CURRENCY_SYMBOL = 26;
    263 
    264     /**
    265      * Unicode category constant Sk.
    266      */
    267     public static final byte MODIFIER_SYMBOL = 27;
    268 
    269     /**
    270      * Unicode category constant So.
    271      */
    272     public static final byte OTHER_SYMBOL = 28;
    273 
    274     /**
    275      * Unicode category constant Pi.
    276      *
    277      * @since 1.4
    278      */
    279     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
    280 
    281     /**
    282      * Unicode category constant Pf.
    283      *
    284      * @since 1.4
    285      */
    286     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
    287 
    288     /**
    289      * Unicode bidirectional constant.
    290      *
    291      * @since 1.4
    292      */
    293     public static final byte DIRECTIONALITY_UNDEFINED = -1;
    294 
    295     /**
    296      * Unicode bidirectional constant L.
    297      *
    298      * @since 1.4
    299      */
    300     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
    301 
    302     /**
    303      * Unicode bidirectional constant R.
    304      *
    305      * @since 1.4
    306      */
    307     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
    308 
    309     /**
    310      * Unicode bidirectional constant AL.
    311      *
    312      * @since 1.4
    313      */
    314     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
    315 
    316     /**
    317      * Unicode bidirectional constant EN.
    318      *
    319      * @since 1.4
    320      */
    321     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
    322 
    323     /**
    324      * Unicode bidirectional constant ES.
    325      *
    326      * @since 1.4
    327      */
    328     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
    329 
    330     /**
    331      * Unicode bidirectional constant ET.
    332      *
    333      * @since 1.4
    334      */
    335     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
    336 
    337     /**
    338      * Unicode bidirectional constant AN.
    339      *
    340      * @since 1.4
    341      */
    342     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
    343 
    344     /**
    345      * Unicode bidirectional constant CS.
    346      *
    347      * @since 1.4
    348      */
    349     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
    350 
    351     /**
    352      * Unicode bidirectional constant NSM.
    353      *
    354      * @since 1.4
    355      */
    356     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
    357 
    358     /**
    359      * Unicode bidirectional constant BN.
    360      *
    361      * @since 1.4
    362      */
    363     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
    364 
    365     /**
    366      * Unicode bidirectional constant B.
    367      *
    368      * @since 1.4
    369      */
    370     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
    371 
    372     /**
    373      * Unicode bidirectional constant S.
    374      *
    375      * @since 1.4
    376      */
    377     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
    378 
    379     /**
    380      * Unicode bidirectional constant WS.
    381      *
    382      * @since 1.4
    383      */
    384     public static final byte DIRECTIONALITY_WHITESPACE = 12;
    385 
    386     /**
    387      * Unicode bidirectional constant ON.
    388      *
    389      * @since 1.4
    390      */
    391     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
    392 
    393     /**
    394      * Unicode bidirectional constant LRE.
    395      *
    396      * @since 1.4
    397      */
    398     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
    399 
    400     /**
    401      * Unicode bidirectional constant LRO.
    402      *
    403      * @since 1.4
    404      */
    405     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
    406 
    407     /**
    408      * Unicode bidirectional constant RLE.
    409      *
    410      * @since 1.4
    411      */
    412     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
    413 
    414     /**
    415      * Unicode bidirectional constant RLO.
    416      *
    417      * @since 1.4
    418      */
    419     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
    420 
    421     /**
    422      * Unicode bidirectional constant PDF.
    423      *
    424      * @since 1.4
    425      */
    426     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
    427 
    428     /**
    429      * The minimum value of a high surrogate or leading surrogate unit in UTF-16
    430      * encoding, {@code '\uD800'}.
    431      *
    432      * @since 1.5
    433      */
    434     public static final char MIN_HIGH_SURROGATE = '\uD800';
    435 
    436     /**
    437      * The maximum value of a high surrogate or leading surrogate unit in UTF-16
    438      * encoding, {@code '\uDBFF'}.
    439      *
    440      * @since 1.5
    441      */
    442     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
    443 
    444     /**
    445      * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
    446      * encoding, {@code '\uDC00'}.
    447      *
    448      * @since 1.5
    449      */
    450     public static final char MIN_LOW_SURROGATE = '\uDC00';
    451 
    452     /**
    453      * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
    454      * encoding, {@code '\uDFFF'}.
    455      *
    456      * @since 1.5
    457      */
    458     public static final char MAX_LOW_SURROGATE = '\uDFFF';
    459 
    460     /**
    461      * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
    462      *
    463      * @since 1.5
    464      */
    465     public static final char MIN_SURROGATE = '\uD800';
    466 
    467     /**
    468      * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
    469      *
    470      * @since 1.5
    471      */
    472     public static final char MAX_SURROGATE = '\uDFFF';
    473 
    474     /**
    475      * The minimum value of a supplementary code point, {@code U+010000}.
    476      *
    477      * @since 1.5
    478      */
    479     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
    480 
    481     /**
    482      * The minimum code point value, {@code U+0000}.
    483      *
    484      * @since 1.5
    485      */
    486     public static final int MIN_CODE_POINT = 0x000000;
    487 
    488     /**
    489      * The maximum code point value, {@code U+10FFFF}.
    490      *
    491      * @since 1.5
    492      */
    493     public static final int MAX_CODE_POINT = 0x10FFFF;
    494 
    495     /**
    496      * The number of bits required to represent a {@code Character} value
    497      * in unsigned form.
    498      *
    499      * @since 1.5
    500      */
    501     public static final int SIZE = 16;
    502 
    503     private static final byte[] DIRECTIONALITY = new byte[] {
                    // Lookup table holding every DIRECTIONALITY_* constant except
                    // DIRECTIONALITY_UNDEFINED exactly once (19 entries). The order is
                    // NOT the numeric order of the Java constants: for example index 2
                    // holds DIRECTIONALITY_EUROPEAN_NUMBER (value 3) and index 13 holds
                    // DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC (value 2).
                    // NOTE(review): the order matches ICU's UCharDirection enum, so this
                    // presumably translates an ICU direction code into the Java
                    // constant; the code that indexes the table is outside this view -
                    // confirm there. Reordering entries would change what each index
                    // maps to.
    504             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
    505             DIRECTIONALITY_EUROPEAN_NUMBER,
    506             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
    507             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
    508             DIRECTIONALITY_ARABIC_NUMBER,
    509             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
    510             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
    511             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
    512             DIRECTIONALITY_OTHER_NEUTRALS,
    513             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
    514             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
    515             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
    516             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
    517             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
    518             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
    519             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    520 
    521     /**
    522      * Represents a subset of the Unicode character set.
    523      */
    524     public static class Subset {
    525         String name;
    526 
    527         /**
    528          * Constructs a new {@code Subset}.
    529          *
    530          * @param string
    531          *            this subset's name.
    532          */
    533         protected Subset(String string) {
    534             if (string == null) {
    535                 throw new NullPointerException("string == null");
    536             }
    537             name = string;
    538         }
    539 
    540         /**
    541          * Compares this character subset with the specified object. Uses
    542          * {@link java.lang.Object#equals(Object)} to do the comparison.
    543          *
    544          * @param object
    545          *            the object to compare this character subset with.
    546          * @return {@code true} if {@code object} is this subset, that is, if
    547          *         {@code object == this}; {@code false} otherwise.
    548          */
    549         @Override
    550         public final boolean equals(Object object) {
    551             return super.equals(object);
    552         }
    553 
    554         /**
    555          * Returns the integer hash code for this character subset.
    556          *
    557          * @return this subset's hash code, which is the hash code computed by
    558          *         {@link java.lang.Object#hashCode()}.
    559          */
    560         @Override
    561         public final int hashCode() {
    562             return super.hashCode();
    563         }
    564 
    565         /**
    566          * Returns the string representation of this subset.
    567          *
    568          * @return this subset's name.
    569          */
    570         @Override
    571         public final String toString() {
    572             return name;
    573         }
    574     }
    575 
    576     /**
    577      * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
    578      * specification.
    579      *
    580      * @since 1.2
    581      */
    582     public static final class UnicodeBlock extends Subset {
    583         /**
    584          * The &quot;Surrogates Area&quot; Unicode Block.
    585          *
    586          * @deprecated As of Java 5, this block has been replaced by
    587          *             {@link #HIGH_SURROGATES},
    588          *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
    589          *             {@link #LOW_SURROGATES}.
    590          */
    591         @Deprecated
    592         public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
    593         /**
    594          * The &quot;Basic Latin&quot; Unicode Block.
    595          *
    596          * @since 1.2
    597          */
    598         public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
    599         /**
    600          * The &quot;Latin-1 Supplement&quot; Unicode Block.
    601          *
    602          * @since 1.2
    603          */
    604         public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
    605         /**
    606          * The &quot;Latin Extended-A&quot; Unicode Block.
    607          *
    608          * @since 1.2
    609          */
    610         public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
    611         /**
    612          * The &quot;Latin Extended-B&quot; Unicode Block.
    613          *
    614          * @since 1.2
    615          */
    616         public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
    617         /**
    618          * The &quot;IPA Extensions&quot; Unicode Block.
    619          *
    620          * @since 1.2
    621          */
    622         public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
    623         /**
    624          * The &quot;Spacing Modifier Letters&quot; Unicode Block.
    625          *
    626          * @since 1.2
    627          */
    628         public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
    629         /**
    630          * The &quot;Combining Diacritical Marks&quot; Unicode Block.
    631          *
    632          * @since 1.2
    633          */
    634         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
    635         /**
    636          * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
    637          * to as &quot;Greek&quot;.
    638          *
    639          * @since 1.2
    640          */
    641         public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
    642         /**
    643          * The &quot;Cyrillic&quot; Unicode Block.
    644          *
    645          * @since 1.2
    646          */
    647         public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
    648         /**
    649          * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
    650          * referred to as &quot;Cyrillic Supplementary&quot;.
    651          *
    652          * @since 1.5
    653          */
    654         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
    655         /**
    656          * The &quot;Armenian&quot; Unicode Block.
    657          *
    658          * @since 1.2
    659          */
    660         public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
    661         /**
    662          * The &quot;Hebrew&quot; Unicode Block.
    663          *
    664          * @since 1.2
    665          */
    666         public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
    667         /**
    668          * The &quot;Arabic&quot; Unicode Block.
    669          *
    670          * @since 1.2
    671          */
    672         public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
    673         /**
    674          * The &quot;Syriac&quot; Unicode Block.
    675          *
    676          * @since 1.4
    677          */
    678         public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
    679         /**
    680          * The &quot;Thaana&quot; Unicode Block.
    681          *
    682          * @since 1.4
    683          */
    684         public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
    685         /**
    686          * The &quot;Devanagari&quot; Unicode Block.
    687          *
    688          * @since 1.2
    689          */
    690         public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
    691         /**
    692          * The &quot;Bengali&quot; Unicode Block.
    693          *
    694          * @since 1.2
    695          */
    696         public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
    697         /**
    698          * The &quot;Gurmukhi&quot; Unicode Block.
    699          *
    700          * @since 1.2
    701          */
    702         public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
    703         /**
    704          * The &quot;Gujarati&quot; Unicode Block.
    705          *
    706          * @since 1.2
    707          */
    708         public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
    709         /**
    710          * The &quot;Oriya&quot; Unicode Block.
    711          *
    712          * @since 1.2
    713          */
    714         public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
    715         /**
    716          * The &quot;Tamil&quot; Unicode Block.
    717          *
    718          * @since 1.2
    719          */
    720         public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
    721         /**
    722          * The &quot;Telugu&quot; Unicode Block.
    723          *
    724          * @since 1.2
    725          */
    726         public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
    727         /**
    728          * The &quot;Kannada&quot; Unicode Block.
    729          *
    730          * @since 1.2
    731          */
    732         public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
    733         /**
    734          * The &quot;Malayalam&quot; Unicode Block.
    735          *
    736          * @since 1.2
    737          */
    738         public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
    739         /**
    740          * The &quot;Sinhala&quot; Unicode Block.
    741          *
    742          * @since 1.4
    743          */
    744         public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
    745         /**
    746          * The &quot;Thai&quot; Unicode Block.
    747          *
    748          * @since 1.2
    749          */
    750         public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
    751         /**
    752          * The &quot;Lao&quot; Unicode Block.
    753          *
    754          * @since 1.2
    755          */
    756         public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
    757         /**
    758          * The &quot;Tibetan&quot; Unicode Block (U+0F00 to U+0FFF).
    759          *
    760          * @since 1.2
    761          */
    762         public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
    763         /**
    764          * The &quot;Myanmar&quot; Unicode Block (U+1000 to U+109F).
    765          *
    766          * @since 1.4
    767          */
    768         public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
    769         /**
    770          * The &quot;Georgian&quot; Unicode Block (U+10A0 to U+10FF).
    771          *
    772          * @since 1.2
    773          */
    774         public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
    775         /**
    776          * The &quot;Hangul Jamo&quot; Unicode Block (U+1100 to U+11FF).
    777          *
    778          * @since 1.2
    779          */
    780         public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
    781         /**
    782          * The &quot;Ethiopic&quot; Unicode Block (U+1200 to U+137F).
    783          *
    784          * @since 1.4
    785          */
    786         public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
    787         /**
    788          * The &quot;Cherokee&quot; Unicode Block (U+13A0 to U+13FF).
    789          *
    790          * @since 1.4
    791          */
    792         public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
    793         /**
    794          * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block (U+1400 to U+167F).
    795          *
    796          * @since 1.4
    797          */
    798         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
    799         /**
    800          * The &quot;Ogham&quot; Unicode Block (U+1680 to U+169F).
    801          *
    802          * @since 1.4
    803          */
    804         public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
    805         /**
    806          * The &quot;Runic&quot; Unicode Block (U+16A0 to U+16FF).
    807          *
    808          * @since 1.4
    809          */
    810         public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
    811         /**
    812          * The &quot;Tagalog&quot; Unicode Block (U+1700 to U+171F).
    813          *
    814          * @since 1.5
    815          */
    816         public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
    817         /**
    818          * The &quot;Hanunoo&quot; Unicode Block (U+1720 to U+173F).
    819          *
    820          * @since 1.5
    821          */
    822         public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
    823         /**
    824          * The &quot;Buhid&quot; Unicode Block (U+1740 to U+175F).
    825          *
    826          * @since 1.5
    827          */
    828         public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
    829         /**
    830          * The &quot;Tagbanwa&quot; Unicode Block (U+1760 to U+177F).
    831          *
    832          * @since 1.5
    833          */
    834         public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
    835         /**
    836          * The &quot;Khmer&quot; Unicode Block (U+1780 to U+17FF).
    837          *
    838          * @since 1.4
    839          */
    840         public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
    841         /**
    842          * The &quot;Mongolian&quot; Unicode Block (U+1800 to U+18AF).
    843          *
    844          * @since 1.4
    845          */
    846         public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
    847         /**
    848          * The &quot;Limbu&quot; Unicode Block (U+1900 to U+194F).
    849          *
    850          * @since 1.5
    851          */
    852         public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
    853         /**
    854          * The &quot;Tai Le&quot; Unicode Block (U+1950 to U+197F).
    855          *
    856          * @since 1.5
    857          */
    858         public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
    859         /**
    860          * The &quot;Khmer Symbols&quot; Unicode Block (U+19E0 to U+19FF).
    861          *
    862          * @since 1.5
    863          */
    864         public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
    865         /**
    866          * The &quot;Phonetic Extensions&quot; Unicode Block (U+1D00 to U+1D7F).
    867          *
    868          * @since 1.5
    869          */
    870         public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
    871         /**
    872          * The &quot;Latin Extended Additional&quot; Unicode Block (U+1E00 to U+1EFF).
    873          *
    874          * @since 1.2
    875          */
    876         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
    877         /**
    878          * The &quot;Greek Extended&quot; Unicode Block (U+1F00 to U+1FFF).
    879          *
    880          * @since 1.2
    881          */
    882         public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
    883         /**
    884          * The &quot;General Punctuation&quot; Unicode Block (U+2000 to U+206F).
    885          *
    886          * @since 1.2
    887          */
    888         public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
    889         /**
    890          * The &quot;Superscripts and Subscripts&quot; Unicode Block (U+2070 to U+209F).
    891          *
    892          * @since 1.2
    893          */
    894         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
    895         /**
    896          * The &quot;Currency Symbols&quot; Unicode Block (U+20A0 to U+20CF).
    897          *
    898          * @since 1.2
    899          */
    900         public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
    901         /**
    902          * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
    903          * Block (U+20D0 to U+20FF). Previously referred to as &quot;Combining Marks for
    904          * Symbols&quot;.
    905          *
    906          * @since 1.2
    907          */
    908         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
    909         /**
    910          * The &quot;Letterlike Symbols&quot; Unicode Block (U+2100 to U+214F).
    911          *
    912          * @since 1.2
    913          */
    914         public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
    915         /**
    916          * The &quot;Number Forms&quot; Unicode Block (U+2150 to U+218F).
    917          *
    918          * @since 1.2
    919          */
    920         public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
    921         /**
    922          * The &quot;Arrows&quot; Unicode Block (U+2190 to U+21FF).
    923          *
    924          * @since 1.2
    925          */
    926         public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
    927         /**
    928          * The &quot;Mathematical Operators&quot; Unicode Block (U+2200 to U+22FF).
    929          *
    930          * @since 1.2
    931          */
    932         public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
    933         /**
    934          * The &quot;Miscellaneous Technical&quot; Unicode Block (U+2300 to U+23FF).
    935          *
    936          * @since 1.2
    937          */
    938         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
    939         /**
    940          * The &quot;Control Pictures&quot; Unicode Block (U+2400 to U+243F).
    941          *
    942          * @since 1.2
    943          */
    944         public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
    945         /**
    946          * The &quot;Optical Character Recognition&quot; Unicode Block (U+2440 to U+245F).
    947          *
    948          * @since 1.2
    949          */
    950         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
    951         /**
    952          * The &quot;Enclosed Alphanumerics&quot; Unicode Block (U+2460 to U+24FF).
    953          *
    954          * @since 1.2
    955          */
    956         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
    957         /**
    958          * The &quot;Box Drawing&quot; Unicode Block (U+2500 to U+257F).
    959          *
    960          * @since 1.2
    961          */
    962         public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
    963         /**
    964          * The &quot;Block Elements&quot; Unicode Block (U+2580 to U+259F).
    965          *
    966          * @since 1.2
    967          */
    968         public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
    969         /**
    970          * The &quot;Geometric Shapes&quot; Unicode Block (U+25A0 to U+25FF).
    971          *
    972          * @since 1.2
    973          */
    974         public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
    975         /**
    976          * The &quot;Miscellaneous Symbols&quot; Unicode Block (U+2600 to U+26FF).
    977          *
    978          * @since 1.2
    979          */
    980         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
    981         /**
    982          * The &quot;Dingbats&quot; Unicode Block (U+2700 to U+27BF).
    983          *
    984          * @since 1.2
    985          */
    986         public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
    987         /**
    988          * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block (U+27C0 to U+27EF).
    989          *
    990          * @since 1.5
    991          */
    992         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
    993         /**
    994          * The &quot;Supplemental Arrows-A&quot; Unicode Block (U+27F0 to U+27FF).
    995          *
    996          * @since 1.5
    997          */
    998         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
    999         /**
   1000          * The &quot;Braille Patterns&quot; Unicode Block (U+2800 to U+28FF).
   1001          *
   1002          * @since 1.4
   1003          */
   1004         public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
   1005         /**
   1006          * The &quot;Supplemental Arrows-B&quot; Unicode Block (U+2900 to U+297F).
   1007          *
   1008          * @since 1.5
   1009          */
   1010         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
   1011         /**
   1012          * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block (U+2980 to U+29FF).
   1013          *
   1014          * @since 1.5
   1015          */
   1016         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
   1017         /**
   1018          * The &quot;Supplemental Mathematical Operators&quot; Unicode Block (U+2A00 to U+2AFF).
   1019          *
   1020          * @since 1.5
   1021          */
   1022         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
   1023         /**
   1024          * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block (U+2B00 to U+2BFF).
   1025          *
   1026          * @since 1.5
   1027          */
   1028         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
   1029         /**
   1030          * The &quot;CJK Radicals Supplement&quot; Unicode Block (U+2E80 to U+2EFF).
   1031          *
   1032          * @since 1.4
   1033          */
   1034         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
   1035         /**
   1036          * The &quot;Kangxi Radicals&quot; Unicode Block (U+2F00 to U+2FDF).
   1037          *
   1038          * @since 1.4
   1039          */
   1040         public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
   1041         /**
   1042          * The &quot;Ideographic Description Characters&quot; Unicode Block (U+2FF0 to U+2FFF).
   1043          *
   1044          * @since 1.4
   1045          */
   1046         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
   1047         /**
   1048          * The &quot;CJK Symbols and Punctuation&quot; Unicode Block (U+3000 to U+303F).
   1049          *
   1050          * @since 1.2
   1051          */
   1052         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
   1053         /**
   1054          * The &quot;Hiragana&quot; Unicode Block (U+3040 to U+309F).
   1055          *
   1056          * @since 1.2
   1057          */
   1058         public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
   1059         /**
   1060          * The &quot;Katakana&quot; Unicode Block (U+30A0 to U+30FF).
   1061          *
   1062          * @since 1.2
   1063          */
   1064         public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
   1065         /**
   1066          * The &quot;Bopomofo&quot; Unicode Block (U+3100 to U+312F).
   1067          *
   1068          * @since 1.2
   1069          */
   1070         public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
   1071         /**
   1072          * The &quot;Hangul Compatibility Jamo&quot; Unicode Block (U+3130 to U+318F).
   1073          *
   1074          * @since 1.2
   1075          */
   1076         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
   1077         /**
   1078          * The &quot;Kanbun&quot; Unicode Block (U+3190 to U+319F).
   1079          *
   1080          * @since 1.2
   1081          */
   1082         public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
   1083         /**
   1084          * The &quot;Bopomofo Extended&quot; Unicode Block (U+31A0 to U+31BF).
   1085          *
   1086          * @since 1.4
   1087          */
   1088         public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
   1089         /**
   1090          * The &quot;Katakana Phonetic Extensions&quot; Unicode Block (U+31F0 to U+31FF).
   1091          *
   1092          * @since 1.5
   1093          */
   1094         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
   1095         /**
   1096          * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block (U+3200 to U+32FF).
   1097          *
   1098          * @since 1.2
   1099          */
   1100         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
   1101         /**
   1102          * The &quot;CJK Compatibility&quot; Unicode Block (U+3300 to U+33FF).
   1103          *
   1104          * @since 1.2
   1105          */
   1106         public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
   1107         /**
   1108          * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block (U+3400 to U+4DBF).
   1109          *
   1110          * @since 1.4
   1111          */
   1112         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
   1113         /**
   1114          * The &quot;Yijing Hexagram Symbols&quot; Unicode Block (U+4DC0 to U+4DFF).
   1115          *
   1116          * @since 1.5
   1117          */
   1118         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
   1119         /**
   1120          * The &quot;CJK Unified Ideographs&quot; Unicode Block (U+4E00 to U+9FFF).
   1121          *
   1122          * @since 1.2
   1123          */
   1124         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
   1125         /**
   1126          * The &quot;Yi Syllables&quot; Unicode Block (U+A000 to U+A48F).
   1127          *
   1128          * @since 1.4
   1129          */
   1130         public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
   1131         /**
   1132          * The &quot;Yi Radicals&quot; Unicode Block (U+A490 to U+A4CF).
   1133          *
   1134          * @since 1.4
   1135          */
   1136         public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
   1137         /**
   1138          * The &quot;Hangul Syllables&quot; Unicode Block (U+AC00 to U+D7AF).
   1139          *
   1140          * @since 1.2
   1141          */
   1142         public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
   1143         /**
   1144          * The &quot;High Surrogates&quot; Unicode Block. This block represents
   1145          * code point values in the high surrogate range 0xD800 to 0xDB7F.
   1146          */
   1147         public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
   1148         /**
   1149          * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
   1150          * represents code point values in the high surrogate range 0xDB80 to
   1151          * 0xDBFF.
   1152          */
   1153         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
   1154         /**
   1155          * The &quot;Low Surrogates&quot; Unicode Block. This block represents
   1156          * code point values in the low surrogate range 0xDC00 to 0xDFFF.
   1157          */
   1158         public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
   1159         /**
   1160          * The &quot;Private Use Area&quot; Unicode Block (U+E000 to U+F8FF).
   1161          *
   1162          * @since 1.2
   1163          */
   1164         public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
   1165         /**
   1166          * The &quot;CJK Compatibility Ideographs&quot; Unicode Block (U+F900 to U+FAFF).
   1167          *
   1168          * @since 1.2
   1169          */
   1170         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
   1171         /**
   1172          * The &quot;Alphabetic Presentation Forms&quot; Unicode Block (U+FB00 to U+FB4F).
   1173          *
   1174          * @since 1.2
   1175          */
   1176         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
   1177         /**
   1178          * The &quot;Arabic Presentation Forms-A&quot; Unicode Block (U+FB50 to U+FDFF).
   1179          *
   1180          * @since 1.2
   1181          */
   1182         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
   1183         /**
   1184          * The &quot;Variation Selectors&quot; Unicode Block (U+FE00 to U+FE0F).
   1185          *
   1186          * @since 1.5
   1187          */
   1188         public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
   1189         /**
   1190          * The &quot;Combining Half Marks&quot; Unicode Block (U+FE20 to U+FE2F).
   1191          *
   1192          * @since 1.2
   1193          */
   1194         public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
   1195         /**
   1196          * The &quot;CJK Compatibility Forms&quot; Unicode Block (U+FE30 to U+FE4F).
   1197          *
   1198          * @since 1.2
   1199          */
   1200         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
   1201         /**
   1202          * The &quot;Small Form Variants&quot; Unicode Block (U+FE50 to U+FE6F).
   1203          *
   1204          * @since 1.2
   1205          */
   1206         public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
   1207         /**
   1208          * The &quot;Arabic Presentation Forms-B&quot; Unicode Block (U+FE70 to U+FEFF).
   1209          *
   1210          * @since 1.2
   1211          */
   1212         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
   1213         /**
   1214          * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block (U+FF00 to U+FFEF).
   1215          *
   1216          * @since 1.2
   1217          */
   1218         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
   1219         /**
   1220          * The &quot;Specials&quot; Unicode Block (U+FFF0 to U+FFFF).
   1221          *
   1222          * @since 1.2
   1223          */
   1224         public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
   1225         /**
   1226          * The &quot;Linear B Syllabary&quot; Unicode Block (U+10000 to U+1007F).
   1227          *
   1228          * @since 1.5
   1229          */
   1230         public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
   1231         /**
   1232          * The &quot;Linear B Ideograms&quot; Unicode Block (U+10080 to U+100FF).
   1233          *
   1234          * @since 1.5
   1235          */
   1236         public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
   1237         /**
   1238          * The &quot;Aegean Numbers&quot; Unicode Block (U+10100 to U+1013F).
   1239          *
   1240          * @since 1.5
   1241          */
   1242         public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
   1243         /**
   1244          * The &quot;Old Italic&quot; Unicode Block (U+10300 to U+1032F).
   1245          *
   1246          * @since 1.5
   1247          */
   1248         public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
   1249         /**
   1250          * The &quot;Gothic&quot; Unicode Block (U+10330 to U+1034F).
   1251          *
   1252          * @since 1.5
   1253          */
   1254         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
   1255         /**
   1256          * The &quot;Ugaritic&quot; Unicode Block (U+10380 to U+1039F).
   1257          *
   1258          * @since 1.5
   1259          */
   1260         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
   1261         /**
   1262          * The &quot;Deseret&quot; Unicode Block (U+10400 to U+1044F).
   1263          *
   1264          * @since 1.5
   1265          */
   1266         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
   1267         /**
   1268          * The &quot;Shavian&quot; Unicode Block (U+10450 to U+1047F).
   1269          *
   1270          * @since 1.5
   1271          */
   1272         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
   1273         /**
   1274          * The &quot;Osmanya&quot; Unicode Block (U+10480 to U+104AF).
   1275          *
   1276          * @since 1.5
   1277          */
   1278         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
   1279         /**
   1280          * The &quot;Cypriot Syllabary&quot; Unicode Block (U+10800 to U+1083F).
   1281          *
   1282          * @since 1.5
   1283          */
   1284         public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
   1285         /**
   1286          * The &quot;Byzantine Musical Symbols&quot; Unicode Block (U+1D000 to U+1D0FF).
   1287          *
   1288          * @since 1.5
   1289          */
   1290         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
   1291         /**
   1292          * The &quot;Musical Symbols&quot; Unicode Block (U+1D100 to U+1D1FF).
   1293          *
   1294          * @since 1.5
   1295          */
   1296         public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
   1297         /**
   1298          * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block (U+1D300 to U+1D35F).
   1299          *
   1300          * @since 1.5
   1301          */
   1302         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
   1303         /**
   1304          * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block (U+1D400 to U+1D7FF).
   1305          *
   1306          * @since 1.5
   1307          */
   1308         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
   1309         /**
   1310          * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block (U+20000 to U+2A6DF).
   1311          *
   1312          * @since 1.5
   1313          */
   1314         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
   1315         /**
   1316          * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block (U+2F800 to U+2FA1F).
   1317          *
   1318          * @since 1.5
   1319          */
   1320         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
   1321         /**
   1322          * The &quot;Tags&quot; Unicode Block (U+E0000 to U+E007F).
   1323          *
   1324          * @since 1.5
   1325          */
   1326         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
   1327         /**
   1328          * The &quot;Variation Selectors Supplement&quot; Unicode Block (U+E0100 to U+E01EF).
   1329          *
   1330          * @since 1.5
   1331          */
   1332         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
   1333         /**
   1334          * The &quot;Supplementary Private Use Area-A&quot; Unicode Block (U+F0000 to U+FFFFF).
   1335          *
   1336          * @since 1.5
   1337          */
   1338         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
   1339         /**
   1340          * The &quot;Supplementary Private Use Area-B&quot; Unicode Block (U+100000 to U+10FFFF).
   1341          *
   1342          * @since 1.5
   1343          */
   1344         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
   1345 
   1346         /*
   1347          * Lookup table of the UnicodeBlocks with valid ranges. Element 0 is null.
                 *
                 * The entries are NOT sorted by code point: for example SPECIALS
                 * (0xfff0) precedes HALFWIDTH_AND_FULLWIDTH_FORMS (0xff00), and
                 * CYRILLIC_SUPPLEMENTARY and TAGALOG follow TAGS (0xe0000). Do not
                 * sort, reorder, or binary-search this array.
                 *
                 * NOTE(review): the order looks like an externally defined block-id
                 * numbering (it matches ICU's UBlockCode values, with 0 meaning
                 * "no block") used as the array index - confirm against the lookup
                 * code before adding or moving entries.
   1348          */
   1349         private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
   1350             null,
   1351             UnicodeBlock.BASIC_LATIN,
   1352             UnicodeBlock.LATIN_1_SUPPLEMENT,
   1353             UnicodeBlock.LATIN_EXTENDED_A,
   1354             UnicodeBlock.LATIN_EXTENDED_B,
   1355             UnicodeBlock.IPA_EXTENSIONS,
   1356             UnicodeBlock.SPACING_MODIFIER_LETTERS,
   1357             UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
   1358             UnicodeBlock.GREEK,
   1359             UnicodeBlock.CYRILLIC,
   1360             UnicodeBlock.ARMENIAN,
   1361             UnicodeBlock.HEBREW,
   1362             UnicodeBlock.ARABIC,
   1363             UnicodeBlock.SYRIAC,
   1364             UnicodeBlock.THAANA,
   1365             UnicodeBlock.DEVANAGARI,
   1366             UnicodeBlock.BENGALI,
   1367             UnicodeBlock.GURMUKHI,
   1368             UnicodeBlock.GUJARATI,
   1369             UnicodeBlock.ORIYA,
   1370             UnicodeBlock.TAMIL,
   1371             UnicodeBlock.TELUGU,
   1372             UnicodeBlock.KANNADA,
   1373             UnicodeBlock.MALAYALAM,
   1374             UnicodeBlock.SINHALA,
   1375             UnicodeBlock.THAI,
   1376             UnicodeBlock.LAO,
   1377             UnicodeBlock.TIBETAN,
   1378             UnicodeBlock.MYANMAR,
   1379             UnicodeBlock.GEORGIAN,
   1380             UnicodeBlock.HANGUL_JAMO,
   1381             UnicodeBlock.ETHIOPIC,
   1382             UnicodeBlock.CHEROKEE,
   1383             UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
   1384             UnicodeBlock.OGHAM,
   1385             UnicodeBlock.RUNIC,
   1386             UnicodeBlock.KHMER,
   1387             UnicodeBlock.MONGOLIAN,
   1388             UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
   1389             UnicodeBlock.GREEK_EXTENDED,
   1390             UnicodeBlock.GENERAL_PUNCTUATION,
   1391             UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
   1392             UnicodeBlock.CURRENCY_SYMBOLS,
   1393             UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
   1394             UnicodeBlock.LETTERLIKE_SYMBOLS,
   1395             UnicodeBlock.NUMBER_FORMS,
   1396             UnicodeBlock.ARROWS,
   1397             UnicodeBlock.MATHEMATICAL_OPERATORS,
   1398             UnicodeBlock.MISCELLANEOUS_TECHNICAL,
   1399             UnicodeBlock.CONTROL_PICTURES,
   1400             UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
   1401             UnicodeBlock.ENCLOSED_ALPHANUMERICS,
   1402             UnicodeBlock.BOX_DRAWING,
   1403             UnicodeBlock.BLOCK_ELEMENTS,
   1404             UnicodeBlock.GEOMETRIC_SHAPES,
   1405             UnicodeBlock.MISCELLANEOUS_SYMBOLS,
   1406             UnicodeBlock.DINGBATS,
   1407             UnicodeBlock.BRAILLE_PATTERNS,
   1408             UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
   1409             UnicodeBlock.KANGXI_RADICALS,
   1410             UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
   1411             UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
   1412             UnicodeBlock.HIRAGANA,
   1413             UnicodeBlock.KATAKANA,
   1414             UnicodeBlock.BOPOMOFO,
   1415             UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
   1416             UnicodeBlock.KANBUN,
   1417             UnicodeBlock.BOPOMOFO_EXTENDED,
   1418             UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
   1419             UnicodeBlock.CJK_COMPATIBILITY,
   1420             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
   1421             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
   1422             UnicodeBlock.YI_SYLLABLES,
   1423             UnicodeBlock.YI_RADICALS,
   1424             UnicodeBlock.HANGUL_SYLLABLES,
   1425             UnicodeBlock.HIGH_SURROGATES,
   1426             UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
   1427             UnicodeBlock.LOW_SURROGATES,
   1428             UnicodeBlock.PRIVATE_USE_AREA,
   1429             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
   1430             UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
   1431             UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
   1432             UnicodeBlock.COMBINING_HALF_MARKS,
   1433             UnicodeBlock.CJK_COMPATIBILITY_FORMS,
   1434             UnicodeBlock.SMALL_FORM_VARIANTS,
   1435             UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
   1436             UnicodeBlock.SPECIALS,
   1437             UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
   1438             UnicodeBlock.OLD_ITALIC,
   1439             UnicodeBlock.GOTHIC,
   1440             UnicodeBlock.DESERET,
   1441             UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
   1442             UnicodeBlock.MUSICAL_SYMBOLS,
   1443             UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
   1444             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
   1445             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
   1446             UnicodeBlock.TAGS,
   1447             UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
   1448             UnicodeBlock.TAGALOG,
   1449             UnicodeBlock.HANUNOO,
   1450             UnicodeBlock.BUHID,
   1451             UnicodeBlock.TAGBANWA,
   1452             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
   1453             UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
   1454             UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
   1455             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
   1456             UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
   1457             UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
   1458             UnicodeBlock.VARIATION_SELECTORS,
   1459             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
   1460             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
   1461             UnicodeBlock.LIMBU,
   1462             UnicodeBlock.TAI_LE,
   1463             UnicodeBlock.KHMER_SYMBOLS,
   1464             UnicodeBlock.PHONETIC_EXTENSIONS,
   1465             UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
   1466             UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
   1467             UnicodeBlock.LINEAR_B_SYLLABARY,
   1468             UnicodeBlock.LINEAR_B_IDEOGRAMS,
   1469             UnicodeBlock.AEGEAN_NUMBERS,
   1470             UnicodeBlock.UGARITIC,
   1471             UnicodeBlock.SHAVIAN,
   1472             UnicodeBlock.OSMANYA,
   1473             UnicodeBlock.CYPRIOT_SYLLABARY,
   1474             UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
   1475             UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
   1476         };
   1477 
   1478         /**
   1479          * Retrieves the constant that corresponds to the specified block name.
   1480          * The block names are defined by the Unicode 4.0.1 specification in the
   1481          * {@code Blocks-4.0.1.txt} file.
   1482          * <p>
   1483          * Block names may be one of the following:
   1484          * <ul>
   1485          * <li>Canonical block name, as defined by the Unicode specification;
   1486          * case-insensitive.</li>
   1487          * <li>Canonical block name without any spaces, as defined by the
   1488          * Unicode specification; case-insensitive.</li>
   1489          * <li>{@code UnicodeBlock} constant identifier. This is determined by
   1490          * uppercasing the canonical name and replacing all spaces and hyphens
   1491          * with underscores.</li>
   1492          * </ul>
   1493          *
   1494          * @param blockName
   1495          *            the name of the block to retrieve.
   1496          * @return the UnicodeBlock constant corresponding to {@code blockName}.
   1497          * @throws NullPointerException
   1498          *             if {@code blockName} is {@code null}.
   1499          * @throws IllegalArgumentException
   1500          *             if {@code blockName} is not a valid block name.
   1501          * @since 1.5
   1502          */
   1503         public static UnicodeBlock forName(String blockName) {
   1504             if (blockName == null) {
   1505                 throw new NullPointerException("blockName == null");
   1506             }
   1507             int block = forNameImpl(blockName);
   1508             if (block == -1) {
   1509                 if (blockName.equals("SURROGATES_AREA")) {
   1510                     return SURROGATES_AREA;
   1511                 } else if(blockName.equalsIgnoreCase("greek")) {
   1512                     return GREEK;
   1513                 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
   1514                         blockName.equals("Combining Marks for Symbols") ||
   1515                         blockName.equals("CombiningMarksforSymbols")) {
   1516                     return COMBINING_MARKS_FOR_SYMBOLS;
   1517                 }
   1518                 throw new IllegalArgumentException("Bad block name: " + blockName);
   1519             }
   1520             return BLOCKS[block];
   1521         }
   1522 
   1523         /**
   1524          * Gets the constant for the Unicode block that contains the specified
   1525          * character.
   1526          *
   1527          * @param c
   1528          *            the character for which to get the {@code UnicodeBlock}
   1529          *            constant.
   1530          * @return the {@code UnicodeBlock} constant for the block that contains
   1531          *         {@code c}, or {@code null} if {@code c} does not belong to
   1532          *         any defined block.
   1533          */
   1534         public static UnicodeBlock of(char c) {
   1535             return of((int) c);
   1536         }
   1537 
   1538         /**
   1539          * Gets the constant for the Unicode block that contains the specified
   1540          * Unicode code point.
   1541          *
   1542          * @param codePoint
   1543          *            the Unicode code point for which to get the
   1544          *            {@code UnicodeBlock} constant.
   1545          * @return the {@code UnicodeBlock} constant for the block that contains
   1546          *         {@code codePoint}, or {@code null} if {@code codePoint} does
   1547          *         not belong to any defined block.
   1548          * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   1549          * @since 1.5
   1550          */
   1551         public static UnicodeBlock of(int codePoint) {
   1552             checkValidCodePoint(codePoint);
   1553             int block = ofImpl(codePoint);
   1554             if (block == -1 || block >= BLOCKS.length) {
   1555                 return null;
   1556             }
   1557             return BLOCKS[block];
   1558         }
   1559 
        // Creates a block constant with the given name. Only blockName is
        // forwarded to the superclass constructor; start and end are accepted
        // but neither stored nor used here.
        private UnicodeBlock(String blockName, int start, int end) {
            super(blockName);
        }
   1563     }
   1564 
    // Native lookup behind UnicodeBlock.forName(String). The caller uses the
    // result as an index into UnicodeBlock.BLOCKS and treats -1 as "unknown name".
    private static native int forNameImpl(String blockName);

    // Native lookup behind UnicodeBlock.of(int). The caller treats -1, or any
    // value at or beyond BLOCKS.length, as "no block for this code point".
    private static native int ofImpl(int codePoint);
   1568 
   1569     /**
   1570      * Constructs a new {@code Character} with the specified primitive char
   1571      * value.
   1572      *
   1573      * @param value
   1574      *            the primitive char value to store in the new instance.
   1575      */
    public Character(char value) {
        // Keep the wrapped primitive; charValue() returns it unchanged.
        this.value = value;
    }
   1579 
   1580     /**
   1581      * Gets the primitive value of this character.
   1582      *
   1583      * @return this object's primitive value.
   1584      */
   1585     public char charValue() {
   1586         return value;
   1587     }
   1588 
   1589     private static void checkValidCodePoint(int codePoint) {
   1590         if (!isValidCodePoint(codePoint)) {
   1591             throw new IllegalArgumentException("Invalid code point: " + codePoint);
   1592         }
   1593     }
   1594 
   1595     /**
   1596      * Compares this object to the specified character object to determine their
   1597      * relative order.
   1598      *
   1599      * @param c
   1600      *            the character object to compare this object to.
   1601      * @return {@code 0} if the value of this character and the value of
   1602      *         {@code c} are equal; a positive value if the value of this
   1603      *         character is greater than the value of {@code c}; a negative
   1604      *         value if the value of this character is less than the value of
   1605      *         {@code c}.
   1606      * @see java.lang.Comparable
   1607      * @since 1.2
   1608      */
   1609     public int compareTo(Character c) {
   1610         return compare(value, c.value);
   1611     }
   1612 
   1613     /**
   1614      * Compares two {@code char} values.
   1615      * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
   1616      * @since 1.7
   1617      * @hide 1.7
   1618      */
   1619     public static int compare(char lhs, char rhs) {
   1620         return lhs - rhs;
   1621     }
   1622 
   1623     /**
   1624      * Returns a {@code Character} instance for the {@code char} value passed.
   1625      * <p>
   1626      * If it is not necessary to get a new {@code Character} instance, it is
   1627      * recommended to use this method instead of the constructor, since it
   1628      * maintains a cache of instances which may result in better performance.
   1629      *
   1630      * @param c
   1631      *            the char value for which to get a {@code Character} instance.
   1632      * @return the {@code Character} instance for {@code c}.
   1633      * @since 1.5
   1634      */
   1635     public static Character valueOf(char c) {
   1636         return c < 128 ? SMALL_VALUES[c] : new Character(c);
   1637     }
   1638 
   1639     /**
   1640      * A cache of instances used by {@link #valueOf(char)} and auto-boxing
   1641      */
   1642     private static final Character[] SMALL_VALUES = new Character[128];
   1643 
   1644     static {
   1645         for (int i = 0; i < 128; i++) {
   1646             SMALL_VALUES[i] = new Character((char) i);
   1647         }
   1648     }
   1649     /**
   1650      * Indicates whether {@code codePoint} is a valid Unicode code point.
   1651      *
   1652      * @param codePoint
   1653      *            the code point to test.
   1654      * @return {@code true} if {@code codePoint} is a valid Unicode code point;
   1655      *         {@code false} otherwise.
   1656      * @since 1.5
   1657      */
   1658     public static boolean isValidCodePoint(int codePoint) {
   1659         return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
   1660     }
   1661 
   1662     /**
   1663      * Indicates whether {@code codePoint} is within the supplementary code
   1664      * point range.
   1665      *
   1666      * @param codePoint
   1667      *            the code point to test.
   1668      * @return {@code true} if {@code codePoint} is within the supplementary
   1669      *         code point range; {@code false} otherwise.
   1670      * @since 1.5
   1671      */
   1672     public static boolean isSupplementaryCodePoint(int codePoint) {
   1673         return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
   1674     }
   1675 
   1676     /**
   1677      * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
   1678      * that is used for representing supplementary characters in UTF-16
   1679      * encoding.
   1680      *
   1681      * @param ch
   1682      *            the character to test.
   1683      * @return {@code true} if {@code ch} is a high-surrogate code unit;
   1684      *         {@code false} otherwise.
   1685      * @see #isLowSurrogate(char)
   1686      * @since 1.5
   1687      */
   1688     public static boolean isHighSurrogate(char ch) {
   1689         return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
   1690     }
   1691 
   1692     /**
   1693      * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
   1694      * that is used for representing supplementary characters in UTF-16
   1695      * encoding.
   1696      *
   1697      * @param ch
   1698      *            the character to test.
   1699      * @return {@code true} if {@code ch} is a low-surrogate code unit;
   1700      *         {@code false} otherwise.
   1701      * @see #isHighSurrogate(char)
   1702      * @since 1.5
   1703      */
   1704     public static boolean isLowSurrogate(char ch) {
   1705         return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
   1706     }
   1707 
   1708     /**
   1709      * Tests whether the given character is a high or low surrogate.
   1710      * @since 1.7
   1711      * @hide 1.7
   1712      */
   1713     public static boolean isSurrogate(char ch) {
   1714         return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
   1715     }
   1716 
   1717     /**
   1718      * Indicates whether the specified character pair is a valid surrogate pair.
   1719      *
   1720      * @param high
   1721      *            the high surrogate unit to test.
   1722      * @param low
   1723      *            the low surrogate unit to test.
   1724      * @return {@code true} if {@code high} is a high-surrogate code unit and
   1725      *         {@code low} is a low-surrogate code unit; {@code false}
   1726      *         otherwise.
   1727      * @see #isHighSurrogate(char)
   1728      * @see #isLowSurrogate(char)
   1729      * @since 1.5
   1730      */
   1731     public static boolean isSurrogatePair(char high, char low) {
   1732         return (isHighSurrogate(high) && isLowSurrogate(low));
   1733     }
   1734 
   1735     /**
   1736      * Calculates the number of {@code char} values required to represent the
   1737      * specified Unicode code point. This method checks if the {@code codePoint}
   1738      * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
   1739      * returned, otherwise {@code 1}. To test if the code point is valid, use
   1740      * the {@link #isValidCodePoint(int)} method.
   1741      *
   1742      * @param codePoint
   1743      *            the code point for which to calculate the number of required
   1744      *            chars.
   1745      * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
   1746      * @see #isValidCodePoint(int)
   1747      * @see #isSupplementaryCodePoint(int)
   1748      * @since 1.5
   1749      */
   1750     public static int charCount(int codePoint) {
   1751         return (codePoint >= 0x10000 ? 2 : 1);
   1752     }
   1753 
   1754     /**
   1755      * Converts a surrogate pair into a Unicode code point. This method assumes
   1756      * that the pair are valid surrogates. If the pair are <i>not</i> valid
   1757      * surrogates, then the result is indeterminate. The
   1758      * {@link #isSurrogatePair(char, char)} method should be used prior to this
   1759      * method to validate the pair.
   1760      *
   1761      * @param high
   1762      *            the high surrogate unit.
   1763      * @param low
   1764      *            the low surrogate unit.
   1765      * @return the Unicode code point corresponding to the surrogate unit pair.
   1766      * @see #isSurrogatePair(char, char)
   1767      * @since 1.5
   1768      */
   1769     public static int toCodePoint(char high, char low) {
   1770         // See RFC 2781, Section 2.2
   1771         // http://www.ietf.org/rfc/rfc2781.txt
   1772         int h = (high & 0x3FF) << 10;
   1773         int l = low & 0x3FF;
   1774         return (h | l) + 0x10000;
   1775     }
   1776 
   1777     /**
   1778      * Returns the code point at {@code index} in the specified sequence of
   1779      * character units. If the unit at {@code index} is a high-surrogate unit,
   1780      * {@code index + 1} is less than the length of the sequence and the unit at
   1781      * {@code index + 1} is a low-surrogate unit, then the supplementary code
   1782      * point represented by the pair is returned; otherwise the {@code char}
   1783      * value at {@code index} is returned.
   1784      *
   1785      * @param seq
   1786      *            the source sequence of {@code char} units.
   1787      * @param index
   1788      *            the position in {@code seq} from which to retrieve the code
   1789      *            point.
   1790      * @return the Unicode code point or {@code char} value at {@code index} in
   1791      *         {@code seq}.
   1792      * @throws NullPointerException
   1793      *             if {@code seq} is {@code null}.
   1794      * @throws IndexOutOfBoundsException
   1795      *             if the {@code index} is negative or greater than or equal to
   1796      *             the length of {@code seq}.
   1797      * @since 1.5
   1798      */
   1799     public static int codePointAt(CharSequence seq, int index) {
   1800         if (seq == null) {
   1801             throw new NullPointerException("seq == null");
   1802         }
   1803         int len = seq.length();
   1804         if (index < 0 || index >= len) {
   1805             throw new IndexOutOfBoundsException();
   1806         }
   1807 
   1808         char high = seq.charAt(index++);
   1809         if (index >= len) {
   1810             return high;
   1811         }
   1812         char low = seq.charAt(index);
   1813         if (isSurrogatePair(high, low)) {
   1814             return toCodePoint(high, low);
   1815         }
   1816         return high;
   1817     }
   1818 
   1819     /**
   1820      * Returns the code point at {@code index} in the specified array of
   1821      * character units. If the unit at {@code index} is a high-surrogate unit,
   1822      * {@code index + 1} is less than the length of the array and the unit at
   1823      * {@code index + 1} is a low-surrogate unit, then the supplementary code
   1824      * point represented by the pair is returned; otherwise the {@code char}
   1825      * value at {@code index} is returned.
   1826      *
   1827      * @param seq
   1828      *            the source array of {@code char} units.
   1829      * @param index
   1830      *            the position in {@code seq} from which to retrieve the code
   1831      *            point.
   1832      * @return the Unicode code point or {@code char} value at {@code index} in
   1833      *         {@code seq}.
   1834      * @throws NullPointerException
   1835      *             if {@code seq} is {@code null}.
   1836      * @throws IndexOutOfBoundsException
   1837      *             if the {@code index} is negative or greater than or equal to
   1838      *             the length of {@code seq}.
   1839      * @since 1.5
   1840      */
   1841     public static int codePointAt(char[] seq, int index) {
   1842         if (seq == null) {
   1843             throw new NullPointerException("seq == null");
   1844         }
   1845         int len = seq.length;
   1846         if (index < 0 || index >= len) {
   1847             throw new IndexOutOfBoundsException();
   1848         }
   1849 
   1850         char high = seq[index++];
   1851         if (index >= len) {
   1852             return high;
   1853         }
   1854         char low = seq[index];
   1855         if (isSurrogatePair(high, low)) {
   1856             return toCodePoint(high, low);
   1857         }
   1858         return high;
   1859     }
   1860 
   1861     /**
   1862      * Returns the code point at {@code index} in the specified array of
   1863      * character units, where {@code index} has to be less than {@code limit}.
   1864      * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
   1865      * is less than {@code limit} and the unit at {@code index + 1} is a
   1866      * low-surrogate unit, then the supplementary code point represented by the
   1867      * pair is returned; otherwise the {@code char} value at {@code index} is
   1868      * returned.
   1869      *
   1870      * @param seq
   1871      *            the source array of {@code char} units.
   1872      * @param index
   1873      *            the position in {@code seq} from which to get the code point.
   1874      * @param limit
   1875      *            the index after the last unit in {@code seq} that can be used.
   1876      * @return the Unicode code point or {@code char} value at {@code index} in
   1877      *         {@code seq}.
   1878      * @throws NullPointerException
   1879      *             if {@code seq} is {@code null}.
   1880      * @throws IndexOutOfBoundsException
   1881      *             if {@code index < 0}, {@code index >= limit},
   1882      *             {@code limit < 0} or if {@code limit} is greater than the
   1883      *             length of {@code seq}.
   1884      * @since 1.5
   1885      */
   1886     public static int codePointAt(char[] seq, int index, int limit) {
   1887         if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
   1888             throw new IndexOutOfBoundsException();
   1889         }
   1890 
   1891         char high = seq[index++];
   1892         if (index >= limit) {
   1893             return high;
   1894         }
   1895         char low = seq[index];
   1896         if (isSurrogatePair(high, low)) {
   1897             return toCodePoint(high, low);
   1898         }
   1899         return high;
   1900     }
   1901 
   1902     /**
   1903      * Returns the code point that precedes {@code index} in the specified
   1904      * sequence of character units. If the unit at {@code index - 1} is a
   1905      * low-surrogate unit, {@code index - 2} is not negative and the unit at
   1906      * {@code index - 2} is a high-surrogate unit, then the supplementary code
   1907      * point represented by the pair is returned; otherwise the {@code char}
   1908      * value at {@code index - 1} is returned.
   1909      *
   1910      * @param seq
   1911      *            the source sequence of {@code char} units.
   1912      * @param index
   1913      *            the position in {@code seq} following the code
   1914      *            point that should be returned.
   1915      * @return the Unicode code point or {@code char} value before {@code index}
   1916      *         in {@code seq}.
   1917      * @throws NullPointerException
   1918      *             if {@code seq} is {@code null}.
   1919      * @throws IndexOutOfBoundsException
   1920      *             if the {@code index} is less than 1 or greater than the
   1921      *             length of {@code seq}.
   1922      * @since 1.5
   1923      */
   1924     public static int codePointBefore(CharSequence seq, int index) {
   1925         if (seq == null) {
   1926             throw new NullPointerException("seq == null");
   1927         }
   1928         int len = seq.length();
   1929         if (index < 1 || index > len) {
   1930             throw new IndexOutOfBoundsException();
   1931         }
   1932 
   1933         char low = seq.charAt(--index);
   1934         if (--index < 0) {
   1935             return low;
   1936         }
   1937         char high = seq.charAt(index);
   1938         if (isSurrogatePair(high, low)) {
   1939             return toCodePoint(high, low);
   1940         }
   1941         return low;
   1942     }
   1943 
   1944     /**
   1945      * Returns the code point that precedes {@code index} in the specified
   1946      * array of character units. If the unit at {@code index - 1} is a
   1947      * low-surrogate unit, {@code index - 2} is not negative and the unit at
   1948      * {@code index - 2} is a high-surrogate unit, then the supplementary code
   1949      * point represented by the pair is returned; otherwise the {@code char}
   1950      * value at {@code index - 1} is returned.
   1951      *
   1952      * @param seq
   1953      *            the source array of {@code char} units.
   1954      * @param index
   1955      *            the position in {@code seq} following the code
   1956      *            point that should be returned.
   1957      * @return the Unicode code point or {@code char} value before {@code index}
   1958      *         in {@code seq}.
   1959      * @throws NullPointerException
   1960      *             if {@code seq} is {@code null}.
   1961      * @throws IndexOutOfBoundsException
   1962      *             if the {@code index} is less than 1 or greater than the
   1963      *             length of {@code seq}.
   1964      * @since 1.5
   1965      */
   1966     public static int codePointBefore(char[] seq, int index) {
   1967         if (seq == null) {
   1968             throw new NullPointerException("seq == null");
   1969         }
   1970         int len = seq.length;
   1971         if (index < 1 || index > len) {
   1972             throw new IndexOutOfBoundsException();
   1973         }
   1974 
   1975         char low = seq[--index];
   1976         if (--index < 0) {
   1977             return low;
   1978         }
   1979         char high = seq[index];
   1980         if (isSurrogatePair(high, low)) {
   1981             return toCodePoint(high, low);
   1982         }
   1983         return low;
   1984     }
   1985 
   1986     /**
   1987      * Returns the code point that precedes the {@code index} in the specified
   1988      * array of character units and is not less than {@code start}. If the unit
   1989      * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
   1990      * less than {@code start} and the unit at {@code index - 2} is a
   1991      * high-surrogate unit, then the supplementary code point represented by the
   1992      * pair is returned; otherwise the {@code char} value at {@code index - 1}
   1993      * is returned.
   1994      *
   1995      * @param seq
   1996      *            the source array of {@code char} units.
   1997      * @param index
   1998      *            the position in {@code seq} following the code point that
   1999      *            should be returned.
   2000      * @param start
   2001      *            the index of the first element in {@code seq}.
   2002      * @return the Unicode code point or {@code char} value before {@code index}
   2003      *         in {@code seq}.
   2004      * @throws NullPointerException
   2005      *             if {@code seq} is {@code null}.
   2006      * @throws IndexOutOfBoundsException
   2007      *             if the {@code index <= start}, {@code start < 0},
   2008      *             {@code index} is greater than the length of {@code seq}, or
   2009      *             if {@code start} is equal or greater than the length of
   2010      *             {@code seq}.
   2011      * @since 1.5
   2012      */
   2013     public static int codePointBefore(char[] seq, int index, int start) {
   2014         if (seq == null) {
   2015             throw new NullPointerException("seq == null");
   2016         }
   2017         int len = seq.length;
   2018         if (index <= start || index > len || start < 0 || start >= len) {
   2019             throw new IndexOutOfBoundsException();
   2020         }
   2021 
   2022         char low = seq[--index];
   2023         if (--index < start) {
   2024             return low;
   2025         }
   2026         char high = seq[index];
   2027         if (isSurrogatePair(high, low)) {
   2028             return toCodePoint(high, low);
   2029         }
   2030         return low;
   2031     }
   2032 
   2033     /**
   2034      * Converts the specified Unicode code point into a UTF-16 encoded sequence
   2035      * and copies the value(s) into the char array {@code dst}, starting at
   2036      * index {@code dstIndex}.
   2037      *
   2038      * @param codePoint
   2039      *            the Unicode code point to encode.
   2040      * @param dst
   2041      *            the destination array to copy the encoded value into.
   2042      * @param dstIndex
   2043      *            the index in {@code dst} from where to start copying.
   2044      * @return the number of {@code char} value units copied into {@code dst}.
   2045      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   2046      * @throws NullPointerException
   2047      *             if {@code dst} is {@code null}.
   2048      * @throws IndexOutOfBoundsException
   2049      *             if {@code dstIndex} is negative, greater than or equal to
   2050      *             {@code dst.length} or equals {@code dst.length - 1} when
   2051      *             {@code codePoint} is a
   2052      *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
   2053      * @since 1.5
   2054      */
   2055     public static int toChars(int codePoint, char[] dst, int dstIndex) {
   2056         checkValidCodePoint(codePoint);
   2057         if (dst == null) {
   2058             throw new NullPointerException("dst == null");
   2059         }
   2060         if (dstIndex < 0 || dstIndex >= dst.length) {
   2061             throw new IndexOutOfBoundsException();
   2062         }
   2063 
   2064         if (isSupplementaryCodePoint(codePoint)) {
   2065             if (dstIndex == dst.length - 1) {
   2066                 throw new IndexOutOfBoundsException();
   2067             }
   2068             // See RFC 2781, Section 2.1
   2069             // http://www.ietf.org/rfc/rfc2781.txt
   2070             int cpPrime = codePoint - 0x10000;
   2071             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
   2072             int low = 0xDC00 | (cpPrime & 0x3FF);
   2073             dst[dstIndex] = (char) high;
   2074             dst[dstIndex + 1] = (char) low;
   2075             return 2;
   2076         }
   2077 
   2078         dst[dstIndex] = (char) codePoint;
   2079         return 1;
   2080     }
   2081 
   2082     /**
   2083      * Converts the specified Unicode code point into a UTF-16 encoded sequence
   2084      * and returns it as a char array.
   2085      *
   2086      * @param codePoint
   2087      *            the Unicode code point to encode.
   2088      * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
   2089      *         {@link #isSupplementaryCodePoint(int) supplementary code point},
   2090      *         then the returned array contains two characters, otherwise it
   2091      *         contains just one character.
   2092      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   2093      * @since 1.5
   2094      */
   2095     public static char[] toChars(int codePoint) {
   2096         checkValidCodePoint(codePoint);
   2097         if (isSupplementaryCodePoint(codePoint)) {
   2098             int cpPrime = codePoint - 0x10000;
   2099             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
   2100             int low = 0xDC00 | (cpPrime & 0x3FF);
   2101             return new char[] { (char) high, (char) low };
   2102         }
   2103         return new char[] { (char) codePoint };
   2104     }
   2105 
   2106     /**
   2107      * Counts the number of Unicode code points in the subsequence of the
   2108      * specified character sequence, as delineated by {@code beginIndex} and
   2109      * {@code endIndex}. Any surrogate values with missing pair values will be
   2110      * counted as one code point.
   2111      *
   2112      * @param seq
   2113      *            the {@code CharSequence} to look through.
   2114      * @param beginIndex
   2115      *            the inclusive index to begin counting at.
   2116      * @param endIndex
   2117      *            the exclusive index to stop counting at.
   2118      * @return the number of Unicode code points.
   2119      * @throws NullPointerException
   2120      *             if {@code seq} is {@code null}.
   2121      * @throws IndexOutOfBoundsException
   2122      *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
   2123      *             if {@code endIndex} is greater than the length of {@code seq}.
   2124      * @since 1.5
   2125      */
   2126     public static int codePointCount(CharSequence seq, int beginIndex,
   2127             int endIndex) {
   2128         if (seq == null) {
   2129             throw new NullPointerException("seq == null");
   2130         }
   2131         int len = seq.length();
   2132         if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
   2133             throw new IndexOutOfBoundsException();
   2134         }
   2135 
   2136         int result = 0;
   2137         for (int i = beginIndex; i < endIndex; i++) {
   2138             char c = seq.charAt(i);
   2139             if (isHighSurrogate(c)) {
   2140                 if (++i < endIndex) {
   2141                     c = seq.charAt(i);
   2142                     if (!isLowSurrogate(c)) {
   2143                         result++;
   2144                     }
   2145                 }
   2146             }
   2147             result++;
   2148         }
   2149         return result;
   2150     }
   2151 
   2152     /**
   2153      * Counts the number of Unicode code points in the subsequence of the
   2154      * specified char array, as delineated by {@code offset} and {@code count}.
   2155      * Any surrogate values with missing pair values will be counted as one code
   2156      * point.
   2157      *
   2158      * @param seq
   2159      *            the char array to look through
   2160      * @param offset
   2161      *            the inclusive index to begin counting at.
   2162      * @param count
   2163      *            the number of {@code char} values to look through in
   2164      *            {@code seq}.
   2165      * @return the number of Unicode code points.
   2166      * @throws NullPointerException
   2167      *             if {@code seq} is {@code null}.
   2168      * @throws IndexOutOfBoundsException
   2169      *             if {@code offset < 0}, {@code count < 0} or if
   2170      *             {@code offset + count} is greater than the length of
   2171      *             {@code seq}.
   2172      * @since 1.5
   2173      */
   2174     public static int codePointCount(char[] seq, int offset, int count) {
   2175         Arrays.checkOffsetAndCount(seq.length, offset, count);
   2176         int endIndex = offset + count;
   2177         int result = 0;
   2178         for (int i = offset; i < endIndex; i++) {
   2179             char c = seq[i];
   2180             if (isHighSurrogate(c)) {
   2181                 if (++i < endIndex) {
   2182                     c = seq[i];
   2183                     if (!isLowSurrogate(c)) {
   2184                         result++;
   2185                     }
   2186                 }
   2187             }
   2188             result++;
   2189         }
   2190         return result;
   2191     }
   2192 
   2193     /**
   2194      * Determines the index in the specified character sequence that is offset
   2195      * {@code codePointOffset} code points from {@code index}.
   2196      *
   2197      * @param seq
   2198      *            the character sequence to find the index in.
   2199      * @param index
   2200      *            the start index in {@code seq}.
   2201      * @param codePointOffset
   2202      *            the number of code points to look backwards or forwards; may
   2203      *            be a negative or positive value.
   2204      * @return the index in {@code seq} that is {@code codePointOffset} code
   2205      *         points away from {@code index}.
   2206      * @throws NullPointerException
   2207      *             if {@code seq} is {@code null}.
   2208      * @throws IndexOutOfBoundsException
   2209      *             if {@code index < 0}, {@code index} is greater than the
   2210      *             length of {@code seq}, or if there are not enough values in
   2211      *             {@code seq} to skip {@code codePointOffset} code points
   2212      *             forwards or backwards (if {@code codePointOffset} is
   2213      *             negative) from {@code index}.
   2214      * @since 1.5
   2215      */
   2216     public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
   2217         if (seq == null) {
   2218             throw new NullPointerException("seq == null");
   2219         }
   2220         int len = seq.length();
   2221         if (index < 0 || index > len) {
   2222             throw new IndexOutOfBoundsException();
   2223         }
   2224 
   2225         if (codePointOffset == 0) {
   2226             return index;
   2227         }
   2228 
   2229         if (codePointOffset > 0) {
   2230             int codePoints = codePointOffset;
   2231             int i = index;
   2232             while (codePoints > 0) {
   2233                 codePoints--;
   2234                 if (i >= len) {
   2235                     throw new IndexOutOfBoundsException();
   2236                 }
   2237                 if (isHighSurrogate(seq.charAt(i))) {
   2238                     int next = i + 1;
   2239                     if (next < len && isLowSurrogate(seq.charAt(next))) {
   2240                         i++;
   2241                     }
   2242                 }
   2243                 i++;
   2244             }
   2245             return i;
   2246         }
   2247 
   2248         int codePoints = -codePointOffset;
   2249         int i = index;
   2250         while (codePoints > 0) {
   2251             codePoints--;
   2252             i--;
   2253             if (i < 0) {
   2254                 throw new IndexOutOfBoundsException();
   2255             }
   2256             if (isLowSurrogate(seq.charAt(i))) {
   2257                 int prev = i - 1;
   2258                 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
   2259                     i--;
   2260                 }
   2261             }
   2262         }
   2263         return i;
   2264     }
   2265 
   2266     /**
   2267      * Determines the index in a subsequence of the specified character array
   2268      * that is offset {@code codePointOffset} code points from {@code index}.
   2269      * The subsequence is delineated by {@code start} and {@code count}.
   2270      *
   2271      * @param seq
   2272      *            the character array to find the index in.
   2273      * @param start
   2274      *            the inclusive index that marks the beginning of the
   2275      *            subsequence.
   2276      * @param count
   2277      *            the number of {@code char} values to include within the
   2278      *            subsequence.
   2279      * @param index
   2280      *            the start index in the subsequence of the char array.
   2281      * @param codePointOffset
   2282      *            the number of code points to look backwards or forwards; may
   2283      *            be a negative or positive value.
   2284      * @return the index in {@code seq} that is {@code codePointOffset} code
   2285      *         points away from {@code index}.
   2286      * @throws NullPointerException
   2287      *             if {@code seq} is {@code null}.
   2288      * @throws IndexOutOfBoundsException
   2289      *             if {@code start < 0}, {@code count < 0},
   2290      *             {@code index < start}, {@code index > start + count},
   2291      *             {@code start + count} is greater than the length of
   2292      *             {@code seq}, or if there are not enough values in
   2293      *             {@code seq} to skip {@code codePointOffset} code points
   2294      *             forward or backward (if {@code codePointOffset} is
   2295      *             negative) from {@code index}.
   2296      * @since 1.5
   2297      */
   2298     public static int offsetByCodePoints(char[] seq, int start, int count,
   2299             int index, int codePointOffset) {
   2300         Arrays.checkOffsetAndCount(seq.length, start, count);
   2301         int end = start + count;
   2302         if (index < start || index > end) {
   2303             throw new IndexOutOfBoundsException();
   2304         }
   2305 
   2306         if (codePointOffset == 0) {
   2307             return index;
   2308         }
   2309 
   2310         if (codePointOffset > 0) {
   2311             int codePoints = codePointOffset;
   2312             int i = index;
   2313             while (codePoints > 0) {
   2314                 codePoints--;
   2315                 if (i >= end) {
   2316                     throw new IndexOutOfBoundsException();
   2317                 }
   2318                 if (isHighSurrogate(seq[i])) {
   2319                     int next = i + 1;
   2320                     if (next < end && isLowSurrogate(seq[next])) {
   2321                         i++;
   2322                     }
   2323                 }
   2324                 i++;
   2325             }
   2326             return i;
   2327         }
   2328 
   2329         int codePoints = -codePointOffset;
   2330         int i = index;
   2331         while (codePoints > 0) {
   2332             codePoints--;
   2333             i--;
   2334             if (i < start) {
   2335                 throw new IndexOutOfBoundsException();
   2336             }
   2337             if (isLowSurrogate(seq[i])) {
   2338                 int prev = i - 1;
   2339                 if (prev >= start && isHighSurrogate(seq[prev])) {
   2340                     i--;
   2341                 }
   2342             }
   2343         }
   2344         return i;
   2345     }
   2346 
   2347     /**
   2348      * Convenience method to determine the value of the specified character
   2349      * {@code c} in the supplied radix. The value of {@code radix} must be
   2350      * between MIN_RADIX and MAX_RADIX.
   2351      *
   2352      * @param c
   2353      *            the character to determine the value of.
   2354      * @param radix
   2355      *            the radix.
   2356      * @return the value of {@code c} in {@code radix} if {@code radix} lies
   2357      *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
   2358      */
   2359     public static int digit(char c, int radix) {
   2360         return digit((int) c, radix);
   2361     }
   2362 
   2363     /**
   2364      * Convenience method to determine the value of the character
   2365      * {@code codePoint} in the supplied radix. The value of {@code radix} must
   2366      * be between MIN_RADIX and MAX_RADIX.
   2367      *
   2368      * @param codePoint
   2369      *            the character, including supplementary characters.
   2370      * @param radix
   2371      *            the radix.
   2372      * @return if {@code radix} lies between {@link #MIN_RADIX} and
   2373      *         {@link #MAX_RADIX} then the value of the character in the radix;
   2374      *         -1 otherwise.
   2375      */
   2376     public static int digit(int codePoint, int radix) {
   2377         if (radix < MIN_RADIX || radix > MAX_RADIX) {
   2378             return -1;
   2379         }
   2380         if (codePoint < 128) {
   2381             // Optimized for ASCII
   2382             int result = -1;
   2383             if ('0' <= codePoint && codePoint <= '9') {
   2384                 result = codePoint - '0';
   2385             } else if ('a' <= codePoint && codePoint <= 'z') {
   2386                 result = 10 + (codePoint - 'a');
   2387             } else if ('A' <= codePoint && codePoint <= 'Z') {
   2388                 result = 10 + (codePoint - 'A');
   2389             }
   2390             return result < radix ? result : -1;
   2391         }
   2392         return digitImpl(codePoint, radix);
   2393     }
   2394 
    // Native lookup used by digit(int, int) for code points >= 128, after the radix has
    // been range-checked by the caller.
    private static native int digitImpl(int codePoint, int radix);
   2396 
   2397     /**
   2398      * Compares this object with the specified object and indicates if they are
   2399      * equal. In order to be equal, {@code object} must be an instance of
   2400      * {@code Character} and have the same char value as this object.
   2401      *
   2402      * @param object
     *            the object to compare this {@code Character} with.
   2404      * @return {@code true} if the specified object is equal to this
   2405      *         {@code Character}; {@code false} otherwise.
   2406      */
   2407     @Override
   2408     public boolean equals(Object object) {
   2409         return (object instanceof Character) && (((Character) object).value == value);
   2410     }
   2411 
   2412     /**
   2413      * Returns the character which represents the specified digit in the
   2414      * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
     * {@code MAX_RADIX} inclusive; {@code digit} must be non-negative and
   2416      * smaller than {@code radix}. If any of these conditions does not hold, 0
   2417      * is returned.
   2418      *
   2419      * @param digit
   2420      *            the integer value.
   2421      * @param radix
   2422      *            the radix.
   2423      * @return the character which represents the {@code digit} in the
   2424      *         {@code radix}.
   2425      */
   2426     public static char forDigit(int digit, int radix) {
   2427         if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
   2428             if (digit >= 0 && digit < radix) {
   2429                 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
   2430             }
   2431         }
   2432         return 0;
   2433     }
   2434 
   2435     /**
   2436      * Returns the name of the given code point, or null if the code point is unassigned.
   2437      *
   2438      * <p>As a fallback mechanism this method returns strings consisting of the Unicode
   2439      * block name (with underscores replaced by spaces), a single space, and the uppercase
   2440      * hex value of the code point, using as few digits as necessary.
   2441      *
   2442      * <p>Examples:
   2443      * <ul>
   2444      * <li>{@code Character.getName(0)} returns "NULL".
   2445      * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
   2446      * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
   2447      * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
   2448      * </ul>
   2449      *
   2450      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   2451      * @since 1.7
   2452      * @hide 1.7
   2453      */
   2454     public static String getName(int codePoint) {
   2455         checkValidCodePoint(codePoint);
   2456         if (getType(codePoint) == Character.UNASSIGNED) {
   2457             return null;
   2458         }
   2459         String result = getNameImpl(codePoint);
   2460         if (result == null) {
   2461             String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
   2462             result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0);
   2463         }
   2464         return result;
   2465     }
   2466 
    // Native name lookup used by getName(int); a null result makes the caller fall back to
    // the "<block name> <hex value>" form.
    private static native String getNameImpl(int codePoint);
   2468 
   2469     /**
   2470      * Returns the numeric value of the specified Unicode character.
   2471      * See {@link #getNumericValue(int)}.
   2472      *
   2473      * @param c the character
   2474      * @return a non-negative numeric integer value if a numeric value for
   2475      *         {@code c} exists, -1 if there is no numeric value for {@code c},
   2476      *         -2 if the numeric value can not be represented as an integer.
   2477      */
   2478     public static int getNumericValue(char c) {
   2479         return getNumericValue((int) c);
   2480     }
   2481 
   2482     /**
   2483      * Gets the numeric value of the specified Unicode code point. For example,
   2484      * the code point '\u216B' stands for the Roman number XII, which has the
   2485      * numeric value 12.
   2486      *
   2487      * <p>There are two points of divergence between this method and the Unicode
   2488      * specification. This method treats the letters a-z (in both upper and lower
   2489      * cases, and their full-width variants) as numbers from 10 to 35. The
   2490      * Unicode specification also supports the idea of code points with non-integer
   2491      * numeric values; this method does not (except to the extent of returning -2
   2492      * for such code points).
   2493      *
   2494      * @param codePoint the code point
   2495      * @return a non-negative numeric integer value if a numeric value for
   2496      *         {@code codePoint} exists, -1 if there is no numeric value for
   2497      *         {@code codePoint}, -2 if the numeric value can not be
   2498      *         represented with an integer.
   2499      */
   2500     public static int getNumericValue(int codePoint) {
   2501         // This is both an optimization and papers over differences between Java and ICU.
   2502         if (codePoint < 128) {
   2503             if (codePoint >= '0' && codePoint <= '9') {
   2504                 return codePoint - '0';
   2505             }
   2506             if (codePoint >= 'a' && codePoint <= 'z') {
   2507                 return codePoint - ('a' - 10);
   2508             }
   2509             if (codePoint >= 'A' && codePoint <= 'Z') {
   2510                 return codePoint - ('A' - 10);
   2511             }
   2512             return -1;
   2513         }
   2514         // Full-width uppercase A-Z.
   2515         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
   2516             return codePoint - 0xff17;
   2517         }
   2518         // Full-width lowercase a-z.
   2519         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
   2520             return codePoint - 0xff37;
   2521         }
   2522         return getNumericValueImpl(codePoint);
   2523     }
   2524 
    // Native lookup used by getNumericValue(int) for code points >= 128 that are not
    // full-width Latin letters.
    private static native int getNumericValueImpl(int codePoint);
   2526 
   2527     /**
   2528      * Gets the general Unicode category of the specified character.
   2529      *
   2530      * @param c
   2531      *            the character to get the category of.
   2532      * @return the Unicode category of {@code c}.
   2533      */
   2534     public static int getType(char c) {
   2535         return getType((int) c);
   2536     }
   2537 
   2538     /**
   2539      * Gets the general Unicode category of the specified code point.
   2540      *
   2541      * @param codePoint
   2542      *            the Unicode code point to get the category of.
   2543      * @return the Unicode category of {@code codePoint}.
   2544      */
   2545     public static int getType(int codePoint) {
   2546         int type = getTypeImpl(codePoint);
   2547         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
   2548         if (type <= Character.FORMAT) {
   2549             return type;
   2550         }
   2551         return (type + 1);
   2552     }
   2553 
    // Native category lookup used by getType(int), which remaps values above
    // Character.FORMAT before returning them.
    private static native int getTypeImpl(int codePoint);
   2555 
   2556     /**
   2557      * Gets the Unicode directionality of the specified character.
   2558      *
   2559      * @param c
   2560      *            the character to get the directionality of.
   2561      * @return the Unicode directionality of {@code c}.
   2562      */
   2563     public static byte getDirectionality(char c) {
   2564         return getDirectionality((int)c);
   2565     }
   2566 
   2567     /**
   2568      * Gets the Unicode directionality of the specified character.
   2569      *
   2570      * @param codePoint
   2571      *            the Unicode code point to get the directionality of.
   2572      * @return the Unicode directionality of {@code codePoint}.
   2573      */
   2574     public static byte getDirectionality(int codePoint) {
   2575         if (getType(codePoint) == Character.UNASSIGNED) {
   2576             return Character.DIRECTIONALITY_UNDEFINED;
   2577         }
   2578 
   2579         byte directionality = getDirectionalityImpl(codePoint);
   2580         if (directionality == -1) {
   2581             return -1;
   2582         }
   2583         return DIRECTIONALITY[directionality];
   2584     }
   2585 
    // Native lookup used by getDirectionality(int); the caller passes -1 through unchanged
    // and otherwise uses the result as an index into DIRECTIONALITY.
    private static native byte getDirectionalityImpl(int codePoint);
   2587 
   2588     /**
   2589      * Indicates whether the specified character is mirrored.
   2590      *
   2591      * @param c
   2592      *            the character to check.
   2593      * @return {@code true} if {@code c} is mirrored; {@code false}
   2594      *         otherwise.
   2595      */
   2596     public static boolean isMirrored(char c) {
   2597         return isMirrored((int) c);
   2598     }
   2599 
   2600     /**
   2601      * Indicates whether the specified code point is mirrored.
   2602      *
   2603      * @param codePoint
   2604      *            the code point to check.
   2605      * @return {@code true} if {@code codePoint} is mirrored, {@code false}
   2606      *         otherwise.
   2607      */
   2608     public static boolean isMirrored(int codePoint) {
   2609         return isMirroredImpl(codePoint);
   2610     }
   2611 
    // Native lookup backing isMirrored(int).
    private static native boolean isMirroredImpl(int codePoint);
   2613 
   2614     @Override
   2615     public int hashCode() {
   2616         return value;
   2617     }
   2618 
   2619     /**
   2620      * Returns the high surrogate for the given code point. The result is meaningless if
   2621      * the given code point is not a supplementary character.
   2622      * @since 1.7
   2623      * @hide 1.7
   2624      */
   2625     public static char highSurrogate(int codePoint) {
   2626         return (char) ((codePoint >> 10) + 0xd7c0);
   2627     }
   2628 
   2629     /**
   2630      * Returns the low surrogate for the given code point. The result is meaningless if
   2631      * the given code point is not a supplementary character.
   2632      * @since 1.7
   2633      * @hide 1.7
   2634      */
   2635     public static char lowSurrogate(int codePoint) {
   2636         return (char) ((codePoint & 0x3ff) | 0xdc00);
   2637     }
   2638 
   2639     /**
   2640      * Tests whether the given code point is in the Basic Multilingual Plane (BMP).
   2641      * Such code points can be represented by a single {@code char}.
   2642      * @since 1.7
   2643      * @hide 1.7
   2644      */
   2645     public static boolean isBmpCodePoint(int codePoint) {
   2646         return codePoint >= 0 && codePoint <= 0xffff;
   2647     }
   2648 
   2649     /**
   2650      * Indicates whether the specified character is defined in the Unicode
   2651      * specification.
   2652      *
   2653      * @param c
   2654      *            the character to check.
   2655      * @return {@code true} if the general Unicode category of the character is
   2656      *         not {@code UNASSIGNED}; {@code false} otherwise.
   2657      */
   2658     public static boolean isDefined(char c) {
   2659         return isDefinedImpl(c);
   2660     }
   2661 
   2662     /**
   2663      * Indicates whether the specified code point is defined in the Unicode
   2664      * specification.
   2665      *
   2666      * @param codePoint
   2667      *            the code point to check.
   2668      * @return {@code true} if the general Unicode category of the code point is
   2669      *         not {@code UNASSIGNED}; {@code false} otherwise.
   2670      */
   2671     public static boolean isDefined(int codePoint) {
   2672         return isDefinedImpl(codePoint);
   2673     }
   2674 
    // Native lookup backing both isDefined overloads.
    private static native boolean isDefinedImpl(int codePoint);
   2676 
   2677     /**
   2678      * Indicates whether the specified character is a digit.
   2679      *
   2680      * @param c
   2681      *            the character to check.
   2682      * @return {@code true} if {@code c} is a digit; {@code false}
   2683      *         otherwise.
   2684      */
   2685     public static boolean isDigit(char c) {
   2686         return isDigit((int) c);
   2687     }
   2688 
   2689     /**
   2690      * Indicates whether the specified code point is a digit.
   2691      *
   2692      * @param codePoint
   2693      *            the code point to check.
   2694      * @return {@code true} if {@code codePoint} is a digit; {@code false}
   2695      *         otherwise.
   2696      */
   2697     public static boolean isDigit(int codePoint) {
   2698         // Optimized case for ASCII
   2699         if ('0' <= codePoint && codePoint <= '9') {
   2700             return true;
   2701         }
   2702         if (codePoint < 1632) {
   2703             return false;
   2704         }
   2705         return isDigitImpl(codePoint);
   2706     }
   2707 
    // Native lookup used by isDigit(int) for code points >= 1632.
    private static native boolean isDigitImpl(int codePoint);
   2709 
   2710     /**
   2711      * Indicates whether the specified character is ignorable in a Java or
   2712      * Unicode identifier.
   2713      *
   2714      * @param c
   2715      *            the character to check.
   2716      * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
   2717      */
   2718     public static boolean isIdentifierIgnorable(char c) {
   2719         return isIdentifierIgnorable((int) c);
   2720     }
   2721 
   2722     /**
   2723      * Indicates whether the specified code point is ignorable in a Java or
   2724      * Unicode identifier.
   2725      *
   2726      * @param codePoint
   2727      *            the code point to check.
   2728      * @return {@code true} if {@code codePoint} is ignorable; {@code false}
   2729      *         otherwise.
   2730      */
   2731     public static boolean isIdentifierIgnorable(int codePoint) {
   2732         // This is both an optimization and papers over differences between Java and ICU.
   2733         if (codePoint < 0x600) {
   2734             return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
   2735                     (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
   2736         }
   2737         return isIdentifierIgnorableImpl(codePoint);
   2738     }
   2739 
    // Native lookup used by isIdentifierIgnorable(int) for code points >= 0x600.
    private static native boolean isIdentifierIgnorableImpl(int codePoint);
   2741 
   2742     /**
   2743      * Indicates whether the specified character is an ISO control character.
   2744      *
   2745      * @param c
   2746      *            the character to check.
   2747      * @return {@code true} if {@code c} is an ISO control character;
   2748      *         {@code false} otherwise.
   2749      */
   2750     public static boolean isISOControl(char c) {
   2751         return isISOControl((int) c);
   2752     }
   2753 
   2754     /**
   2755      * Indicates whether the specified code point is an ISO control character.
   2756      *
   2757      * @param c
   2758      *            the code point to check.
   2759      * @return {@code true} if {@code c} is an ISO control character;
   2760      *         {@code false} otherwise.
   2761      */
   2762     public static boolean isISOControl(int c) {
   2763         return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
   2764     }
   2765 
   2766     /**
   2767      * Indicates whether the specified character is a valid part of a Java
   2768      * identifier other than the first character.
   2769      *
   2770      * @param c
   2771      *            the character to check.
   2772      * @return {@code true} if {@code c} is valid as part of a Java identifier;
   2773      *         {@code false} otherwise.
   2774      */
   2775     public static boolean isJavaIdentifierPart(char c) {
   2776         return isJavaIdentifierPart((int) c);
   2777     }
   2778 
   2779     /**
   2780      * Indicates whether the specified code point is a valid part of a Java
   2781      * identifier other than the first character.
   2782      *
   2783      * @param codePoint
   2784      *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Java identifier;
   2786      *         {@code false} otherwise.
   2787      */
   2788     public static boolean isJavaIdentifierPart(int codePoint) {
   2789         // Use precomputed bitmasks to optimize the ASCII range.
   2790         if (codePoint < 64) {
   2791             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
   2792         } else if (codePoint < 128) {
   2793             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   2794         }
   2795         int type = getType(codePoint);
   2796         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
   2797                 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
   2798                 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
   2799                 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
   2800                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
   2801                 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
   2802     }
   2803 
   2804     /**
   2805      * Indicates whether the specified character is a valid first character for
   2806      * a Java identifier.
   2807      *
   2808      * @param c
   2809      *            the character to check.
   2810      * @return {@code true} if {@code c} is a valid first character of a Java
   2811      *         identifier; {@code false} otherwise.
   2812      */
   2813     public static boolean isJavaIdentifierStart(char c) {
   2814         return isJavaIdentifierStart((int) c);
   2815     }
   2816 
   2817     /**
   2818      * Indicates whether the specified code point is a valid first character for
   2819      * a Java identifier.
   2820      *
   2821      * @param codePoint
   2822      *            the code point to check.
   2823      * @return {@code true} if {@code codePoint} is a valid start of a Java
   2824      *         identifier; {@code false} otherwise.
   2825      */
   2826     public static boolean isJavaIdentifierStart(int codePoint) {
   2827         // Use precomputed bitmasks to optimize the ASCII range.
   2828         if (codePoint < 64) {
   2829             return (codePoint == '$'); // There's only one character in this range.
   2830         } else if (codePoint < 128) {
   2831             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   2832         }
   2833         int type = getType(codePoint);
   2834         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
   2835                 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
   2836     }
   2837 
   2838     /**
   2839      * Indicates whether the specified character is a Java letter.
   2840      *
   2841      * @param c
   2842      *            the character to check.
   2843      * @return {@code true} if {@code c} is a Java letter; {@code false}
   2844      *         otherwise.
   2845      * @deprecated Use {@link #isJavaIdentifierStart(char)}
   2846      */
   2847     @Deprecated
   2848     public static boolean isJavaLetter(char c) {
   2849         return isJavaIdentifierStart(c);
   2850     }
   2851 
   2852     /**
   2853      * Indicates whether the specified character is a Java letter or digit
   2854      * character.
   2855      *
   2856      * @param c
   2857      *            the character to check.
   2858      * @return {@code true} if {@code c} is a Java letter or digit;
   2859      *         {@code false} otherwise.
   2860      * @deprecated Use {@link #isJavaIdentifierPart(char)}
   2861      */
   2862     @Deprecated
   2863     public static boolean isJavaLetterOrDigit(char c) {
   2864         return isJavaIdentifierPart(c);
   2865     }
   2866 
   2867     /**
   2868      * Indicates whether the specified character is a letter.
   2869      *
   2870      * @param c
   2871      *            the character to check.
   2872      * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
   2873      */
   2874     public static boolean isLetter(char c) {
   2875         return isLetter((int) c);
   2876     }
   2877 
   2878     /**
   2879      * Indicates whether the specified code point is a letter.
   2880      *
   2881      * @param codePoint
   2882      *            the code point to check.
   2883      * @return {@code true} if {@code codePoint} is a letter; {@code false}
   2884      *         otherwise.
   2885      */
   2886     public static boolean isLetter(int codePoint) {
   2887         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
   2888             return true;
   2889         }
   2890         if (codePoint < 128) {
   2891             return false;
   2892         }
   2893         return isLetterImpl(codePoint);
   2894     }
   2895 
   2896     private static native boolean isLetterImpl(int codePoint);
   2897 
   2898     /**
   2899      * Indicates whether the specified character is a letter or a digit.
   2900      *
   2901      * @param c
   2902      *            the character to check.
   2903      * @return {@code true} if {@code c} is a letter or a digit; {@code false}
   2904      *         otherwise.
   2905      */
   2906     public static boolean isLetterOrDigit(char c) {
   2907         return isLetterOrDigit((int) c);
   2908     }
   2909 
   2910     /**
   2911      * Indicates whether the specified code point is a letter or a digit.
   2912      *
   2913      * @param codePoint
   2914      *            the code point to check.
   2915      * @return {@code true} if {@code codePoint} is a letter or a digit;
   2916      *         {@code false} otherwise.
   2917      */
   2918     public static boolean isLetterOrDigit(int codePoint) {
   2919         // Optimized case for ASCII
   2920         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
   2921             return true;
   2922         }
   2923         if ('0' <= codePoint && codePoint <= '9') {
   2924             return true;
   2925         }
   2926         if (codePoint < 128) {
   2927             return false;
   2928         }
   2929         return isLetterOrDigitImpl(codePoint);
   2930     }
   2931 
   2932     private static native boolean isLetterOrDigitImpl(int codePoint);
   2933 
   2934     /**
   2935      * Indicates whether the specified character is a lower case letter.
   2936      *
   2937      * @param c
   2938      *            the character to check.
   2939      * @return {@code true} if {@code c} is a lower case letter; {@code false}
   2940      *         otherwise.
   2941      */
   2942     public static boolean isLowerCase(char c) {
   2943         return isLowerCase((int) c);
   2944     }
   2945 
   2946     /**
   2947      * Indicates whether the specified code point is a lower case letter.
   2948      *
   2949      * @param codePoint
   2950      *            the code point to check.
   2951      * @return {@code true} if {@code codePoint} is a lower case letter;
   2952      *         {@code false} otherwise.
   2953      */
   2954     public static boolean isLowerCase(int codePoint) {
   2955         // Optimized case for ASCII
   2956         if ('a' <= codePoint && codePoint <= 'z') {
   2957             return true;
   2958         }
   2959         if (codePoint < 128) {
   2960             return false;
   2961         }
   2962         return isLowerCaseImpl(codePoint);
   2963     }
   2964 
   2965     private static native boolean isLowerCaseImpl(int codePoint);
   2966 
   2967     /**
   2968      * Indicates whether the specified character is a Java space.
   2969      *
   2970      * @param c
   2971      *            the character to check.
   2972      * @return {@code true} if {@code c} is a Java space; {@code false}
   2973      *         otherwise.
   2974      * @deprecated Use {@link #isWhitespace(char)}
   2975      */
   2976     @Deprecated
   2977     public static boolean isSpace(char c) {
   2978         return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
   2979     }
   2980 
   2981     /**
   2982      * Indicates whether the specified character is a Unicode space character.
   2983      * That is, if it is a member of one of the Unicode categories Space
   2984      * Separator, Line Separator, or Paragraph Separator.
   2985      *
   2986      * @param c
   2987      *            the character to check.
   2988      * @return {@code true} if {@code c} is a Unicode space character,
   2989      *         {@code false} otherwise.
   2990      */
   2991     public static boolean isSpaceChar(char c) {
   2992         return isSpaceChar((int) c);
   2993     }
   2994 
   2995     /**
   2996      * Indicates whether the specified code point is a Unicode space character.
   2997      * That is, if it is a member of one of the Unicode categories Space
   2998      * Separator, Line Separator, or Paragraph Separator.
   2999      *
   3000      * @param codePoint
   3001      *            the code point to check.
   3002      * @return {@code true} if {@code codePoint} is a Unicode space character,
   3003      *         {@code false} otherwise.
   3004      */
   3005     public static boolean isSpaceChar(int codePoint) {
   3006         if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
   3007             return true;
   3008         }
   3009         if (codePoint < 0x2000) {
   3010             return false;
   3011         }
   3012         if (codePoint <= 0xffff) {
   3013             return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
   3014                     codePoint == 0x202f || codePoint == 0x3000;
   3015         }
   3016         return isSpaceCharImpl(codePoint);
   3017     }
   3018 
   3019     private static native boolean isSpaceCharImpl(int codePoint);
   3020 
   3021     /**
   3022      * Indicates whether the specified character is a titlecase character.
   3023      *
   3024      * @param c
   3025      *            the character to check.
   3026      * @return {@code true} if {@code c} is a titlecase character, {@code false}
   3027      *         otherwise.
   3028      */
   3029     public static boolean isTitleCase(char c) {
   3030         return isTitleCaseImpl(c);
   3031     }
   3032 
   3033     /**
   3034      * Indicates whether the specified code point is a titlecase character.
   3035      *
   3036      * @param codePoint
   3037      *            the code point to check.
   3038      * @return {@code true} if {@code codePoint} is a titlecase character,
   3039      *         {@code false} otherwise.
   3040      */
   3041     public static boolean isTitleCase(int codePoint) {
   3042         return isTitleCaseImpl(codePoint);
   3043     }
   3044 
   3045     private static native boolean isTitleCaseImpl(int codePoint);
   3046 
   3047     /**
   3048      * Indicates whether the specified character is valid as part of a Unicode
   3049      * identifier other than the first character.
   3050      *
   3051      * @param c
   3052      *            the character to check.
   3053      * @return {@code true} if {@code c} is valid as part of a Unicode
   3054      *         identifier; {@code false} otherwise.
   3055      */
   3056     public static boolean isUnicodeIdentifierPart(char c) {
   3057         return isUnicodeIdentifierPartImpl(c);
   3058     }
   3059 
   3060     /**
   3061      * Indicates whether the specified code point is valid as part of a Unicode
   3062      * identifier other than the first character.
   3063      *
   3064      * @param codePoint
   3065      *            the code point to check.
   3066      * @return {@code true} if {@code codePoint} is valid as part of a Unicode
   3067      *         identifier; {@code false} otherwise.
   3068      */
   3069     public static boolean isUnicodeIdentifierPart(int codePoint) {
   3070         return isUnicodeIdentifierPartImpl(codePoint);
   3071     }
   3072 
   3073     private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
   3074 
   3075     /**
   3076      * Indicates whether the specified character is a valid initial character
   3077      * for a Unicode identifier.
   3078      *
   3079      * @param c
   3080      *            the character to check.
   3081      * @return {@code true} if {@code c} is a valid first character for a
   3082      *         Unicode identifier; {@code false} otherwise.
   3083      */
   3084     public static boolean isUnicodeIdentifierStart(char c) {
   3085         return isUnicodeIdentifierStartImpl(c);
   3086     }
   3087 
   3088     /**
   3089      * Indicates whether the specified code point is a valid initial character
   3090      * for a Unicode identifier.
   3091      *
   3092      * @param codePoint
   3093      *            the code point to check.
   3094      * @return {@code true} if {@code codePoint} is a valid first character for
   3095      *         a Unicode identifier; {@code false} otherwise.
   3096      */
   3097     public static boolean isUnicodeIdentifierStart(int codePoint) {
   3098         return isUnicodeIdentifierStartImpl(codePoint);
   3099     }
   3100 
   3101     private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
   3102 
   3103     /**
   3104      * Indicates whether the specified character is an upper case letter.
   3105      *
   3106      * @param c
   3107      *            the character to check.
   3108      * @return {@code true} if {@code c} is a upper case letter; {@code false}
   3109      *         otherwise.
   3110      */
   3111     public static boolean isUpperCase(char c) {
   3112         return isUpperCase((int) c);
   3113     }
   3114 
   3115     /**
   3116      * Indicates whether the specified code point is an upper case letter.
   3117      *
   3118      * @param codePoint
   3119      *            the code point to check.
   3120      * @return {@code true} if {@code codePoint} is a upper case letter;
   3121      *         {@code false} otherwise.
   3122      */
   3123     public static boolean isUpperCase(int codePoint) {
   3124         // Optimized case for ASCII
   3125         if ('A' <= codePoint && codePoint <= 'Z') {
   3126             return true;
   3127         }
   3128         if (codePoint < 128) {
   3129             return false;
   3130         }
   3131         return isUpperCaseImpl(codePoint);
   3132     }
   3133 
   3134     private static native boolean isUpperCaseImpl(int codePoint);
   3135 
   3136     /**
   3137      * Indicates whether the specified character is a whitespace character in
   3138      * Java.
   3139      *
   3140      * @param c
   3141      *            the character to check.
   3142      * @return {@code true} if the supplied {@code c} is a whitespace character
   3143      *         in Java; {@code false} otherwise.
   3144      */
   3145     public static boolean isWhitespace(char c) {
   3146         return isWhitespace((int) c);
   3147     }
   3148 
   3149     /**
   3150      * Indicates whether the specified code point is a whitespace character in
   3151      * Java.
   3152      *
   3153      * @param codePoint
   3154      *            the code point to check.
   3155      * @return {@code true} if the supplied {@code c} is a whitespace character
   3156      *         in Java; {@code false} otherwise.
   3157      */
   3158     public static boolean isWhitespace(int codePoint) {
   3159         // This is both an optimization and papers over differences between Java and ICU.
   3160         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
   3161             return true;
   3162         }
   3163         if (codePoint == 0x1680) {
   3164             return true;
   3165         }
   3166         if (codePoint < 0x2000 || codePoint == 0x2007) {
   3167             return false;
   3168         }
   3169         if (codePoint <= 0xffff) {
   3170             return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
   3171                     codePoint == 0x3000;
   3172         }
   3173         return isWhitespaceImpl(codePoint);
   3174     }
   3175 
   3176     private static native boolean isWhitespaceImpl(int codePoint);
   3177 
   3178     /**
   3179      * Reverses the order of the first and second byte in the specified
   3180      * character.
   3181      *
   3182      * @param c
   3183      *            the character to reverse.
   3184      * @return the character with reordered bytes.
   3185      */
   3186     public static char reverseBytes(char c) {
   3187         return (char)((c<<8) | (c>>8));
   3188     }
   3189 
   3190     /**
   3191      * Returns the lower case equivalent for the specified character if the
   3192      * character is an upper case letter. Otherwise, the specified character is
   3193      * returned unchanged.
   3194      *
   3195      * @param c
   3196      *            the character
   3197      * @return if {@code c} is an upper case character then its lower case
   3198      *         counterpart, otherwise just {@code c}.
   3199      */
   3200     public static char toLowerCase(char c) {
   3201         return (char) toLowerCase((int) c);
   3202     }
   3203 
   3204     /**
   3205      * Returns the lower case equivalent for the specified code point if it is
   3206      * an upper case letter. Otherwise, the specified code point is returned
   3207      * unchanged.
   3208      *
   3209      * @param codePoint
   3210      *            the code point to check.
   3211      * @return if {@code codePoint} is an upper case character then its lower
   3212      *         case counterpart, otherwise just {@code codePoint}.
   3213      */
   3214     public static int toLowerCase(int codePoint) {
   3215         // Optimized case for ASCII
   3216         if ('A' <= codePoint && codePoint <= 'Z') {
   3217             return (char) (codePoint + ('a' - 'A'));
   3218         }
   3219         if (codePoint < 192) {
   3220             return codePoint;
   3221         }
   3222         return toLowerCaseImpl(codePoint);
   3223     }
   3224 
   3225     private static native int toLowerCaseImpl(int codePoint);
   3226 
   3227     @Override
   3228     public String toString() {
   3229         return String.valueOf(value);
   3230     }
   3231 
   3232     /**
   3233      * Converts the specified character to its string representation.
   3234      *
   3235      * @param value
   3236      *            the character to convert.
   3237      * @return the character converted to a string.
   3238      */
   3239     public static String toString(char value) {
   3240         return String.valueOf(value);
   3241     }
   3242 
   3243     /**
   3244      * Returns the title case equivalent for the specified character if it
   3245      * exists. Otherwise, the specified character is returned unchanged.
   3246      *
   3247      * @param c
   3248      *            the character to convert.
   3249      * @return the title case equivalent of {@code c} if it exists, otherwise
   3250      *         {@code c}.
   3251      */
   3252     public static char toTitleCase(char c) {
   3253         return (char) toTitleCaseImpl(c);
   3254     }
   3255 
   3256     /**
   3257      * Returns the title case equivalent for the specified code point if it
   3258      * exists. Otherwise, the specified code point is returned unchanged.
   3259      *
   3260      * @param codePoint
   3261      *            the code point to convert.
   3262      * @return the title case equivalent of {@code codePoint} if it exists,
   3263      *         otherwise {@code codePoint}.
   3264      */
   3265     public static int toTitleCase(int codePoint) {
   3266         return toTitleCaseImpl(codePoint);
   3267     }
   3268 
   3269     private static native int toTitleCaseImpl(int codePoint);
   3270 
   3271     /**
   3272      * Returns the upper case equivalent for the specified character if the
   3273      * character is a lower case letter. Otherwise, the specified character is
   3274      * returned unchanged.
   3275      *
   3276      * @param c
   3277      *            the character to convert.
   3278      * @return if {@code c} is a lower case character then its upper case
   3279      *         counterpart, otherwise just {@code c}.
   3280      */
   3281     public static char toUpperCase(char c) {
   3282         return (char) toUpperCase((int) c);
   3283     }
   3284 
   3285     /**
   3286      * Returns the upper case equivalent for the specified code point if the
   3287      * code point is a lower case letter. Otherwise, the specified code point is
   3288      * returned unchanged.
   3289      *
   3290      * @param codePoint
   3291      *            the code point to convert.
   3292      * @return if {@code codePoint} is a lower case character then its upper
   3293      *         case counterpart, otherwise just {@code codePoint}.
   3294      */
   3295     public static int toUpperCase(int codePoint) {
   3296         // Optimized case for ASCII
   3297         if ('a' <= codePoint && codePoint <= 'z') {
   3298             return (char) (codePoint - ('a' - 'A'));
   3299         }
   3300         if (codePoint < 181) {
   3301             return codePoint;
   3302         }
   3303         return toUpperCaseImpl(codePoint);
   3304     }
   3305 
   3306     private static native int toUpperCaseImpl(int codePoint);
   3307 }
   3308