Home | History | Annotate | Download | only in lang
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  */
     17 
     18 package java.lang;
     19 
     20 import java.io.Serializable;
     21 import java.util.Arrays;
     22 
     23 /**
     24  * The wrapper for the primitive type {@code char}. This class also provides a
     25  * number of utility methods for working with characters.
     26  *
     27  * <p>Character data is kept up to date as Unicode evolves.
     28  * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
     29  * the {@code Locale} documentation for details of the Unicode versions implemented by current
     30  * and historical Android releases.
     31  *
     32  * <p>The Unicode specification, character tables, and other information are available at
     33  * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
     34  *
     35  * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
     36  * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
     37  * is the code point range U+0000 to U+FFFF. Characters above the BMP are
     38  * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
     39  * encoding and {@code char} pairs are used to represent code points in the
     40  * supplementary range. A pair of {@code char} values that represent a
     41  * supplementary character are made up of a <i>high surrogate</i> with a value
     42  * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
     43  * 0xDC00 to 0xDFFF.
     44  * <p>
     45  * On the Java platform a {@code char} value represents either a single BMP code
     46  * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
     47  * is used to represent all Unicode code points.
     48  *
     49  * <a name="unicode_categories"></a><h3>Unicode categories</h3>
     50  * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
     51  * grouped semantically to provide a convenient overview. This table is also useful in
     52  * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
     53  * <span class="datatable">
     54  * <style type="text/css">
     55  * .datatable td { padding-right: 20px; }
     56  * </style>
     57  * <p><table>
     58  * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
     59  * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
     60  * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
     61  * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
     62  * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
     63  * <tr> <td><br></td> </tr>
     64  * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
     65  * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
     66  * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
     67  * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
     68  * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
     69  * <tr> <td><br></td> </tr>
     70  * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
     71  * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
     72  * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
     73  * <tr> <td><br></td> </tr>
     74  * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
     75  * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
     76  * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
     77  * <tr> <td><br></td> </tr>
     78  * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
     79  * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
     80  * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
     81  * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
     82  * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
     83  * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
     84  * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
     85  * <tr> <td><br></td> </tr>
     86  * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
     87  * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
     88  * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
     89  * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
     90  * <tr> <td><br></td> </tr>
     91  * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
     92  * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
     93  * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
     94  * </table>
     95  * </span>
     96  *
     97  * @since 1.0
     98  */
     99 @FindBugsSuppressWarnings("DM_NUMBER_CTOR")
    100 public final class Character implements Serializable, Comparable<Character> {
    101     private static final long serialVersionUID = 3786198910865385080L;
    102 
    103     private final char value;
    104 
    105     /**
    106      * The minimum {@code Character} value.
    107      */
    108     public static final char MIN_VALUE = '\u0000';
    109 
    110     /**
    111      * The maximum {@code Character} value.
    112      */
    113     public static final char MAX_VALUE = '\uffff';
    114 
    115     /**
    116      * The minimum radix used for conversions between characters and integers.
    117      */
    118     public static final int MIN_RADIX = 2;
    119 
    120     /**
    121      * The maximum radix used for conversions between characters and integers.
    122      */
    123     public static final int MAX_RADIX = 36;
    124 
    125     /**
    126      * The {@link Class} object that represents the primitive type {@code char}.
    127      */
    128     @SuppressWarnings("unchecked")
    129     public static final Class<Character> TYPE
    130             = (Class<Character>) char[].class.getComponentType();
    131     // Note: Character.TYPE can't be set to "char.class", since *that* is
    132     // defined to be "java.lang.Character.TYPE";
    133 
    134     /**
    135      * Unicode category constant Cn.
    136      */
    137     public static final byte UNASSIGNED = 0;
    138 
    139     /**
    140      * Unicode category constant Lu.
    141      */
    142     public static final byte UPPERCASE_LETTER = 1;
    143 
    144     /**
    145      * Unicode category constant Ll.
    146      */
    147     public static final byte LOWERCASE_LETTER = 2;
    148 
    149     /**
    150      * Unicode category constant Lt.
    151      */
    152     public static final byte TITLECASE_LETTER = 3;
    153 
    154     /**
    155      * Unicode category constant Lm.
    156      */
    157     public static final byte MODIFIER_LETTER = 4;
    158 
    159     /**
    160      * Unicode category constant Lo.
    161      */
    162     public static final byte OTHER_LETTER = 5;
    163 
    164     /**
    165      * Unicode category constant Mn.
    166      */
    167     public static final byte NON_SPACING_MARK = 6;
    168 
    169     /**
    170      * Unicode category constant Me.
    171      */
    172     public static final byte ENCLOSING_MARK = 7;
    173 
    174     /**
    175      * Unicode category constant Mc.
    176      */
    177     public static final byte COMBINING_SPACING_MARK = 8;
    178 
    179     /**
    180      * Unicode category constant Nd.
    181      */
    182     public static final byte DECIMAL_DIGIT_NUMBER = 9;
    183 
    184     /**
    185      * Unicode category constant Nl.
    186      */
    187     public static final byte LETTER_NUMBER = 10;
    188 
    189     /**
    190      * Unicode category constant No.
    191      */
    192     public static final byte OTHER_NUMBER = 11;
    193 
    194     /**
    195      * Unicode category constant Zs.
    196      */
    197     public static final byte SPACE_SEPARATOR = 12;
    198 
    199     /**
    200      * Unicode category constant Zl.
    201      */
    202     public static final byte LINE_SEPARATOR = 13;
    203 
    204     /**
    205      * Unicode category constant Zp.
    206      */
    207     public static final byte PARAGRAPH_SEPARATOR = 14;
    208 
    209     /**
    210      * Unicode category constant Cc.
    211      */
    212     public static final byte CONTROL = 15;
    213 
    214     /**
    215      * Unicode category constant Cf.
    216      */
    217     public static final byte FORMAT = 16;
    218 
    219     /**
    220      * Unicode category constant Co.
    221      */
    222     public static final byte PRIVATE_USE = 18;
    223 
    224     /**
    225      * Unicode category constant Cs.
    226      */
    227     public static final byte SURROGATE = 19;
    228 
    229     /**
    230      * Unicode category constant Pd.
    231      */
    232     public static final byte DASH_PUNCTUATION = 20;
    233 
    234     /**
    235      * Unicode category constant Ps.
    236      */
    237     public static final byte START_PUNCTUATION = 21;
    238 
    239     /**
    240      * Unicode category constant Pe.
    241      */
    242     public static final byte END_PUNCTUATION = 22;
    243 
    244     /**
    245      * Unicode category constant Pc.
    246      */
    247     public static final byte CONNECTOR_PUNCTUATION = 23;
    248 
    249     /**
    250      * Unicode category constant Po.
    251      */
    252     public static final byte OTHER_PUNCTUATION = 24;
    253 
    254     /**
    255      * Unicode category constant Sm.
    256      */
    257     public static final byte MATH_SYMBOL = 25;
    258 
    259     /**
    260      * Unicode category constant Sc.
    261      */
    262     public static final byte CURRENCY_SYMBOL = 26;
    263 
    264     /**
    265      * Unicode category constant Sk.
    266      */
    267     public static final byte MODIFIER_SYMBOL = 27;
    268 
    269     /**
    270      * Unicode category constant So.
    271      */
    272     public static final byte OTHER_SYMBOL = 28;
    273 
    274     /**
    275      * Unicode category constant Pi.
    276      *
    277      * @since 1.4
    278      */
    279     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
    280 
    281     /**
    282      * Unicode category constant Pf.
    283      *
    284      * @since 1.4
    285      */
    286     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
    287 
    288     /**
    289      * Unicode bidirectional constant.
    290      *
    291      * @since 1.4
    292      */
    293     public static final byte DIRECTIONALITY_UNDEFINED = -1;
    294 
    295     /**
    296      * Unicode bidirectional constant L.
    297      *
    298      * @since 1.4
    299      */
    300     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
    301 
    302     /**
    303      * Unicode bidirectional constant R.
    304      *
    305      * @since 1.4
    306      */
    307     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
    308 
    309     /**
    310      * Unicode bidirectional constant AL.
    311      *
    312      * @since 1.4
    313      */
    314     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
    315 
    316     /**
    317      * Unicode bidirectional constant EN.
    318      *
    319      * @since 1.4
    320      */
    321     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
    322 
    323     /**
    324      * Unicode bidirectional constant ES.
    325      *
    326      * @since 1.4
    327      */
    328     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
    329 
    330     /**
    331      * Unicode bidirectional constant ET.
    332      *
    333      * @since 1.4
    334      */
    335     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
    336 
    337     /**
    338      * Unicode bidirectional constant AN.
    339      *
    340      * @since 1.4
    341      */
    342     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
    343 
    344     /**
    345      * Unicode bidirectional constant CS.
    346      *
    347      * @since 1.4
    348      */
    349     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
    350 
    351     /**
    352      * Unicode bidirectional constant NSM.
    353      *
    354      * @since 1.4
    355      */
    356     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
    357 
    358     /**
    359      * Unicode bidirectional constant BN.
    360      *
    361      * @since 1.4
    362      */
    363     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
    364 
    365     /**
    366      * Unicode bidirectional constant B.
    367      *
    368      * @since 1.4
    369      */
    370     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
    371 
    372     /**
    373      * Unicode bidirectional constant S.
    374      *
    375      * @since 1.4
    376      */
    377     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
    378 
    379     /**
    380      * Unicode bidirectional constant WS.
    381      *
    382      * @since 1.4
    383      */
    384     public static final byte DIRECTIONALITY_WHITESPACE = 12;
    385 
    386     /**
    387      * Unicode bidirectional constant ON.
    388      *
    389      * @since 1.4
    390      */
    391     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
    392 
    393     /**
    394      * Unicode bidirectional constant LRE.
    395      *
    396      * @since 1.4
    397      */
    398     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
    399 
    400     /**
    401      * Unicode bidirectional constant LRO.
    402      *
    403      * @since 1.4
    404      */
    405     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
    406 
    407     /**
    408      * Unicode bidirectional constant RLE.
    409      *
    410      * @since 1.4
    411      */
    412     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
    413 
    414     /**
    415      * Unicode bidirectional constant RLO.
    416      *
    417      * @since 1.4
    418      */
    419     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
    420 
    421     /**
    422      * Unicode bidirectional constant PDF.
    423      *
    424      * @since 1.4
    425      */
    426     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
    427 
    428     /**
    429      * The minimum value of a high surrogate or leading surrogate unit in UTF-16
    430      * encoding, {@code '\uD800'}.
    431      *
    432      * @since 1.5
    433      */
    434     public static final char MIN_HIGH_SURROGATE = '\uD800';
    435 
    436     /**
    437      * The maximum value of a high surrogate or leading surrogate unit in UTF-16
    438      * encoding, {@code '\uDBFF'}.
    439      *
    440      * @since 1.5
    441      */
    442     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
    443 
    444     /**
    445      * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
    446      * encoding, {@code '\uDC00'}.
    447      *
    448      * @since 1.5
    449      */
    450     public static final char MIN_LOW_SURROGATE = '\uDC00';
    451 
    452     /**
    453      * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
    454      * encoding, {@code '\uDFFF'}.
    455      *
    456      * @since 1.5
    457      */
    458     public static final char MAX_LOW_SURROGATE = '\uDFFF';
    459 
    460     /**
    461      * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
    462      *
    463      * @since 1.5
    464      */
    465     public static final char MIN_SURROGATE = '\uD800';
    466 
    467     /**
    468      * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
    469      *
    470      * @since 1.5
    471      */
    472     public static final char MAX_SURROGATE = '\uDFFF';
    473 
    474     /**
    475      * The minimum value of a supplementary code point, {@code U+010000}.
    476      *
    477      * @since 1.5
    478      */
    479     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
    480 
    481     /**
    482      * The minimum code point value, {@code U+0000}.
    483      *
    484      * @since 1.5
    485      */
    486     public static final int MIN_CODE_POINT = 0x000000;
    487 
    488     /**
    489      * The maximum code point value, {@code U+10FFFF}.
    490      *
    491      * @since 1.5
    492      */
    493     public static final int MAX_CODE_POINT = 0x10FFFF;
    494 
    495     /**
    496      * The number of bits required to represent a {@code Character} value in
    497      * unsigned form.
    498      *
    499      * @since 1.5
    500      */
    501     public static final int SIZE = 16;
    502 
    503     private static final byte[] DIRECTIONALITY = new byte[] {
    504             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
    505             DIRECTIONALITY_EUROPEAN_NUMBER,
    506             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
    507             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
    508             DIRECTIONALITY_ARABIC_NUMBER,
    509             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
    510             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
    511             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
    512             DIRECTIONALITY_OTHER_NEUTRALS,
    513             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
    514             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
    515             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
    516             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
    517             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
    518             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
    519             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    520 
    521     /*
    522      * Represents a subset of the Unicode character set.
    523      */
    524     public static class Subset {
    525         private final String name;
    526 
    527         /**
    528          * Constructs a new {@code Subset}.
    529          */
    530         protected Subset(String name) {
    531             if (name == null) {
    532                 throw new NullPointerException("name == null");
    533             }
    534             this.name = name;
    535         }
    536 
    537         /**
    538          * Compares this character subset for identity with the specified object.
    539          */
    540         @Override public final boolean equals(Object object) {
    541             return object == this;
    542         }
    543 
    544         /**
    545          * Returns this subset's hash code, which is the hash code computed by
    546          *         {@link java.lang.Object#hashCode()}.
    547          */
    548         @Override public final int hashCode() {
    549             return super.hashCode();
    550         }
    551 
    552         /**
    553          * Returns this subset's name.
    554          */
    555         @Override public final String toString() {
    556             return name;
    557         }
    558     }
    559 
    560     /**
    561      * Represents a block of Unicode characters. This class provides constants for various
    562      * well-known blocks (but not all blocks) and methods for looking up a block
    563      * by name {@link #forName} or by code point {@link #of}.
    564      *
    565      * @since 1.2
    566      */
    567     public static final class UnicodeBlock extends Subset {
    568         /**
    569          * The Surrogates Area Unicode block.
    570          *
    571          * @deprecated As of Java 5, this block has been replaced by
    572          *             {@link #HIGH_SURROGATES},
    573          *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
    574          *             {@link #LOW_SURROGATES}.
    575          */
    576         @Deprecated
    577         public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA");
    578 
    579         /** The Basic Latin Unicode block. */
    580         public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN");
    581 
    582         /** The Latin-1 Supplement Unicode block. */
    583         public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT");
    584 
    585         /** The Latin Extended-A Unicode block. */
    586         public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A");
    587 
    588         /** The Latin Extended-B Unicode block. */
    589         public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B");
    590 
    591         /** The IPA Extensions Unicode block. */
    592         public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS");
    593 
    594         /** The Spacing Modifier Letters Unicode block. */
    595         public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS");
    596 
    597         /** The Combining Diacritical Marks Unicode block. */
    598         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS");
    599 
    600         /**
    601          * The Greek and Coptic Unicode block. Previously referred to as Greek.
    602          */
    603         public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK");
    604 
    605         /** The Cyrillic Unicode block. */
    606         public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC");
    607 
    608         /**
    609          * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary.
    610          */
    611         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY");
    612 
    613         /** The Armenian Unicode block. */
    614         public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN");
    615 
    616         /** The Hebrew Unicode block. */
    617         public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW");
    618 
    619         /** The Arabic Unicode block. */
    620         public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC");
    621 
    622         /** The Syriac Unicode block. */
    623         public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC");
    624 
    625         /** The Thaana Unicode block. */
    626         public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA");
    627 
    628         /** The Devanagari Unicode block. */
    629         public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI");
    630 
    631         /** The Bengali Unicode block. */
    632         public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI");
    633 
    634         /** The Gurmukhi Unicode block. */
    635         public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI");
    636 
    637         /** The Gujarati Unicode block. */
    638         public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI");
    639 
    640         /** The Oriya Unicode block. */
    641         public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA");
    642 
    643         /** The Tamil Unicode block. */
    644         public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL");
    645 
    646         /** The Telugu Unicode block. */
    647         public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU");
    648 
    649         /** The Kannada Unicode block. */
    650         public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA");
    651 
    652         /** The Malayalam Unicode block. */
    653         public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM");
    654 
    655         /** The Sinhala Unicode block. */
    656         public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA");
    657 
    658         /** The Thai Unicode block. */
    659         public static final UnicodeBlock THAI = new UnicodeBlock("THAI");
    660 
    661         /** The Lao Unicode block. */
    662         public static final UnicodeBlock LAO = new UnicodeBlock("LAO");
    663 
    664         /** The Tibetan Unicode block. */
    665         public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN");
    666 
    667         /** The Myanmar Unicode block. */
    668         public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR");
    669 
    670         /** The Georgian Unicode block. */
    671         public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN");
    672 
    673         /** The Hangul Jamo Unicode block. */
    674         public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO");
    675 
    676         /** The Ethiopic Unicode block. */
    677         public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC");
    678 
    679         /** The Cherokee Unicode block. */
    680         public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE");
    681 
    682         /** The Unified Canadian Aboriginal Syllabics Unicode block. */
    683         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
    684 
    685         /** The Ogham Unicode block. */
    686         public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM");
    687 
    688         /** The Runic Unicode block. */
    689         public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC");
    690 
    691         /** The Tagalog Unicode block. */
    692         public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG");
    693 
    694         /** The Hanunoo Unicode block. */
    695         public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO");
    696 
    697         /** The Buhid Unicode block. */
    698         public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID");
    699 
    700         /** The Tagbanwa Unicode block. */
    701         public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA");
    702 
    703         /** The Khmer Unicode block. */
    704         public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER");
    705 
    706         /** The Mongolian Unicode block. */
    707         public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN");
    708 
    709         /** The Limbu Unicode block. */
    710         public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU");
    711 
    712         /** The Tai Le Unicode block. */
    713         public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE");
    714 
    715         /** The Khmer Symbols Unicode block. */
    716         public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS");
    717 
    718         /** The Phonetic Extensions Unicode block. */
    719         public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS");
    720 
    721         /** The Latin Extended Additional Unicode block. */
    722         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL");
    723 
    724         /** The Greek Extended Unicode block. */
    725         public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED");
    726 
    727         /** The General Punctuation Unicode block. */
    728         public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION");
    729 
    730         /** The Superscripts and Subscripts Unicode block. */
    731         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS");
    732 
    733         /** The Currency Symbols Unicode block. */
    734         public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS");
    735 
    736         /**
    737          * The Combining Diacritical Marks for Symbols Unicode
    738          * block. Previously referred to as Combining Marks for
    739          * Symbols.
    740          */
    741         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS");
    742 
    743         /** The Letterlike Symbols Unicode block. */
    744         public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS");
    745 
    746         /** The Number Forms Unicode block. */
    747         public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS");
    748 
    749         /** The Arrows Unicode block. */
    750         public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS");
    751 
    752         /** The Mathematical Operators Unicode block. */
    753         public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS");
    754 
    755         /** The Miscellaneous Technical Unicode block. */
    756         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL");
    757 
    758         /** The Control Pictures Unicode block. */
    759         public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES");
    760 
    761         /** The Optical Character Recognition Unicode block. */
    762         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION");
    763 
    764         /** The Enclosed Alphanumerics Unicode block. */
    765         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS");
    766 
    767         /** The Box Drawing Unicode block. */
    768         public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING");
    769 
    770         /** The Block Elements Unicode block. */
    771         public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS");
    772 
    773         /** The Geometric Shapes Unicode block. */
    774         public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES");
    775 
    776         /** The Miscellaneous Symbols Unicode block. */
    777         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS");
    778 
    779         /** The Dingbats Unicode block. */
    780         public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS");
    781 
    782         /** The Miscellaneous Mathematical Symbols-A Unicode block. */
    783         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A");
    784 
    785         /** The Supplemental Arrows-A Unicode block. */
    786         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A");
    787 
    788         /** The Braille Patterns Unicode block. */
    789         public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS");
    790 
    791         /** The Supplemental Arrows-B Unicode block. */
    792         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B");
    793 
    794         /** The Miscellaneous Mathematical Symbols-B Unicode block. */
    795         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B");
    796 
    797         /** The Supplemental Mathematical Operators Unicode block. */
    798         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS");
    799 
    800         /** The Miscellaneous Symbols and Arrows Unicode block. */
    801         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS");
    802 
    803         /** The CJK Radicals Supplement Unicode block. */
    804         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT");
    805 
    806         /** The Kangxi Radicals Unicode block. */
    807         public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS");
    808 
    809         /** The Ideographic Description Characters Unicode block. */
    810         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
    811 
    812         /** The CJK Symbols and Punctuation Unicode block. */
    813         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION");
    814 
    815         /** The Hiragana Unicode block. */
    816         public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA");
    817 
    818         /** The Katakana Unicode block. */
    819         public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA");
    820 
    821         /** The Bopomofo Unicode block. */
    822         public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO");
    823 
    824         /** The Hangul Compatibility Jamo Unicode block. */
    825         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO");
    826 
    827         /** The Kanbun Unicode block. */
    828         public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN");
    829 
    830         /** The Bopomofo Extended Unicode block. */
    831         public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED");
    832 
    833         /** The Katakana Phonetic Extensions Unicode block. */
    834         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS");
    835 
    836         /** The Enclosed CJK Letters and Months Unicode block. */
    837         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS");
    838 
    839         /** The CJK Compatibility Unicode block. */
    840         public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY");
    841 
    842         /** The CJK Unified Ideographs Extension A Unicode block. */
    843         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
    844 
    845         /** The Yijing Hexagram Symbols Unicode block. */
    846         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS");
    847 
    848         /** The CJK Unified Ideographs Unicode block. */
    849         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS");
    850 
    851         /** The Yi Syllables Unicode block. */
    852         public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES");
    853 
    854         /** The Yi Radicals Unicode block. */
    855         public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS");
    856 
    857         /** The Hangul Syllables Unicode block. */
    858         public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES");
    859 
    860         /**
    861          * The High Surrogates Unicode block. This block represents
    862          * code point values in the high surrogate range 0xD800 to 0xDB7F.
    863          */
    864         public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES");
    865 
    866         /**
    867          * The High Private Use Surrogates Unicode block. This block
    868          * represents code point values in the high surrogate range 0xDB80 to
    869          * 0xDBFF.
    870          */
    871         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES");
    872 
    873         /**
    874          * The Low Surrogates Unicode block. This block represents
    875          * code point values in the low surrogate range 0xDC00 to 0xDFFF.
    876          */
    877         public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES");
    878 
    879         /** The Private Use Area Unicode block. */
    880         public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA");
    881 
    882         /** The CJK Compatibility Ideographs Unicode block. */
    883         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS");
    884 
    885         /** The Alphabetic Presentation Forms Unicode block. */
    886         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS");
    887 
    888         /** The Arabic Presentation Forms-A Unicode block. */
    889         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A");
    890 
    891         /** The Variation Selectors Unicode block. */
    892         public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS");
    893 
    894         /** The Combining Half Marks Unicode block. */
    895         public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS");
    896 
    897         /** The CJK Compatibility Forms Unicode block. */
    898         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS");
    899 
    900         /** The Small Form Variants Unicode block. */
    901         public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS");
    902 
    903         /** The Arabic Presentation Forms-B Unicode block. */
    904         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B");
    905 
    906         /** The Halfwidth and Fullwidth Forms Unicode block. */
    907         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS");
    908 
    909         /** The Specials Unicode block. */
    910         public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS");
    911 
    912         /** The Linear B Syllabary Unicode block. */
    913         public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY");
    914 
    915         /** The Linear B Ideograms Unicode block. */
    916         public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS");
    917 
    918         /** The Aegean Numbers Unicode block. */
    919         public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS");
    920 
    921         /** The Old Italic Unicode block. */
    922         public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC");
    923 
    924         /** The Gothic Unicode block. */
    925         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
    926 
    927         /** The Ugaritic Unicode block. */
    928         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
    929 
    930         /** The Deseret Unicode block. */
    931         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
    932 
    933         /** The Shavian Unicode block. */
    934         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
    935 
    936         /** The Osmanya Unicode block. */
    937         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
    938 
    939         /** The Cypriot Syllabary Unicode block. */
    940         public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY");
    941 
    942         /** The Byzantine Musical Symbols Unicode block. */
    943         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS");
    944 
    945         /** The Musical Symbols Unicode block. */
    946         public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS");
    947 
    948         /** The Tai Xuan Jing Symbols Unicode block. */
    949         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS");
    950 
    951         /** The Mathematical Alphanumeric Symbols Unicode block. */
    952         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS");
    953 
    954         /** The CJK Unified Ideographs Extension B Unicode block. */
    955         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B");
    956 
    957         /** The CJK Compatibility Ideographs Supplement Unicode block. */
    958         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT");
    959 
    960         /** The Tags Unicode block. */
    961         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
    962 
    963         /** The Variation Selectors Supplement Unicode block. */
    964         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT");
    965 
    966         /** The Supplementary Private Use Area-A Unicode block. */
    967         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A");
    968 
    969         /** The Supplementary Private Use Area-B Unicode block. */
    970         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B");
    971 
    972         // Unicode 4.1.
    973 
    974         /** The Ancient Greek Musical Notation Unicode 4.1 block. */
    975         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION");
    976 
    977         /** The Ancient Greek Numbers Unicode 4.1 block. */
    978         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS");
    979 
    980         /** The Arabic Supplement Unicode 4.1 block. */
    981         public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT");
    982 
    983         /** The Buginese Unicode 4.1 block. */
    984         public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE");
    985 
    986         /** The CJK Strokes Unicode 4.1 block. */
    987         public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES");
    988 
    989         /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */
    990         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT");
    991 
    992         /** The Coptic Unicode 4.1 block. */
    993         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");
    994 
    995         /** The Ethiopic Extended Unicode 4.1 block. */
    996         public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED");
    997 
    998         /** The Ethiopic Supplement Unicode 4.1 block. */
    999         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT");
   1000 
   1001         /** The Georgian Supplement Unicode 4.1 block. */
   1002         public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT");
   1003 
   1004         /** The Glagolitic Unicode 4.1 block. */
   1005         public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC");
   1006 
   1007         /** The Kharoshthi Unicode 4.1 block. */
   1008         public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI");
   1009 
   1010         /** The Modifier Tone Letters Unicode 4.1 block. */
   1011         public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS");
   1012 
   1013         /** The New Tai Lue Unicode 4.1 block. */
   1014         public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE");
   1015 
   1016         /** The Old Persian Unicode 4.1 block. */
   1017         public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN");
   1018 
   1019         /** The Phonetic Extensions Supplement Unicode 4.1 block. */
   1020         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT");
   1021 
   1022         /** The Supplemental Punctuation Unicode 4.1 block. */
   1023         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION");
   1024 
   1025         /** The Syloti Nagri Unicode 4.1 block. */
   1026         public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI");
   1027 
   1028         /** The Tifinagh Unicode 4.1 block. */
   1029         public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH");
   1030 
   1031         /** The Vertical Forms Unicode 4.1 block. */
   1032         public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS");
   1033 
   1034         // Unicode 5.0.
   1035 
   1036         /** The NKo Unicode 5.0 block. */
   1037         public static final UnicodeBlock NKO = new UnicodeBlock("NKO");
   1038 
   1039         /** The Balinese Unicode 5.0 block. */
   1040         public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE");
   1041 
   1042         /** The Latin Extended-C Unicode 5.0 block. */
   1043         public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C");
   1044 
   1045         /** The Latin Extended-D Unicode 5.0 block. */
   1046         public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D");
   1047 
   1048         /** The Phags-pa Unicode 5.0 block. */
   1049         public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA");
   1050 
   1051         /** The Phoenician Unicode 5.0 block. */
   1052         public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN");
   1053 
   1054         /** The Cuneiform Unicode 5.0 block. */
   1055         public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM");
   1056 
   1057         /** The Cuneiform Numbers And Punctuation Unicode 5.0 block. */
   1058         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION");
   1059 
   1060         /** The Counting Rod Numerals Unicode 5.0 block. */
   1061         public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS");
   1062 
   1063         // Unicode 5.1.
   1064 
   1065         /** The Sundanese Unicode 5.1 block. */
   1066         public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE");
   1067 
   1068         /** The Lepcha Unicode 5.1 block. */
   1069         public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");
   1070 
   1071         /** The Ol Chiki Unicode 5.1 block. */
   1072         public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI");
   1073 
   1074         /** The Cyrillic Extended-A Unicode 5.1 block. */
   1075         public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A");
   1076 
   1077         /** The Vai Unicode 5.1 block. */
   1078         public static final UnicodeBlock VAI = new UnicodeBlock("VAI");
   1079 
   1080         /** The Cyrillic Extended-B Unicode 5.1 block. */
   1081         public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B");
   1082 
   1083         /** The Saurashtra Unicode 5.1 block. */
   1084         public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA");
   1085 
   1086         /** The Kayah Li Unicode 5.1 block. */
   1087         public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI");
   1088 
   1089         /** The Rejang Unicode 5.1 block. */
   1090         public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");
   1091 
   1092         /** The Cham Unicode 5.1 block. */
   1093         public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");
   1094 
   1095         /** The Ancient Symbols Unicode 5.1 block. */
   1096         public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS");
   1097 
   1098         /** The Phaistos Disc Unicode 5.1 block. */
   1099         public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC");
   1100 
   1101         /** The Lycian Unicode 5.1 block. */
   1102         public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");
   1103 
   1104         /** The Carian Unicode 5.1 block. */
   1105         public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");
   1106 
   1107         /** The Lydian Unicode 5.1 block. */
   1108         public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");
   1109 
   1110         /** The Mahjong Tiles Unicode 5.1 block. */
   1111         public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES");
   1112 
   1113         /** The Domino Tiles Unicode 5.1 block. */
   1114         public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES");
   1115 
   1116         // Unicode 5.2.
   1117 
   1118         /** The Samaritan Unicode 5.2 block. */
   1119         public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN");
   1120 
   1121         /** The Unified Canadian Aboriginal Syllabics Extended Unicode 5.2 block. */
   1122         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED");
   1123 
   1124         /** The Tai Tham Unicode 5.2 block. */
   1125         public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM");
   1126 
   1127         /** The Vedic Extensions Unicode 5.2 block. */
   1128         public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS");
   1129 
   1130         /** The Lisu Unicode 5.2 block. */
   1131         public static final UnicodeBlock LISU = new UnicodeBlock("LISU");
   1132 
   1133         /** The Bamum Unicode 5.2 block. */
   1134         public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM");
   1135 
   1136         /** The Common Indic Number Forms Unicode 5.2 block. */
   1137         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS");
   1138 
   1139         /** The Devanagari Extended Unicode 5.2 block. */
   1140         public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED");
   1141 
   1142         /** The Hangul Jamo Extended-A Unicode 5.2 block. */
   1143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A");
   1144 
   1145         /** The Javanese Unicode 5.2 block. */
   1146         public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE");
   1147 
   1148         /** The Myanmar Extended-A Unicode 5.2 block. */
   1149         public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A");
   1150 
   1151         /** The Tai Viet Unicode 5.2 block. */
   1152         public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET");
   1153 
   1154         /** The Meetei Mayek Unicode 5.2 block. */
   1155         public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK");
   1156 
   1157         /** The Hangul Jamo Extended-B Unicode 5.2 block. */
   1158         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B");
   1159 
   1160         /** The Imperial Aramaic Unicode 5.2 block. */
   1161         public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC");
   1162 
   1163         /** The Old South Arabian Unicode 5.2 block. */
   1164         public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN");
   1165 
   1166         /** The Avestan Unicode 5.2 block. */
   1167         public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN");
   1168 
   1169         /** The Inscriptional Parthian Unicode 5.2 block. */
   1170         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN");
   1171 
   1172         /** The Inscriptional Pahlavi Unicode 5.2 block. */
   1173         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI");
   1174 
   1175         /** The Old Turkic Unicode 5.2 block. */
   1176         public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC");
   1177 
   1178         /** The Rumi Numeral Symbols Unicode 5.2 block. */
   1179         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS");
   1180 
   1181         /** The Kaithi Unicode 5.2 block. */
   1182         public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI");
   1183 
   1184         /** The Egyptian Hieroglyphs Unicode 5.2 block. */
   1185         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS");
   1186 
   1187         /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */
   1188         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT");
   1189 
   1190         /** The Enclosed Ideographic Supplement Unicode 5.2 block. */
   1191         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT");
   1192 
   1193         /** The CJK Unified Ideographs Extension C Unicode 5.2 block. */
   1194         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C");
   1195 
   1196         // Unicode 6.0.
   1197 
   1198         /** The Mandaic Unicode 6.0 block. */
   1199         public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC");
   1200 
   1201         /** The Batak Unicode 6.0 block. */
   1202         public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK");
   1203 
   1204         /** The Ethiopic Extended-A Unicode 6.0 block. */
   1205         public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A");
   1206 
   1207         /** The Brahmi Unicode 6.0 block. */
   1208         public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI");
   1209 
   1210         /** The Bamum Supplement Unicode 6.0 block. */
   1211         public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT");
   1212 
   1213         /** The Kana Supplement Unicode 6.0 block. */
   1214         public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT");
   1215 
   1216         /** The Playing Cards Unicode 6.0 block. */
   1217         public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS");
   1218 
   1219         /** The Miscellaneous Symbols And Pictographs Unicode 6.0 block. */
   1220         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS");
   1221 
   1222         /** The Emoticons Unicode 6.0 block. */
   1223         public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS");
   1224 
   1225         /** The Transport And Map Symbols Unicode 6.0 block. */
   1226         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS");
   1227 
   1228         /** The Alchemical Symbols Unicode 6.0 block. */
   1229         public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS");
   1230 
   1231         /** The CJK Unified Ideographs Extension D Unicode 6.0 block. */
   1232         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D");
   1233 
   1234         /*
   1235          * All of the UnicodeBlocks above, in the icu4c UBlock enum order.
   1236          */
   1237         private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
   1238             null, // icu4c numbers blocks starting at 1, so index 0 should be null.
   1239 
   1240             UnicodeBlock.BASIC_LATIN,
   1241             UnicodeBlock.LATIN_1_SUPPLEMENT,
   1242             UnicodeBlock.LATIN_EXTENDED_A,
   1243             UnicodeBlock.LATIN_EXTENDED_B,
   1244             UnicodeBlock.IPA_EXTENSIONS,
   1245             UnicodeBlock.SPACING_MODIFIER_LETTERS,
   1246             UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
   1247             UnicodeBlock.GREEK,
   1248             UnicodeBlock.CYRILLIC,
   1249             UnicodeBlock.ARMENIAN,
   1250             UnicodeBlock.HEBREW,
   1251             UnicodeBlock.ARABIC,
   1252             UnicodeBlock.SYRIAC,
   1253             UnicodeBlock.THAANA,
   1254             UnicodeBlock.DEVANAGARI,
   1255             UnicodeBlock.BENGALI,
   1256             UnicodeBlock.GURMUKHI,
   1257             UnicodeBlock.GUJARATI,
   1258             UnicodeBlock.ORIYA,
   1259             UnicodeBlock.TAMIL,
   1260             UnicodeBlock.TELUGU,
   1261             UnicodeBlock.KANNADA,
   1262             UnicodeBlock.MALAYALAM,
   1263             UnicodeBlock.SINHALA,
   1264             UnicodeBlock.THAI,
   1265             UnicodeBlock.LAO,
   1266             UnicodeBlock.TIBETAN,
   1267             UnicodeBlock.MYANMAR,
   1268             UnicodeBlock.GEORGIAN,
   1269             UnicodeBlock.HANGUL_JAMO,
   1270             UnicodeBlock.ETHIOPIC,
   1271             UnicodeBlock.CHEROKEE,
   1272             UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
   1273             UnicodeBlock.OGHAM,
   1274             UnicodeBlock.RUNIC,
   1275             UnicodeBlock.KHMER,
   1276             UnicodeBlock.MONGOLIAN,
   1277             UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
   1278             UnicodeBlock.GREEK_EXTENDED,
   1279             UnicodeBlock.GENERAL_PUNCTUATION,
   1280             UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
   1281             UnicodeBlock.CURRENCY_SYMBOLS,
   1282             UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
   1283             UnicodeBlock.LETTERLIKE_SYMBOLS,
   1284             UnicodeBlock.NUMBER_FORMS,
   1285             UnicodeBlock.ARROWS,
   1286             UnicodeBlock.MATHEMATICAL_OPERATORS,
   1287             UnicodeBlock.MISCELLANEOUS_TECHNICAL,
   1288             UnicodeBlock.CONTROL_PICTURES,
   1289             UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
   1290             UnicodeBlock.ENCLOSED_ALPHANUMERICS,
   1291             UnicodeBlock.BOX_DRAWING,
   1292             UnicodeBlock.BLOCK_ELEMENTS,
   1293             UnicodeBlock.GEOMETRIC_SHAPES,
   1294             UnicodeBlock.MISCELLANEOUS_SYMBOLS,
   1295             UnicodeBlock.DINGBATS,
   1296             UnicodeBlock.BRAILLE_PATTERNS,
   1297             UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
   1298             UnicodeBlock.KANGXI_RADICALS,
   1299             UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
   1300             UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
   1301             UnicodeBlock.HIRAGANA,
   1302             UnicodeBlock.KATAKANA,
   1303             UnicodeBlock.BOPOMOFO,
   1304             UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
   1305             UnicodeBlock.KANBUN,
   1306             UnicodeBlock.BOPOMOFO_EXTENDED,
   1307             UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
   1308             UnicodeBlock.CJK_COMPATIBILITY,
   1309             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
   1310             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
   1311             UnicodeBlock.YI_SYLLABLES,
   1312             UnicodeBlock.YI_RADICALS,
   1313             UnicodeBlock.HANGUL_SYLLABLES,
   1314             UnicodeBlock.HIGH_SURROGATES,
   1315             UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
   1316             UnicodeBlock.LOW_SURROGATES,
   1317             UnicodeBlock.PRIVATE_USE_AREA,
   1318             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
   1319             UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
   1320             UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
   1321             UnicodeBlock.COMBINING_HALF_MARKS,
   1322             UnicodeBlock.CJK_COMPATIBILITY_FORMS,
   1323             UnicodeBlock.SMALL_FORM_VARIANTS,
   1324             UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
   1325             UnicodeBlock.SPECIALS,
   1326             UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
   1327 
   1328             // Unicode 3.1.
   1329             UnicodeBlock.OLD_ITALIC,
   1330             UnicodeBlock.GOTHIC,
   1331             UnicodeBlock.DESERET,
   1332             UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
   1333             UnicodeBlock.MUSICAL_SYMBOLS,
   1334             UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
   1335             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
   1336             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
   1337             UnicodeBlock.TAGS,
   1338 
   1339             // Unicode 3.2.
   1340             UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
   1341             UnicodeBlock.TAGALOG,
   1342             UnicodeBlock.HANUNOO,
   1343             UnicodeBlock.BUHID,
   1344             UnicodeBlock.TAGBANWA,
   1345             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
   1346             UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
   1347             UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
   1348             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
   1349             UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
   1350             UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
   1351             UnicodeBlock.VARIATION_SELECTORS,
   1352             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
   1353             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
   1354 
   1355             // Unicode 4.0.
   1356             UnicodeBlock.LIMBU,
   1357             UnicodeBlock.TAI_LE,
   1358             UnicodeBlock.KHMER_SYMBOLS,
   1359             UnicodeBlock.PHONETIC_EXTENSIONS,
   1360             UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
   1361             UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
   1362             UnicodeBlock.LINEAR_B_SYLLABARY,
   1363             UnicodeBlock.LINEAR_B_IDEOGRAMS,
   1364             UnicodeBlock.AEGEAN_NUMBERS,
   1365             UnicodeBlock.UGARITIC,
   1366             UnicodeBlock.SHAVIAN,
   1367             UnicodeBlock.OSMANYA,
   1368             UnicodeBlock.CYPRIOT_SYLLABARY,
   1369             UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
   1370             UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT,
   1371 
   1372             // Unicode 4.1.
   1373             UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION,
   1374             UnicodeBlock.ANCIENT_GREEK_NUMBERS,
   1375             UnicodeBlock.ARABIC_SUPPLEMENT,
   1376             UnicodeBlock.BUGINESE,
   1377             UnicodeBlock.CJK_STROKES,
   1378             UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
   1379             UnicodeBlock.COPTIC,
   1380             UnicodeBlock.ETHIOPIC_EXTENDED,
   1381             UnicodeBlock.ETHIOPIC_SUPPLEMENT,
   1382             UnicodeBlock.GEORGIAN_SUPPLEMENT,
   1383             UnicodeBlock.GLAGOLITIC,
   1384             UnicodeBlock.KHAROSHTHI,
   1385             UnicodeBlock.MODIFIER_TONE_LETTERS,
   1386             UnicodeBlock.NEW_TAI_LUE,
   1387             UnicodeBlock.OLD_PERSIAN,
   1388             UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT,
   1389             UnicodeBlock.SUPPLEMENTAL_PUNCTUATION,
   1390             UnicodeBlock.SYLOTI_NAGRI,
   1391             UnicodeBlock.TIFINAGH,
   1392             UnicodeBlock.VERTICAL_FORMS,
   1393 
   1394             // Unicode 5.0.
   1395             UnicodeBlock.NKO,
   1396             UnicodeBlock.BALINESE,
   1397             UnicodeBlock.LATIN_EXTENDED_C,
   1398             UnicodeBlock.LATIN_EXTENDED_D,
   1399             UnicodeBlock.PHAGS_PA,
   1400             UnicodeBlock.PHOENICIAN,
   1401             UnicodeBlock.CUNEIFORM,
   1402             UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION,
   1403             UnicodeBlock.COUNTING_ROD_NUMERALS,
   1404 
   1405             // Unicode 5.1.
   1406             UnicodeBlock.SUNDANESE,
   1407             UnicodeBlock.LEPCHA,
   1408             UnicodeBlock.OL_CHIKI,
   1409             UnicodeBlock.CYRILLIC_EXTENDED_A,
   1410             UnicodeBlock.VAI,
   1411             UnicodeBlock.CYRILLIC_EXTENDED_B,
   1412             UnicodeBlock.SAURASHTRA,
   1413             UnicodeBlock.KAYAH_LI,
   1414             UnicodeBlock.REJANG,
   1415             UnicodeBlock.CHAM,
   1416             UnicodeBlock.ANCIENT_SYMBOLS,
   1417             UnicodeBlock.PHAISTOS_DISC,
   1418             UnicodeBlock.LYCIAN,
   1419             UnicodeBlock.CARIAN,
   1420             UnicodeBlock.LYDIAN,
   1421             UnicodeBlock.MAHJONG_TILES,
   1422             UnicodeBlock.DOMINO_TILES,
   1423 
   1424             // Unicode 5.2.
   1425             UnicodeBlock.SAMARITAN,
   1426             UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
   1427             UnicodeBlock.TAI_THAM,
   1428             UnicodeBlock.VEDIC_EXTENSIONS,
   1429             UnicodeBlock.LISU,
   1430             UnicodeBlock.BAMUM,
   1431             UnicodeBlock.COMMON_INDIC_NUMBER_FORMS,
   1432             UnicodeBlock.DEVANAGARI_EXTENDED,
   1433             UnicodeBlock.HANGUL_JAMO_EXTENDED_A,
   1434             UnicodeBlock.JAVANESE,
   1435             UnicodeBlock.MYANMAR_EXTENDED_A,
   1436             UnicodeBlock.TAI_VIET,
   1437             UnicodeBlock.MEETEI_MAYEK,
   1438             UnicodeBlock.HANGUL_JAMO_EXTENDED_B,
   1439             UnicodeBlock.IMPERIAL_ARAMAIC,
   1440             UnicodeBlock.OLD_SOUTH_ARABIAN,
   1441             UnicodeBlock.AVESTAN,
   1442             UnicodeBlock.INSCRIPTIONAL_PARTHIAN,
   1443             UnicodeBlock.INSCRIPTIONAL_PAHLAVI,
   1444             UnicodeBlock.OLD_TURKIC,
   1445             UnicodeBlock.RUMI_NUMERAL_SYMBOLS,
   1446             UnicodeBlock.KAITHI,
   1447             UnicodeBlock.EGYPTIAN_HIEROGLYPHS,
   1448             UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
   1449             UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
   1450             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
   1451 
   1452             // Unicode 6.0.
   1453             UnicodeBlock.MANDAIC,
   1454             UnicodeBlock.BATAK,
   1455             UnicodeBlock.ETHIOPIC_EXTENDED_A,
   1456             UnicodeBlock.BRAHMI,
   1457             UnicodeBlock.BAMUM_SUPPLEMENT,
   1458             UnicodeBlock.KANA_SUPPLEMENT,
   1459             UnicodeBlock.PLAYING_CARDS,
   1460             UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
   1461             UnicodeBlock.EMOTICONS,
   1462             UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS,
   1463             UnicodeBlock.ALCHEMICAL_SYMBOLS,
   1464             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
   1465         };
   1466 
   1467         /**
   1468          * Returns the Unicode block for the given block name, or null if there is no
   1469          * such block.
   1470          *
   1471          * <p>Block names may be one of the following:
   1472          * <ul>
   1473          * <li>Canonical block name, as defined by the Unicode specification;
   1474          * case-insensitive.</li>
   1475          * <li>Canonical block name without any spaces, as defined by the
   1476          * Unicode specification; case-insensitive.</li>
   1477          * <li>A {@code UnicodeBlock} constant identifier. This is determined by
   1478          * converting the canonical name to uppercase and replacing all spaces and hyphens
   1479          * with underscores.</li>
   1480          * </ul>
   1481          *
   1482          * @throws NullPointerException
   1483          *             if {@code blockName == null}.
   1484          * @throws IllegalArgumentException
   1485          *             if {@code blockName} is not the name of any known block.
   1486          * @since 1.5
   1487          */
   1488         public static UnicodeBlock forName(String blockName) {
   1489             if (blockName == null) {
   1490                 throw new NullPointerException("blockName == null");
   1491             }
   1492             int block = unicodeBlockForName(blockName);
   1493             if (block == -1) {
   1494                 throw new IllegalArgumentException("Unknown block: " + blockName);
   1495             }
   1496             return BLOCKS[block];
   1497         }
   1498 
   1499         /**
   1500          * Returns the Unicode block containing the given code point, or null if the
   1501          * code point does not belong to any known block.
   1502          */
   1503         public static UnicodeBlock of(char c) {
   1504             return of((int) c);
   1505         }
   1506 
   1507         /**
   1508          * Returns the Unicode block containing the given code point, or null if the
   1509          * code point does not belong to any known block.
   1510          */
   1511         public static UnicodeBlock of(int codePoint) {
   1512             checkValidCodePoint(codePoint);
   1513             int block = unicodeBlockForCodePoint(codePoint);
   1514             if (block == -1 || block >= BLOCKS.length) {
   1515                 return null;
   1516             }
   1517             return BLOCKS[block];
   1518         }
   1519 
        /**
         * Creates a block with the given name. Private, so instances can only be
         * created from within this class; the name is passed straight to the
         * superclass constructor.
         */
        private UnicodeBlock(String blockName) {
            super(blockName);
        }
   1523     }
   1524 
    // Native lookup of a block index by name. UnicodeBlock.forName treats a result
    // of -1 as "unknown block" and otherwise uses the result as an index into BLOCKS.
    private static native int unicodeBlockForName(String blockName);

    // Native lookup of a block index by code point. UnicodeBlock.of(int) treats -1,
    // or an index past the end of BLOCKS, as "no known block".
    private static native int unicodeBlockForCodePoint(int codePoint);

    // Native lookup by script name.
    // NOTE(review): the parameter is called blockName but presumably carries a
    // script name; the return convention is not visible here - confirm with callers.
    private static native int unicodeScriptForName(String blockName);

    // Native lookup of a script by code point.
    // NOTE(review): return convention is not visible here - confirm with callers.
    private static native int unicodeScriptForCodePoint(int codePoint);
   1532 
   1533 
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value. Unlike {@link #valueOf(char)}, this always creates a new instance.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
   1544 
    /**
     * Gets the primitive value of this character.
     *
     * @return the {@code char} stored in this object.
     */
    public char charValue() {
        return value;
    }
   1553 
   1554     private static void checkValidCodePoint(int codePoint) {
   1555         if (!isValidCodePoint(codePoint)) {
   1556             throw new IllegalArgumentException("Invalid code point: " + codePoint);
   1557         }
   1558     }
   1559 
   1560     /**
   1561      * Compares this object to the specified character object to determine their
   1562      * relative order.
   1563      *
   1564      * @param c
   1565      *            the character object to compare this object to.
   1566      * @return {@code 0} if the value of this character and the value of
   1567      *         {@code c} are equal; a positive value if the value of this
   1568      *         character is greater than the value of {@code c}; a negative
   1569      *         value if the value of this character is less than the value of
   1570      *         {@code c}.
   1571      * @see java.lang.Comparable
   1572      * @since 1.2
   1573      */
   1574     public int compareTo(Character c) {
   1575         return compare(value, c.value);
   1576     }
   1577 
   1578     /**
   1579      * Compares two {@code char} values.
   1580      * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
   1581      * @since 1.7
   1582      */
   1583     public static int compare(char lhs, char rhs) {
   1584         return lhs - rhs;
   1585     }
   1586 
   1587     /**
   1588      * Returns a {@code Character} instance for the {@code char} value passed.
   1589      * <p>
   1590      * If it is not necessary to get a new {@code Character} instance, it is
   1591      * recommended to use this method instead of the constructor, since it
   1592      * maintains a cache of instances which may result in better performance.
   1593      *
   1594      * @param c
   1595      *            the char value for which to get a {@code Character} instance.
   1596      * @return the {@code Character} instance for {@code c}.
   1597      * @since 1.5
   1598      */
   1599     public static Character valueOf(char c) {
   1600         return c < 128 ? SMALL_VALUES[c] : new Character(c);
   1601     }
   1602 
   1603     /**
   1604      * A cache of instances used by {@link #valueOf(char)} and auto-boxing
   1605      */
   1606     private static final Character[] SMALL_VALUES = new Character[128];
   1607 
   1608     static {
   1609         for (int i = 0; i < 128; i++) {
   1610             SMALL_VALUES[i] = new Character((char) i);
   1611         }
   1612     }
   1613     /**
   1614      * Indicates whether {@code codePoint} is a valid Unicode code point.
   1615      *
   1616      * @param codePoint
   1617      *            the code point to test.
   1618      * @return {@code true} if {@code codePoint} is a valid Unicode code point;
   1619      *         {@code false} otherwise.
   1620      * @since 1.5
   1621      */
   1622     public static boolean isValidCodePoint(int codePoint) {
   1623         return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
   1624     }
   1625 
   1626     /**
   1627      * Indicates whether {@code codePoint} is within the supplementary code
   1628      * point range.
   1629      *
   1630      * @param codePoint
   1631      *            the code point to test.
   1632      * @return {@code true} if {@code codePoint} is within the supplementary
   1633      *         code point range; {@code false} otherwise.
   1634      * @since 1.5
   1635      */
   1636     public static boolean isSupplementaryCodePoint(int codePoint) {
   1637         return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
   1638     }
   1639 
   1640     /**
   1641      * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
   1642      * that is used for representing supplementary characters in UTF-16
   1643      * encoding.
   1644      *
   1645      * @param ch
   1646      *            the character to test.
   1647      * @return {@code true} if {@code ch} is a high-surrogate code unit;
   1648      *         {@code false} otherwise.
   1649      * @see #isLowSurrogate(char)
   1650      * @since 1.5
   1651      */
   1652     public static boolean isHighSurrogate(char ch) {
   1653         return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
   1654     }
   1655 
   1656     /**
   1657      * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
   1658      * that is used for representing supplementary characters in UTF-16
   1659      * encoding.
   1660      *
   1661      * @param ch
   1662      *            the character to test.
   1663      * @return {@code true} if {@code ch} is a low-surrogate code unit;
   1664      *         {@code false} otherwise.
   1665      * @see #isHighSurrogate(char)
   1666      * @since 1.5
   1667      */
   1668     public static boolean isLowSurrogate(char ch) {
   1669         return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
   1670     }
   1671 
   1672     /**
   1673      * Returns true if the given character is a high or low surrogate.
   1674      * @since 1.7
   1675      */
   1676     public static boolean isSurrogate(char ch) {
   1677         return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
   1678     }
   1679 
   1680     /**
   1681      * Indicates whether the specified character pair is a valid surrogate pair.
   1682      *
   1683      * @param high
   1684      *            the high surrogate unit to test.
   1685      * @param low
   1686      *            the low surrogate unit to test.
   1687      * @return {@code true} if {@code high} is a high-surrogate code unit and
   1688      *         {@code low} is a low-surrogate code unit; {@code false}
   1689      *         otherwise.
   1690      * @see #isHighSurrogate(char)
   1691      * @see #isLowSurrogate(char)
   1692      * @since 1.5
   1693      */
   1694     public static boolean isSurrogatePair(char high, char low) {
   1695         return (isHighSurrogate(high) && isLowSurrogate(low));
   1696     }
   1697 
   1698     /**
   1699      * Calculates the number of {@code char} values required to represent the
   1700      * specified Unicode code point. This method checks if the {@code codePoint}
   1701      * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
   1702      * returned, otherwise {@code 1}. To test if the code point is valid, use
   1703      * the {@link #isValidCodePoint(int)} method.
   1704      *
   1705      * @param codePoint
   1706      *            the code point for which to calculate the number of required
   1707      *            chars.
   1708      * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
   1709      * @see #isValidCodePoint(int)
   1710      * @see #isSupplementaryCodePoint(int)
   1711      * @since 1.5
   1712      */
   1713     public static int charCount(int codePoint) {
   1714         return (codePoint >= 0x10000 ? 2 : 1);
   1715     }
   1716 
   1717     /**
   1718      * Converts a surrogate pair into a Unicode code point. This method assumes
   1719      * that the pair are valid surrogates. If the pair are <i>not</i> valid
   1720      * surrogates, then the result is indeterminate. The
   1721      * {@link #isSurrogatePair(char, char)} method should be used prior to this
   1722      * method to validate the pair.
   1723      *
   1724      * @param high
   1725      *            the high surrogate unit.
   1726      * @param low
   1727      *            the low surrogate unit.
   1728      * @return the Unicode code point corresponding to the surrogate unit pair.
   1729      * @see #isSurrogatePair(char, char)
   1730      * @since 1.5
   1731      */
   1732     public static int toCodePoint(char high, char low) {
   1733         // See RFC 2781, Section 2.2
   1734         // http://www.ietf.org/rfc/rfc2781.txt
   1735         int h = (high & 0x3FF) << 10;
   1736         int l = low & 0x3FF;
   1737         return (h | l) + 0x10000;
   1738     }
   1739 
   1740     /**
   1741      * Returns the code point at {@code index} in the specified sequence of
   1742      * character units. If the unit at {@code index} is a high-surrogate unit,
   1743      * {@code index + 1} is less than the length of the sequence and the unit at
   1744      * {@code index + 1} is a low-surrogate unit, then the supplementary code
   1745      * point represented by the pair is returned; otherwise the {@code char}
   1746      * value at {@code index} is returned.
   1747      *
   1748      * @param seq
   1749      *            the source sequence of {@code char} units.
   1750      * @param index
   1751      *            the position in {@code seq} from which to retrieve the code
   1752      *            point.
   1753      * @return the Unicode code point or {@code char} value at {@code index} in
   1754      *         {@code seq}.
   1755      * @throws NullPointerException
   1756      *             if {@code seq} is {@code null}.
   1757      * @throws IndexOutOfBoundsException
   1758      *             if the {@code index} is negative or greater than or equal to
   1759      *             the length of {@code seq}.
   1760      * @since 1.5
   1761      */
   1762     public static int codePointAt(CharSequence seq, int index) {
   1763         if (seq == null) {
   1764             throw new NullPointerException("seq == null");
   1765         }
   1766         int len = seq.length();
   1767         if (index < 0 || index >= len) {
   1768             throw new IndexOutOfBoundsException();
   1769         }
   1770 
   1771         char high = seq.charAt(index++);
   1772         if (index >= len) {
   1773             return high;
   1774         }
   1775         char low = seq.charAt(index);
   1776         if (isSurrogatePair(high, low)) {
   1777             return toCodePoint(high, low);
   1778         }
   1779         return high;
   1780     }
   1781 
   1782     /**
   1783      * Returns the code point at {@code index} in the specified array of
   1784      * character units. If the unit at {@code index} is a high-surrogate unit,
   1785      * {@code index + 1} is less than the length of the array and the unit at
   1786      * {@code index + 1} is a low-surrogate unit, then the supplementary code
   1787      * point represented by the pair is returned; otherwise the {@code char}
   1788      * value at {@code index} is returned.
   1789      *
   1790      * @param seq
   1791      *            the source array of {@code char} units.
   1792      * @param index
   1793      *            the position in {@code seq} from which to retrieve the code
   1794      *            point.
   1795      * @return the Unicode code point or {@code char} value at {@code index} in
   1796      *         {@code seq}.
   1797      * @throws NullPointerException
   1798      *             if {@code seq} is {@code null}.
   1799      * @throws IndexOutOfBoundsException
   1800      *             if the {@code index} is negative or greater than or equal to
   1801      *             the length of {@code seq}.
   1802      * @since 1.5
   1803      */
   1804     public static int codePointAt(char[] seq, int index) {
   1805         if (seq == null) {
   1806             throw new NullPointerException("seq == null");
   1807         }
   1808         int len = seq.length;
   1809         if (index < 0 || index >= len) {
   1810             throw new IndexOutOfBoundsException();
   1811         }
   1812 
   1813         char high = seq[index++];
   1814         if (index >= len) {
   1815             return high;
   1816         }
   1817         char low = seq[index];
   1818         if (isSurrogatePair(high, low)) {
   1819             return toCodePoint(high, low);
   1820         }
   1821         return high;
   1822     }
   1823 
   1824     /**
   1825      * Returns the code point at {@code index} in the specified array of
   1826      * character units, where {@code index} has to be less than {@code limit}.
   1827      * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
   1828      * is less than {@code limit} and the unit at {@code index + 1} is a
   1829      * low-surrogate unit, then the supplementary code point represented by the
   1830      * pair is returned; otherwise the {@code char} value at {@code index} is
   1831      * returned.
   1832      *
   1833      * @param seq
   1834      *            the source array of {@code char} units.
   1835      * @param index
   1836      *            the position in {@code seq} from which to get the code point.
   1837      * @param limit
   1838      *            the index after the last unit in {@code seq} that can be used.
   1839      * @return the Unicode code point or {@code char} value at {@code index} in
   1840      *         {@code seq}.
   1841      * @throws NullPointerException
   1842      *             if {@code seq} is {@code null}.
   1843      * @throws IndexOutOfBoundsException
   1844      *             if {@code index < 0}, {@code index >= limit},
   1845      *             {@code limit < 0} or if {@code limit} is greater than the
   1846      *             length of {@code seq}.
   1847      * @since 1.5
   1848      */
   1849     public static int codePointAt(char[] seq, int index, int limit) {
   1850         if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
   1851             throw new IndexOutOfBoundsException();
   1852         }
   1853 
   1854         char high = seq[index++];
   1855         if (index >= limit) {
   1856             return high;
   1857         }
   1858         char low = seq[index];
   1859         if (isSurrogatePair(high, low)) {
   1860             return toCodePoint(high, low);
   1861         }
   1862         return high;
   1863     }
   1864 
   1865     /**
   1866      * Returns the code point that precedes {@code index} in the specified
   1867      * sequence of character units. If the unit at {@code index - 1} is a
   1868      * low-surrogate unit, {@code index - 2} is not negative and the unit at
   1869      * {@code index - 2} is a high-surrogate unit, then the supplementary code
   1870      * point represented by the pair is returned; otherwise the {@code char}
   1871      * value at {@code index - 1} is returned.
   1872      *
   1873      * @param seq
   1874      *            the source sequence of {@code char} units.
   1875      * @param index
   1876      *            the position in {@code seq} following the code
   1877      *            point that should be returned.
   1878      * @return the Unicode code point or {@code char} value before {@code index}
   1879      *         in {@code seq}.
   1880      * @throws NullPointerException
   1881      *             if {@code seq} is {@code null}.
   1882      * @throws IndexOutOfBoundsException
   1883      *             if the {@code index} is less than 1 or greater than the
   1884      *             length of {@code seq}.
   1885      * @since 1.5
   1886      */
   1887     public static int codePointBefore(CharSequence seq, int index) {
   1888         if (seq == null) {
   1889             throw new NullPointerException("seq == null");
   1890         }
   1891         int len = seq.length();
   1892         if (index < 1 || index > len) {
   1893             throw new IndexOutOfBoundsException();
   1894         }
   1895 
   1896         char low = seq.charAt(--index);
   1897         if (--index < 0) {
   1898             return low;
   1899         }
   1900         char high = seq.charAt(index);
   1901         if (isSurrogatePair(high, low)) {
   1902             return toCodePoint(high, low);
   1903         }
   1904         return low;
   1905     }
   1906 
   1907     /**
   1908      * Returns the code point that precedes {@code index} in the specified
   1909      * array of character units. If the unit at {@code index - 1} is a
   1910      * low-surrogate unit, {@code index - 2} is not negative and the unit at
   1911      * {@code index - 2} is a high-surrogate unit, then the supplementary code
   1912      * point represented by the pair is returned; otherwise the {@code char}
   1913      * value at {@code index - 1} is returned.
   1914      *
   1915      * @param seq
   1916      *            the source array of {@code char} units.
   1917      * @param index
   1918      *            the position in {@code seq} following the code
   1919      *            point that should be returned.
   1920      * @return the Unicode code point or {@code char} value before {@code index}
   1921      *         in {@code seq}.
   1922      * @throws NullPointerException
   1923      *             if {@code seq} is {@code null}.
   1924      * @throws IndexOutOfBoundsException
   1925      *             if the {@code index} is less than 1 or greater than the
   1926      *             length of {@code seq}.
   1927      * @since 1.5
   1928      */
   1929     public static int codePointBefore(char[] seq, int index) {
   1930         if (seq == null) {
   1931             throw new NullPointerException("seq == null");
   1932         }
   1933         int len = seq.length;
   1934         if (index < 1 || index > len) {
   1935             throw new IndexOutOfBoundsException();
   1936         }
   1937 
   1938         char low = seq[--index];
   1939         if (--index < 0) {
   1940             return low;
   1941         }
   1942         char high = seq[index];
   1943         if (isSurrogatePair(high, low)) {
   1944             return toCodePoint(high, low);
   1945         }
   1946         return low;
   1947     }
   1948 
   1949     /**
   1950      * Returns the code point that precedes the {@code index} in the specified
   1951      * array of character units and is not less than {@code start}. If the unit
   1952      * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
   1953      * less than {@code start} and the unit at {@code index - 2} is a
   1954      * high-surrogate unit, then the supplementary code point represented by the
   1955      * pair is returned; otherwise the {@code char} value at {@code index - 1}
   1956      * is returned.
   1957      *
   1958      * @param seq
   1959      *            the source array of {@code char} units.
   1960      * @param index
   1961      *            the position in {@code seq} following the code point that
   1962      *            should be returned.
   1963      * @param start
   1964      *            the index of the first element in {@code seq}.
   1965      * @return the Unicode code point or {@code char} value before {@code index}
   1966      *         in {@code seq}.
   1967      * @throws NullPointerException
   1968      *             if {@code seq} is {@code null}.
   1969      * @throws IndexOutOfBoundsException
   1970      *             if the {@code index <= start}, {@code start < 0},
   1971      *             {@code index} is greater than the length of {@code seq}, or
   1972      *             if {@code start} is equal or greater than the length of
   1973      *             {@code seq}.
   1974      * @since 1.5
   1975      */
   1976     public static int codePointBefore(char[] seq, int index, int start) {
   1977         if (seq == null) {
   1978             throw new NullPointerException("seq == null");
   1979         }
   1980         int len = seq.length;
   1981         if (index <= start || index > len || start < 0 || start >= len) {
   1982             throw new IndexOutOfBoundsException();
   1983         }
   1984 
   1985         char low = seq[--index];
   1986         if (--index < start) {
   1987             return low;
   1988         }
   1989         char high = seq[index];
   1990         if (isSurrogatePair(high, low)) {
   1991             return toCodePoint(high, low);
   1992         }
   1993         return low;
   1994     }
   1995 
   1996     /**
   1997      * Converts the specified Unicode code point into a UTF-16 encoded sequence
   1998      * and copies the value(s) into the char array {@code dst}, starting at
   1999      * index {@code dstIndex}.
   2000      *
   2001      * @param codePoint
   2002      *            the Unicode code point to encode.
   2003      * @param dst
   2004      *            the destination array to copy the encoded value into.
   2005      * @param dstIndex
   2006      *            the index in {@code dst} from where to start copying.
   2007      * @return the number of {@code char} value units copied into {@code dst}.
   2008      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   2009      * @throws NullPointerException
   2010      *             if {@code dst} is {@code null}.
   2011      * @throws IndexOutOfBoundsException
   2012      *             if {@code dstIndex} is negative, greater than or equal to
   2013      *             {@code dst.length} or equals {@code dst.length - 1} when
   2014      *             {@code codePoint} is a
   2015      *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
   2016      * @since 1.5
   2017      */
   2018     public static int toChars(int codePoint, char[] dst, int dstIndex) {
   2019         checkValidCodePoint(codePoint);
   2020         if (dst == null) {
   2021             throw new NullPointerException("dst == null");
   2022         }
   2023         if (dstIndex < 0 || dstIndex >= dst.length) {
   2024             throw new IndexOutOfBoundsException();
   2025         }
   2026 
   2027         if (isSupplementaryCodePoint(codePoint)) {
   2028             if (dstIndex == dst.length - 1) {
   2029                 throw new IndexOutOfBoundsException();
   2030             }
   2031             // See RFC 2781, Section 2.1
   2032             // http://www.ietf.org/rfc/rfc2781.txt
   2033             int cpPrime = codePoint - 0x10000;
   2034             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
   2035             int low = 0xDC00 | (cpPrime & 0x3FF);
   2036             dst[dstIndex] = (char) high;
   2037             dst[dstIndex + 1] = (char) low;
   2038             return 2;
   2039         }
   2040 
   2041         dst[dstIndex] = (char) codePoint;
   2042         return 1;
   2043     }
   2044 
   2045     /**
   2046      * Converts the specified Unicode code point into a UTF-16 encoded sequence
   2047      * and returns it as a char array.
   2048      *
   2049      * @param codePoint
   2050      *            the Unicode code point to encode.
   2051      * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
   2052      *         {@link #isSupplementaryCodePoint(int) supplementary code point},
   2053      *         then the returned array contains two characters, otherwise it
   2054      *         contains just one character.
   2055      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   2056      * @since 1.5
   2057      */
   2058     public static char[] toChars(int codePoint) {
   2059         checkValidCodePoint(codePoint);
   2060         if (isSupplementaryCodePoint(codePoint)) {
   2061             int cpPrime = codePoint - 0x10000;
   2062             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
   2063             int low = 0xDC00 | (cpPrime & 0x3FF);
   2064             return new char[] { (char) high, (char) low };
   2065         }
   2066         return new char[] { (char) codePoint };
   2067     }
   2068 
   2069     /**
   2070      * Counts the number of Unicode code points in the subsequence of the
   2071      * specified character sequence, as delineated by {@code beginIndex} and
   2072      * {@code endIndex}. Any surrogate values with missing pair values will be
   2073      * counted as one code point.
   2074      *
   2075      * @param seq
   2076      *            the {@code CharSequence} to look through.
   2077      * @param beginIndex
   2078      *            the inclusive index to begin counting at.
   2079      * @param endIndex
   2080      *            the exclusive index to stop counting at.
   2081      * @return the number of Unicode code points.
   2082      * @throws NullPointerException
   2083      *             if {@code seq} is {@code null}.
   2084      * @throws IndexOutOfBoundsException
   2085      *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
   2086      *             if {@code endIndex} is greater than the length of {@code seq}.
   2087      * @since 1.5
   2088      */
   2089     public static int codePointCount(CharSequence seq, int beginIndex,
   2090             int endIndex) {
   2091         if (seq == null) {
   2092             throw new NullPointerException("seq == null");
   2093         }
   2094         int len = seq.length();
   2095         if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
   2096             throw new IndexOutOfBoundsException();
   2097         }
   2098 
   2099         int result = 0;
   2100         for (int i = beginIndex; i < endIndex; i++) {
   2101             char c = seq.charAt(i);
   2102             if (isHighSurrogate(c)) {
   2103                 if (++i < endIndex) {
   2104                     c = seq.charAt(i);
   2105                     if (!isLowSurrogate(c)) {
   2106                         result++;
   2107                     }
   2108                 }
   2109             }
   2110             result++;
   2111         }
   2112         return result;
   2113     }
   2114 
   2115     /**
   2116      * Counts the number of Unicode code points in the subsequence of the
   2117      * specified char array, as delineated by {@code offset} and {@code count}.
   2118      * Any surrogate values with missing pair values will be counted as one code
   2119      * point.
   2120      *
   2121      * @param seq
   2122      *            the char array to look through
   2123      * @param offset
   2124      *            the inclusive index to begin counting at.
   2125      * @param count
   2126      *            the number of {@code char} values to look through in
   2127      *            {@code seq}.
   2128      * @return the number of Unicode code points.
   2129      * @throws NullPointerException
   2130      *             if {@code seq} is {@code null}.
   2131      * @throws IndexOutOfBoundsException
   2132      *             if {@code offset < 0}, {@code count < 0} or if
   2133      *             {@code offset + count} is greater than the length of
   2134      *             {@code seq}.
   2135      * @since 1.5
   2136      */
   2137     public static int codePointCount(char[] seq, int offset, int count) {
   2138         Arrays.checkOffsetAndCount(seq.length, offset, count);
   2139         int endIndex = offset + count;
   2140         int result = 0;
   2141         for (int i = offset; i < endIndex; i++) {
   2142             char c = seq[i];
   2143             if (isHighSurrogate(c)) {
   2144                 if (++i < endIndex) {
   2145                     c = seq[i];
   2146                     if (!isLowSurrogate(c)) {
   2147                         result++;
   2148                     }
   2149                 }
   2150             }
   2151             result++;
   2152         }
   2153         return result;
   2154     }
   2155 
   2156     /**
   2157      * Determines the index in the specified character sequence that is offset
   2158      * {@code codePointOffset} code points from {@code index}.
   2159      *
   2160      * @param seq
   2161      *            the character sequence to find the index in.
   2162      * @param index
   2163      *            the start index in {@code seq}.
   2164      * @param codePointOffset
   2165      *            the number of code points to look backwards or forwards; may
   2166      *            be a negative or positive value.
   2167      * @return the index in {@code seq} that is {@code codePointOffset} code
   2168      *         points away from {@code index}.
   2169      * @throws NullPointerException
   2170      *             if {@code seq} is {@code null}.
   2171      * @throws IndexOutOfBoundsException
   2172      *             if {@code index < 0}, {@code index} is greater than the
   2173      *             length of {@code seq}, or if there are not enough values in
   2174      *             {@code seq} to skip {@code codePointOffset} code points
   2175      *             forwards or backwards (if {@code codePointOffset} is
   2176      *             negative) from {@code index}.
   2177      * @since 1.5
   2178      */
   2179     public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
   2180         if (seq == null) {
   2181             throw new NullPointerException("seq == null");
   2182         }
   2183         int len = seq.length();
   2184         if (index < 0 || index > len) {
   2185             throw new IndexOutOfBoundsException();
   2186         }
   2187 
   2188         if (codePointOffset == 0) {
   2189             return index;
   2190         }
   2191 
   2192         if (codePointOffset > 0) {
   2193             int codePoints = codePointOffset;
   2194             int i = index;
   2195             while (codePoints > 0) {
   2196                 codePoints--;
   2197                 if (i >= len) {
   2198                     throw new IndexOutOfBoundsException();
   2199                 }
   2200                 if (isHighSurrogate(seq.charAt(i))) {
   2201                     int next = i + 1;
   2202                     if (next < len && isLowSurrogate(seq.charAt(next))) {
   2203                         i++;
   2204                     }
   2205                 }
   2206                 i++;
   2207             }
   2208             return i;
   2209         }
   2210 
   2211         int codePoints = -codePointOffset;
   2212         int i = index;
   2213         while (codePoints > 0) {
   2214             codePoints--;
   2215             i--;
   2216             if (i < 0) {
   2217                 throw new IndexOutOfBoundsException();
   2218             }
   2219             if (isLowSurrogate(seq.charAt(i))) {
   2220                 int prev = i - 1;
   2221                 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
   2222                     i--;
   2223                 }
   2224             }
   2225         }
   2226         return i;
   2227     }
   2228 
   2229     /**
   2230      * Determines the index in a subsequence of the specified character array
   2231      * that is offset {@code codePointOffset} code points from {@code index}.
   2232      * The subsequence is delineated by {@code start} and {@code count}.
   2233      *
   2234      * @param seq
   2235      *            the character array to find the index in.
   2236      * @param start
   2237      *            the inclusive index that marks the beginning of the
   2238      *            subsequence.
   2239      * @param count
   2240      *            the number of {@code char} values to include within the
   2241      *            subsequence.
   2242      * @param index
   2243      *            the start index in the subsequence of the char array.
   2244      * @param codePointOffset
   2245      *            the number of code points to look backwards or forwards; may
   2246      *            be a negative or positive value.
   2247      * @return the index in {@code seq} that is {@code codePointOffset} code
   2248      *         points away from {@code index}.
   2249      * @throws NullPointerException
   2250      *             if {@code seq} is {@code null}.
   2251      * @throws IndexOutOfBoundsException
   2252      *             if {@code start < 0}, {@code count < 0},
   2253      *             {@code index < start}, {@code index > start + count},
   2254      *             {@code start + count} is greater than the length of
   2255      *             {@code seq}, or if there are not enough values in
   2256      *             {@code seq} to skip {@code codePointOffset} code points
   2257      *             forward or backward (if {@code codePointOffset} is
   2258      *             negative) from {@code index}.
   2259      * @since 1.5
   2260      */
   2261     public static int offsetByCodePoints(char[] seq, int start, int count,
   2262             int index, int codePointOffset) {
   2263         Arrays.checkOffsetAndCount(seq.length, start, count);
   2264         int end = start + count;
   2265         if (index < start || index > end) {
   2266             throw new IndexOutOfBoundsException();
   2267         }
   2268 
   2269         if (codePointOffset == 0) {
   2270             return index;
   2271         }
   2272 
   2273         if (codePointOffset > 0) {
   2274             int codePoints = codePointOffset;
   2275             int i = index;
   2276             while (codePoints > 0) {
   2277                 codePoints--;
   2278                 if (i >= end) {
   2279                     throw new IndexOutOfBoundsException();
   2280                 }
   2281                 if (isHighSurrogate(seq[i])) {
   2282                     int next = i + 1;
   2283                     if (next < end && isLowSurrogate(seq[next])) {
   2284                         i++;
   2285                     }
   2286                 }
   2287                 i++;
   2288             }
   2289             return i;
   2290         }
   2291 
   2292         int codePoints = -codePointOffset;
   2293         int i = index;
   2294         while (codePoints > 0) {
   2295             codePoints--;
   2296             i--;
   2297             if (i < start) {
   2298                 throw new IndexOutOfBoundsException();
   2299             }
   2300             if (isLowSurrogate(seq[i])) {
   2301                 int prev = i - 1;
   2302                 if (prev >= start && isHighSurrogate(seq[prev])) {
   2303                     i--;
   2304                 }
   2305             }
   2306         }
   2307         return i;
   2308     }
   2309 
   2310     /**
   2311      * Convenience method to determine the value of the specified character
   2312      * {@code c} in the supplied radix. The value of {@code radix} must be
   2313      * between MIN_RADIX and MAX_RADIX.
   2314      *
   2315      * @param c
   2316      *            the character to determine the value of.
   2317      * @param radix
   2318      *            the radix.
   2319      * @return the value of {@code c} in {@code radix} if {@code radix} lies
   2320      *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
   2321      */
   2322     public static int digit(char c, int radix) {
   2323         return digit((int) c, radix);
   2324     }
   2325 
   2326     /**
   2327      * Convenience method to determine the value of the character
   2328      * {@code codePoint} in the supplied radix. The value of {@code radix} must
   2329      * be between MIN_RADIX and MAX_RADIX.
   2330      *
   2331      * @param codePoint
   2332      *            the character, including supplementary characters.
   2333      * @param radix
   2334      *            the radix.
   2335      * @return if {@code radix} lies between {@link #MIN_RADIX} and
   2336      *         {@link #MAX_RADIX} then the value of the character in the radix;
   2337      *         -1 otherwise.
   2338      */
   2339     public static int digit(int codePoint, int radix) {
   2340         if (radix < MIN_RADIX || radix > MAX_RADIX) {
   2341             return -1;
   2342         }
   2343         if (codePoint < 128) {
   2344             // Optimized for ASCII
   2345             int result = -1;
   2346             if ('0' <= codePoint && codePoint <= '9') {
   2347                 result = codePoint - '0';
   2348             } else if ('a' <= codePoint && codePoint <= 'z') {
   2349                 result = 10 + (codePoint - 'a');
   2350             } else if ('A' <= codePoint && codePoint <= 'Z') {
   2351                 result = 10 + (codePoint - 'A');
   2352             }
   2353             return result < radix ? result : -1;
   2354         }
   2355         return digitImpl(codePoint, radix);
   2356     }
   2357 
   2358     private static native int digitImpl(int codePoint, int radix);
   2359 
   2360     /**
   2361      * Compares this object with the specified object and indicates if they are
   2362      * equal. In order to be equal, {@code object} must be an instance of
   2363      * {@code Character} and have the same char value as this object.
   2364      *
   2365      * @param object
   2366      *            the object to compare this double with.
   2367      * @return {@code true} if the specified object is equal to this
   2368      *         {@code Character}; {@code false} otherwise.
   2369      */
   2370     @Override
   2371     public boolean equals(Object object) {
   2372         return (object instanceof Character) && (((Character) object).value == value);
   2373     }
   2374 
   2375     /**
   2376      * Returns the character which represents the specified digit in the
   2377      * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
   2378      * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
   2379      * smaller than {@code radix}. If any of these conditions does not hold, 0
   2380      * is returned.
   2381      *
   2382      * @param digit
   2383      *            the integer value.
   2384      * @param radix
   2385      *            the radix.
   2386      * @return the character which represents the {@code digit} in the
   2387      *         {@code radix}.
   2388      */
   2389     public static char forDigit(int digit, int radix) {
   2390         if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
   2391             if (digit >= 0 && digit < radix) {
   2392                 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
   2393             }
   2394         }
   2395         return 0;
   2396     }
   2397 
   2398     /**
   2399      * Returns a human-readable name for the given code point,
   2400      * or null if the code point is unassigned.
   2401      *
   2402      * <p>As a fallback mechanism this method returns strings consisting of the Unicode
   2403      * block name (with underscores replaced by spaces), a single space, and the uppercase
   2404      * hex value of the code point, using as few digits as necessary.
   2405      *
   2406      * <p>Examples:
   2407      * <ul>
   2408      * <li>{@code Character.getName(0)} returns "NULL".
   2409      * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
   2410      * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
   2411      * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
   2412      * </ul>
   2413      *
   2414      * <p>Note that the exact strings returned will vary from release to release.
   2415      *
   2416      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
   2417      * @since 1.7
   2418      */
   2419     public static String getName(int codePoint) {
   2420         checkValidCodePoint(codePoint);
   2421         if (getType(codePoint) == Character.UNASSIGNED) {
   2422             return null;
   2423         }
   2424         String result = getNameImpl(codePoint);
   2425         if (result == null) {
   2426             String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
   2427             result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0);
   2428         }
   2429         return result;
   2430     }
   2431 
   2432     private static native String getNameImpl(int codePoint);
   2433 
   2434     /**
   2435      * Returns the numeric value of the specified Unicode character.
   2436      * See {@link #getNumericValue(int)}.
   2437      *
   2438      * @param c the character
   2439      * @return a non-negative numeric integer value if a numeric value for
   2440      *         {@code c} exists, -1 if there is no numeric value for {@code c},
   2441      *         -2 if the numeric value can not be represented as an integer.
   2442      */
   2443     public static int getNumericValue(char c) {
   2444         return getNumericValue((int) c);
   2445     }
   2446 
   2447     /**
   2448      * Gets the numeric value of the specified Unicode code point. For example,
   2449      * the code point '\u216B' stands for the Roman number XII, which has the
   2450      * numeric value 12.
   2451      *
   2452      * <p>There are two points of divergence between this method and the Unicode
   2453      * specification. This method treats the letters a-z (in both upper and lower
   2454      * cases, and their full-width variants) as numbers from 10 to 35. The
   2455      * Unicode specification also supports the idea of code points with non-integer
   2456      * numeric values; this method does not (except to the extent of returning -2
   2457      * for such code points).
   2458      *
   2459      * @param codePoint the code point
   2460      * @return a non-negative numeric integer value if a numeric value for
   2461      *         {@code codePoint} exists, -1 if there is no numeric value for
   2462      *         {@code codePoint}, -2 if the numeric value can not be
   2463      *         represented with an integer.
   2464      */
   2465     public static int getNumericValue(int codePoint) {
   2466         // This is both an optimization and papers over differences between Java and ICU.
   2467         if (codePoint < 128) {
   2468             if (codePoint >= '0' && codePoint <= '9') {
   2469                 return codePoint - '0';
   2470             }
   2471             if (codePoint >= 'a' && codePoint <= 'z') {
   2472                 return codePoint - ('a' - 10);
   2473             }
   2474             if (codePoint >= 'A' && codePoint <= 'Z') {
   2475                 return codePoint - ('A' - 10);
   2476             }
   2477             return -1;
   2478         }
   2479         // Full-width uppercase A-Z.
   2480         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
   2481             return codePoint - 0xff17;
   2482         }
   2483         // Full-width lowercase a-z.
   2484         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
   2485             return codePoint - 0xff37;
   2486         }
   2487         return getNumericValueImpl(codePoint);
   2488     }
   2489 
   2490     private static native int getNumericValueImpl(int codePoint);
   2491 
   2492     /**
   2493      * Gets the general Unicode category of the specified character.
   2494      *
   2495      * @param c
   2496      *            the character to get the category of.
   2497      * @return the Unicode category of {@code c}.
   2498      */
   2499     public static int getType(char c) {
   2500         return getType((int) c);
   2501     }
   2502 
   2503     /**
   2504      * Gets the general Unicode category of the specified code point.
   2505      *
   2506      * @param codePoint
   2507      *            the Unicode code point to get the category of.
   2508      * @return the Unicode category of {@code codePoint}.
   2509      */
   2510     public static int getType(int codePoint) {
   2511         int type = getTypeImpl(codePoint);
   2512         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
   2513         if (type <= Character.FORMAT) {
   2514             return type;
   2515         }
   2516         return (type + 1);
   2517     }
   2518 
   2519     private static native int getTypeImpl(int codePoint);
   2520 
   2521     /**
   2522      * Gets the Unicode directionality of the specified character.
   2523      *
   2524      * @param c
   2525      *            the character to get the directionality of.
   2526      * @return the Unicode directionality of {@code c}.
   2527      */
   2528     public static byte getDirectionality(char c) {
   2529         return getDirectionality((int)c);
   2530     }
   2531 
   2532     /**
   2533      * Returns the Unicode directionality of the given code point.
   2534      * This will be one of the {@code DIRECTIONALITY_} constants.
   2535      * For characters whose directionality is undefined, or whose
   2536      * directionality has no appropriate constant in this class,
   2537      * {@code DIRECTIONALITY_UNDEFINED} is returned.
   2538      */
   2539     public static byte getDirectionality(int codePoint) {
   2540         if (getType(codePoint) == Character.UNASSIGNED) {
   2541             return Character.DIRECTIONALITY_UNDEFINED;
   2542         }
   2543 
   2544         byte directionality = getIcuDirectionality(codePoint);
   2545         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
   2546             return DIRECTIONALITY[directionality];
   2547         }
   2548         return Character.DIRECTIONALITY_UNDEFINED;
   2549     }
   2550 
    /**
     * Native lookup of the raw directionality value for {@code codePoint}.
     * {@link #getDirectionality(int)} uses the returned value as an index
     * into its translation table; presumably the value is ICU's own
     * directionality code (inferred from the method name, not verified here).
     *
     * @hide - internal use only.
     */
    public static native byte getIcuDirectionality(int codePoint);
   2555 
   2556     /**
   2557      * Indicates whether the specified character is mirrored.
   2558      *
   2559      * @param c
   2560      *            the character to check.
   2561      * @return {@code true} if {@code c} is mirrored; {@code false}
   2562      *         otherwise.
   2563      */
   2564     public static boolean isMirrored(char c) {
   2565         return isMirrored((int) c);
   2566     }
   2567 
   2568     /**
   2569      * Indicates whether the specified code point is mirrored.
   2570      *
   2571      * @param codePoint
   2572      *            the code point to check.
   2573      * @return {@code true} if {@code codePoint} is mirrored, {@code false}
   2574      *         otherwise.
   2575      */
   2576     public static boolean isMirrored(int codePoint) {
   2577         return isMirroredImpl(codePoint);
   2578     }
   2579 
   2580     private static native boolean isMirroredImpl(int codePoint);
   2581 
   2582     @Override
   2583     public int hashCode() {
   2584         return value;
   2585     }
   2586 
   2587     /**
   2588      * Returns the high surrogate for the given code point. The result is meaningless if
   2589      * the given code point is not a supplementary character.
   2590      * @since 1.7
   2591      */
   2592     public static char highSurrogate(int codePoint) {
   2593         return (char) ((codePoint >> 10) + 0xd7c0);
   2594     }
   2595 
   2596     /**
   2597      * Returns the low surrogate for the given code point. The result is meaningless if
   2598      * the given code point is not a supplementary character.
   2599      * @since 1.7
   2600      */
   2601     public static char lowSurrogate(int codePoint) {
   2602         return (char) ((codePoint & 0x3ff) | 0xdc00);
   2603     }
   2604 
    /**
     * Returns true if the given code point is alphabetic. That is,
     * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories.
     * The check is implemented natively.
     *
     * @param codePoint the code point to check.
     * @return {@code true} if {@code codePoint} is alphabetic; {@code false} otherwise.
     * @since 1.7
     */
    public static native boolean isAlphabetic(int codePoint);
   2611 
   2612     /**
   2613      * Returns true if the given code point is in the Basic Multilingual Plane (BMP).
   2614      * Such code points can be represented by a single {@code char}.
   2615      * @since 1.7
   2616      */
   2617     public static boolean isBmpCodePoint(int codePoint) {
   2618        return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE;
   2619     }
   2620 
   2621     /**
   2622      * Indicates whether the specified character is defined in the Unicode
   2623      * specification.
   2624      *
   2625      * @param c
   2626      *            the character to check.
   2627      * @return {@code true} if the general Unicode category of the character is
   2628      *         not {@code UNASSIGNED}; {@code false} otherwise.
   2629      */
   2630     public static boolean isDefined(char c) {
   2631         return isDefinedImpl(c);
   2632     }
   2633 
   2634     /**
   2635      * Indicates whether the specified code point is defined in the Unicode
   2636      * specification.
   2637      *
   2638      * @param codePoint
   2639      *            the code point to check.
   2640      * @return {@code true} if the general Unicode category of the code point is
   2641      *         not {@code UNASSIGNED}; {@code false} otherwise.
   2642      */
   2643     public static boolean isDefined(int codePoint) {
   2644         return isDefinedImpl(codePoint);
   2645     }
   2646 
   2647     private static native boolean isDefinedImpl(int codePoint);
   2648 
   2649     /**
   2650      * Indicates whether the specified character is a digit.
   2651      *
   2652      * @param c
   2653      *            the character to check.
   2654      * @return {@code true} if {@code c} is a digit; {@code false}
   2655      *         otherwise.
   2656      */
   2657     public static boolean isDigit(char c) {
   2658         return isDigit((int) c);
   2659     }
   2660 
   2661     /**
   2662      * Indicates whether the specified code point is a digit.
   2663      *
   2664      * @param codePoint
   2665      *            the code point to check.
   2666      * @return {@code true} if {@code codePoint} is a digit; {@code false}
   2667      *         otherwise.
   2668      */
   2669     public static boolean isDigit(int codePoint) {
   2670         // Optimized case for ASCII
   2671         if ('0' <= codePoint && codePoint <= '9') {
   2672             return true;
   2673         }
   2674         if (codePoint < 1632) {
   2675             return false;
   2676         }
   2677         return isDigitImpl(codePoint);
   2678     }
   2679 
   2680     private static native boolean isDigitImpl(int codePoint);
   2681 
   2682     /**
   2683      * Indicates whether the specified character is ignorable in a Java or
   2684      * Unicode identifier.
   2685      *
   2686      * @param c
   2687      *            the character to check.
   2688      * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
   2689      */
   2690     public static boolean isIdentifierIgnorable(char c) {
   2691         return isIdentifierIgnorable((int) c);
   2692     }
   2693 
    /**
     * Returns true if the given code point is a CJKV ideographic character.
     * The check is implemented natively.
     *
     * @param codePoint the code point to check.
     * @return {@code true} if {@code codePoint} is ideographic; {@code false} otherwise.
     * @since 1.7
     */
    public static native boolean isIdeographic(int codePoint);
   2699 
   2700     /**
   2701      * Indicates whether the specified code point is ignorable in a Java or
   2702      * Unicode identifier.
   2703      *
   2704      * @param codePoint
   2705      *            the code point to check.
   2706      * @return {@code true} if {@code codePoint} is ignorable; {@code false}
   2707      *         otherwise.
   2708      */
   2709     public static boolean isIdentifierIgnorable(int codePoint) {
   2710         // This is both an optimization and papers over differences between Java and ICU.
   2711         if (codePoint < 0x600) {
   2712             return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
   2713                     (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
   2714         }
   2715         return isIdentifierIgnorableImpl(codePoint);
   2716     }
   2717 
   2718     private static native boolean isIdentifierIgnorableImpl(int codePoint);
   2719 
   2720     /**
   2721      * Indicates whether the specified character is an ISO control character.
   2722      *
   2723      * @param c
   2724      *            the character to check.
   2725      * @return {@code true} if {@code c} is an ISO control character;
   2726      *         {@code false} otherwise.
   2727      */
   2728     public static boolean isISOControl(char c) {
   2729         return isISOControl((int) c);
   2730     }
   2731 
   2732     /**
   2733      * Indicates whether the specified code point is an ISO control character.
   2734      *
   2735      * @param c
   2736      *            the code point to check.
   2737      * @return {@code true} if {@code c} is an ISO control character;
   2738      *         {@code false} otherwise.
   2739      */
   2740     public static boolean isISOControl(int c) {
   2741         return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
   2742     }
   2743 
   2744     /**
   2745      * Indicates whether the specified character is a valid part of a Java
   2746      * identifier other than the first character.
   2747      *
   2748      * @param c
   2749      *            the character to check.
   2750      * @return {@code true} if {@code c} is valid as part of a Java identifier;
   2751      *         {@code false} otherwise.
   2752      */
   2753     public static boolean isJavaIdentifierPart(char c) {
   2754         return isJavaIdentifierPart((int) c);
   2755     }
   2756 
   2757     /**
   2758      * Indicates whether the specified code point is a valid part of a Java
   2759      * identifier other than the first character.
   2760      *
   2761      * @param codePoint
   2762      *            the code point to check.
   2763      * @return {@code true} if {@code c} is valid as part of a Java identifier;
   2764      *         {@code false} otherwise.
   2765      */
   2766     public static boolean isJavaIdentifierPart(int codePoint) {
   2767         // Use precomputed bitmasks to optimize the ASCII range.
   2768         if (codePoint < 64) {
   2769             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
   2770         } else if (codePoint < 128) {
   2771             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   2772         }
   2773         int type = getType(codePoint);
   2774         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
   2775                 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
   2776                 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
   2777                 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
   2778                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
   2779                 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
   2780     }
   2781 
   2782     /**
   2783      * Indicates whether the specified character is a valid first character for
   2784      * a Java identifier.
   2785      *
   2786      * @param c
   2787      *            the character to check.
   2788      * @return {@code true} if {@code c} is a valid first character of a Java
   2789      *         identifier; {@code false} otherwise.
   2790      */
   2791     public static boolean isJavaIdentifierStart(char c) {
   2792         return isJavaIdentifierStart((int) c);
   2793     }
   2794 
   2795     /**
   2796      * Indicates whether the specified code point is a valid first character for
   2797      * a Java identifier.
   2798      *
   2799      * @param codePoint
   2800      *            the code point to check.
   2801      * @return {@code true} if {@code codePoint} is a valid start of a Java
   2802      *         identifier; {@code false} otherwise.
   2803      */
   2804     public static boolean isJavaIdentifierStart(int codePoint) {
   2805         // Use precomputed bitmasks to optimize the ASCII range.
   2806         if (codePoint < 64) {
   2807             return (codePoint == '$'); // There's only one character in this range.
   2808         } else if (codePoint < 128) {
   2809             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
   2810         }
   2811         int type = getType(codePoint);
   2812         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
   2813                 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
   2814     }
   2815 
   2816     /**
   2817      * Indicates whether the specified character is a Java letter.
   2818      *
   2819      * @param c
   2820      *            the character to check.
   2821      * @return {@code true} if {@code c} is a Java letter; {@code false}
   2822      *         otherwise.
   2823      * @deprecated Use {@link #isJavaIdentifierStart(char)} instead.
   2824      */
   2825     @Deprecated
   2826     public static boolean isJavaLetter(char c) {
   2827         return isJavaIdentifierStart(c);
   2828     }
   2829 
   2830     /**
   2831      * Indicates whether the specified character is a Java letter or digit
   2832      * character.
   2833      *
   2834      * @param c
   2835      *            the character to check.
   2836      * @return {@code true} if {@code c} is a Java letter or digit;
   2837      *         {@code false} otherwise.
   2838      * @deprecated Use {@link #isJavaIdentifierPart(char)} instead.
   2839      */
   2840     @Deprecated
   2841     public static boolean isJavaLetterOrDigit(char c) {
   2842         return isJavaIdentifierPart(c);
   2843     }
   2844 
   2845     /**
   2846      * Indicates whether the specified character is a letter.
   2847      *
   2848      * @param c
   2849      *            the character to check.
   2850      * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
   2851      */
   2852     public static boolean isLetter(char c) {
   2853         return isLetter((int) c);
   2854     }
   2855 
   2856     /**
   2857      * Indicates whether the specified code point is a letter.
   2858      *
   2859      * @param codePoint
   2860      *            the code point to check.
   2861      * @return {@code true} if {@code codePoint} is a letter; {@code false}
   2862      *         otherwise.
   2863      */
   2864     public static boolean isLetter(int codePoint) {
   2865         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
   2866             return true;
   2867         }
   2868         if (codePoint < 128) {
   2869             return false;
   2870         }
   2871         return isLetterImpl(codePoint);
   2872     }
   2873 
   2874     private static native boolean isLetterImpl(int codePoint);
   2875 
   2876     /**
   2877      * Indicates whether the specified character is a letter or a digit.
   2878      *
   2879      * @param c
   2880      *            the character to check.
   2881      * @return {@code true} if {@code c} is a letter or a digit; {@code false}
   2882      *         otherwise.
   2883      */
   2884     public static boolean isLetterOrDigit(char c) {
   2885         return isLetterOrDigit((int) c);
   2886     }
   2887 
   2888     /**
   2889      * Indicates whether the specified code point is a letter or a digit.
   2890      *
   2891      * @param codePoint
   2892      *            the code point to check.
   2893      * @return {@code true} if {@code codePoint} is a letter or a digit;
   2894      *         {@code false} otherwise.
   2895      */
   2896     public static boolean isLetterOrDigit(int codePoint) {
   2897         // Optimized case for ASCII
   2898         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
   2899             return true;
   2900         }
   2901         if ('0' <= codePoint && codePoint <= '9') {
   2902             return true;
   2903         }
   2904         if (codePoint < 128) {
   2905             return false;
   2906         }
   2907         return isLetterOrDigitImpl(codePoint);
   2908     }
   2909 
   2910     private static native boolean isLetterOrDigitImpl(int codePoint);
   2911 
   2912     /**
   2913      * Indicates whether the specified character is a lower case letter.
   2914      *
   2915      * @param c
   2916      *            the character to check.
   2917      * @return {@code true} if {@code c} is a lower case letter; {@code false}
   2918      *         otherwise.
   2919      */
   2920     public static boolean isLowerCase(char c) {
   2921         return isLowerCase((int) c);
   2922     }
   2923 
   2924     /**
   2925      * Indicates whether the specified code point is a lower case letter.
   2926      *
   2927      * @param codePoint
   2928      *            the code point to check.
   2929      * @return {@code true} if {@code codePoint} is a lower case letter;
   2930      *         {@code false} otherwise.
   2931      */
   2932     public static boolean isLowerCase(int codePoint) {
   2933         // Optimized case for ASCII
   2934         if ('a' <= codePoint && codePoint <= 'z') {
   2935             return true;
   2936         }
   2937         if (codePoint < 128) {
   2938             return false;
   2939         }
   2940         return isLowerCaseImpl(codePoint);
   2941     }
   2942 
   2943     private static native boolean isLowerCaseImpl(int codePoint);
   2944 
   2945     /**
   2946      * Use {@link #isWhitespace(char)} instead.
   2947      * @deprecated Use {@link #isWhitespace(char)} instead.
   2948      */
   2949     @Deprecated
   2950     public static boolean isSpace(char c) {
   2951         return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
   2952     }
   2953 
   2954     /**
   2955      * See {@link #isSpaceChar(int)}.
   2956      */
   2957     public static boolean isSpaceChar(char c) {
   2958         return isSpaceChar((int) c);
   2959     }
   2960 
   2961     /**
   2962      * Returns true if the given code point is a Unicode space character.
   2963      * The exact set of characters considered as whitespace varies with Unicode version.
   2964      * Note that non-breaking spaces are considered whitespace.
   2965      * Note also that line separators are not considered whitespace; see {@link #isWhitespace}
   2966      * for an alternative.
   2967      */
   2968     public static boolean isSpaceChar(int codePoint) {
   2969         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   2970         // SPACE or NO-BREAK SPACE?
   2971         if (codePoint == 0x20 || codePoint == 0xa0) {
   2972             return true;
   2973         }
   2974         if (codePoint < 0x1000) {
   2975             return false;
   2976         }
   2977         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   2978         if (codePoint == 0x1680 || codePoint == 0x180e) {
   2979             return true;
   2980         }
   2981         if (codePoint < 0x2000) {
   2982             return false;
   2983         }
   2984         if (codePoint <= 0xffff) {
   2985             // Other whitespace from General Punctuation...
   2986             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
   2987                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   2988         }
   2989         // Let icu4c worry about non-BMP code points.
   2990         return isSpaceCharImpl(codePoint);
   2991     }
   2992 
   2993     private static native boolean isSpaceCharImpl(int codePoint);
   2994 
   2995     /**
   2996      * Indicates whether the specified character is a titlecase character.
   2997      *
   2998      * @param c
   2999      *            the character to check.
   3000      * @return {@code true} if {@code c} is a titlecase character, {@code false}
   3001      *         otherwise.
   3002      */
   3003     public static boolean isTitleCase(char c) {
   3004         return isTitleCaseImpl(c);
   3005     }
   3006 
   3007     /**
   3008      * Indicates whether the specified code point is a titlecase character.
   3009      *
   3010      * @param codePoint
   3011      *            the code point to check.
   3012      * @return {@code true} if {@code codePoint} is a titlecase character,
   3013      *         {@code false} otherwise.
   3014      */
   3015     public static boolean isTitleCase(int codePoint) {
   3016         return isTitleCaseImpl(codePoint);
   3017     }
   3018 
   3019     private static native boolean isTitleCaseImpl(int codePoint);
   3020 
   3021     /**
   3022      * Indicates whether the specified character is valid as part of a Unicode
   3023      * identifier other than the first character.
   3024      *
   3025      * @param c
   3026      *            the character to check.
   3027      * @return {@code true} if {@code c} is valid as part of a Unicode
   3028      *         identifier; {@code false} otherwise.
   3029      */
   3030     public static boolean isUnicodeIdentifierPart(char c) {
   3031         return isUnicodeIdentifierPartImpl(c);
   3032     }
   3033 
   3034     /**
   3035      * Indicates whether the specified code point is valid as part of a Unicode
   3036      * identifier other than the first character.
   3037      *
   3038      * @param codePoint
   3039      *            the code point to check.
   3040      * @return {@code true} if {@code codePoint} is valid as part of a Unicode
   3041      *         identifier; {@code false} otherwise.
   3042      */
   3043     public static boolean isUnicodeIdentifierPart(int codePoint) {
   3044         return isUnicodeIdentifierPartImpl(codePoint);
   3045     }
   3046 
   3047     private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
   3048 
   3049     /**
   3050      * Indicates whether the specified character is a valid initial character
   3051      * for a Unicode identifier.
   3052      *
   3053      * @param c
   3054      *            the character to check.
   3055      * @return {@code true} if {@code c} is a valid first character for a
   3056      *         Unicode identifier; {@code false} otherwise.
   3057      */
   3058     public static boolean isUnicodeIdentifierStart(char c) {
   3059         return isUnicodeIdentifierStartImpl(c);
   3060     }
   3061 
   3062     /**
   3063      * Indicates whether the specified code point is a valid initial character
   3064      * for a Unicode identifier.
   3065      *
   3066      * @param codePoint
   3067      *            the code point to check.
   3068      * @return {@code true} if {@code codePoint} is a valid first character for
   3069      *         a Unicode identifier; {@code false} otherwise.
   3070      */
   3071     public static boolean isUnicodeIdentifierStart(int codePoint) {
   3072         return isUnicodeIdentifierStartImpl(codePoint);
   3073     }
   3074 
   3075     private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
   3076 
   3077     /**
   3078      * Indicates whether the specified character is an upper case letter.
   3079      *
   3080      * @param c
   3081      *            the character to check.
   3082      * @return {@code true} if {@code c} is a upper case letter; {@code false}
   3083      *         otherwise.
   3084      */
   3085     public static boolean isUpperCase(char c) {
   3086         return isUpperCase((int) c);
   3087     }
   3088 
   3089     /**
   3090      * Indicates whether the specified code point is an upper case letter.
   3091      *
   3092      * @param codePoint
   3093      *            the code point to check.
   3094      * @return {@code true} if {@code codePoint} is a upper case letter;
   3095      *         {@code false} otherwise.
   3096      */
   3097     public static boolean isUpperCase(int codePoint) {
   3098         // Optimized case for ASCII
   3099         if ('A' <= codePoint && codePoint <= 'Z') {
   3100             return true;
   3101         }
   3102         if (codePoint < 128) {
   3103             return false;
   3104         }
   3105         return isUpperCaseImpl(codePoint);
   3106     }
   3107 
   3108     private static native boolean isUpperCaseImpl(int codePoint);
   3109 
   3110     /**
   3111      * See {@link #isWhitespace(int)}.
   3112      */
   3113     public static boolean isWhitespace(char c) {
   3114         return isWhitespace((int) c);
   3115     }
   3116 
   3117     /**
   3118      * Returns true if the given code point is a Unicode whitespace character.
   3119      * The exact set of characters considered as whitespace varies with Unicode version.
   3120      * Note that non-breaking spaces are not considered whitespace.
   3121      * Note also that line separators are considered whitespace; see {@link #isSpaceChar}
   3122      * for an alternative.
   3123      */
   3124     public static boolean isWhitespace(int codePoint) {
   3125         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
   3126         // Any ASCII whitespace character?
   3127         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
   3128             return true;
   3129         }
   3130         if (codePoint < 0x1000) {
   3131             return false;
   3132         }
   3133         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
   3134         if (codePoint == 0x1680 || codePoint == 0x180e) {
   3135             return true;
   3136         }
   3137         if (codePoint < 0x2000) {
   3138             return false;
   3139         }
   3140         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
   3141         if (codePoint == 0x2007 || codePoint == 0x202f) {
   3142             return false;
   3143         }
   3144         if (codePoint <= 0xffff) {
   3145             // Other whitespace from General Punctuation...
   3146             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
   3147                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
   3148         }
   3149         // Let icu4c worry about non-BMP code points.
   3150         return isWhitespaceImpl(codePoint);
   3151     }
   3152 
   3153     private static native boolean isWhitespaceImpl(int codePoint);
   3154 
   3155     /**
   3156      * Reverses the order of the first and second byte in the specified
   3157      * character.
   3158      *
   3159      * @param c
   3160      *            the character to reverse.
   3161      * @return the character with reordered bytes.
   3162      */
   3163     public static char reverseBytes(char c) {
   3164         return (char)((c<<8) | (c>>8));
   3165     }
   3166 
   3167     /**
   3168      * Returns the lower case equivalent for the specified character if the
   3169      * character is an upper case letter. Otherwise, the specified character is
   3170      * returned unchanged.
   3171      *
   3172      * @param c
   3173      *            the character
   3174      * @return if {@code c} is an upper case character then its lower case
   3175      *         counterpart, otherwise just {@code c}.
   3176      */
   3177     public static char toLowerCase(char c) {
   3178         return (char) toLowerCase((int) c);
   3179     }
   3180 
   3181     /**
   3182      * Returns the lower case equivalent for the specified code point if it is
   3183      * an upper case letter. Otherwise, the specified code point is returned
   3184      * unchanged.
   3185      *
   3186      * @param codePoint
   3187      *            the code point to check.
   3188      * @return if {@code codePoint} is an upper case character then its lower
   3189      *         case counterpart, otherwise just {@code codePoint}.
   3190      */
   3191     public static int toLowerCase(int codePoint) {
   3192         // Optimized case for ASCII
   3193         if ('A' <= codePoint && codePoint <= 'Z') {
   3194             return (char) (codePoint + ('a' - 'A'));
   3195         }
   3196         if (codePoint < 192) {
   3197             return codePoint;
   3198         }
   3199         return toLowerCaseImpl(codePoint);
   3200     }
   3201 
   3202     private static native int toLowerCaseImpl(int codePoint);
   3203 
   3204     @Override
   3205     public String toString() {
   3206         return String.valueOf(value);
   3207     }
   3208 
   3209     /**
   3210      * Converts the specified character to its string representation.
   3211      *
   3212      * @param value
   3213      *            the character to convert.
   3214      * @return the character converted to a string.
   3215      */
   3216     public static String toString(char value) {
   3217         return String.valueOf(value);
   3218     }
   3219 
   3220     /**
   3221      * Returns the title case equivalent for the specified character if it
   3222      * exists. Otherwise, the specified character is returned unchanged.
   3223      *
   3224      * @param c
   3225      *            the character to convert.
   3226      * @return the title case equivalent of {@code c} if it exists, otherwise
   3227      *         {@code c}.
   3228      */
   3229     public static char toTitleCase(char c) {
   3230         return (char) toTitleCaseImpl(c);
   3231     }
   3232 
   3233     /**
   3234      * Returns the title case equivalent for the specified code point if it
   3235      * exists. Otherwise, the specified code point is returned unchanged.
   3236      *
   3237      * @param codePoint
   3238      *            the code point to convert.
   3239      * @return the title case equivalent of {@code codePoint} if it exists,
   3240      *         otherwise {@code codePoint}.
   3241      */
   3242     public static int toTitleCase(int codePoint) {
   3243         return toTitleCaseImpl(codePoint);
   3244     }
   3245 
   3246     private static native int toTitleCaseImpl(int codePoint);
   3247 
   3248     /**
   3249      * Returns the upper case equivalent for the specified character if the
   3250      * character is a lower case letter. Otherwise, the specified character is
   3251      * returned unchanged.
   3252      *
   3253      * @param c
   3254      *            the character to convert.
   3255      * @return if {@code c} is a lower case character then its upper case
   3256      *         counterpart, otherwise just {@code c}.
   3257      */
   3258     public static char toUpperCase(char c) {
   3259         return (char) toUpperCase((int) c);
   3260     }
   3261 
   3262     /**
   3263      * Returns the upper case equivalent for the specified code point if the
   3264      * code point is a lower case letter. Otherwise, the specified code point is
   3265      * returned unchanged.
   3266      *
   3267      * @param codePoint
   3268      *            the code point to convert.
   3269      * @return if {@code codePoint} is a lower case character then its upper
   3270      *         case counterpart, otherwise just {@code codePoint}.
   3271      */
   3272     public static int toUpperCase(int codePoint) {
   3273         // Optimized case for ASCII
   3274         if ('a' <= codePoint && codePoint <= 'z') {
   3275             return (char) (codePoint - ('a' - 'A'));
   3276         }
   3277         if (codePoint < 181) {
   3278             return codePoint;
   3279         }
   3280         return toUpperCaseImpl(codePoint);
   3281     }
   3282 
   3283     private static native int toUpperCaseImpl(int codePoint);
   3284 }
   3285