1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.lang; 19 20 import java.io.Serializable; 21 import java.util.Arrays; 22 23 /** 24 * The wrapper for the primitive type {@code char}. This class also provides a 25 * number of utility methods for working with characters. 26 * 27 * <p>Character data is kept up to date as Unicode evolves. 28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 29 * the {@code Locale} documentation for details of the Unicode versions implemented by current 30 * and historical Android releases. 31 * 32 * <p>The Unicode specification, character tables, and other information are available at 33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 34 * 35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 39 * encoding and {@code char} pairs are used to represent code points in the 40 * supplementary range. 
A pair of {@code char} values that represent a 41 * supplementary character are made up of a <i>high surrogate</i> with a value 42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 43 * 0xDC00 to 0xDFFF. 44 * <p> 45 * On the Java platform a {@code char} value represents either a single BMP code 46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 47 * is used to represent all Unicode code points. 48 * 49 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 51 * grouped semantically to provide a convenient overview. This table is also useful in 52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 53 * <span class="datatable"> 54 * <style type="text/css"> 55 * .datatable td { padding-right: 20px; } 56 * </style> 57 * <p><table> 58 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 59 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 60 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 62 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 63 * <tr> <td><br></td> </tr> 64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 67 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 68 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 69 * <tr> <td><br></td> </tr> 70 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 71 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 73 * <tr> <td><br></td> </tr> 74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 75 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 76 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 77 * <tr> <td><br></td> </tr> 78 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 79 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 84 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 85 * <tr> <td><br></td> </tr> 86 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 89 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 90 * <tr> <td><br></td> </tr> 91 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 94 * </table> 95 * </span> 96 * 97 * @since 1.0 98 */ 99 @FindBugsSuppressWarnings("DM_NUMBER_CTOR") 100 public final class Character 
implements Serializable, Comparable<Character> { 101 private static final long serialVersionUID = 3786198910865385080L; 102 103 private final char value; 104 105 /** 106 * The minimum {@code Character} value. 107 */ 108 public static final char MIN_VALUE = '\u0000'; 109 110 /** 111 * The maximum {@code Character} value. 112 */ 113 public static final char MAX_VALUE = '\uffff'; 114 115 /** 116 * The minimum radix used for conversions between characters and integers. 117 */ 118 public static final int MIN_RADIX = 2; 119 120 /** 121 * The maximum radix used for conversions between characters and integers. 122 */ 123 public static final int MAX_RADIX = 36; 124 125 /** 126 * The {@link Class} object that represents the primitive type {@code char}. 127 */ 128 @SuppressWarnings("unchecked") 129 public static final Class<Character> TYPE 130 = (Class<Character>) char[].class.getComponentType(); 131 // Note: Character.TYPE can't be set to "char.class", since *that* is 132 // defined to be "java.lang.Character.TYPE"; 133 134 /** 135 * Unicode category constant Cn. 136 */ 137 public static final byte UNASSIGNED = 0; 138 139 /** 140 * Unicode category constant Lu. 141 */ 142 public static final byte UPPERCASE_LETTER = 1; 143 144 /** 145 * Unicode category constant Ll. 146 */ 147 public static final byte LOWERCASE_LETTER = 2; 148 149 /** 150 * Unicode category constant Lt. 151 */ 152 public static final byte TITLECASE_LETTER = 3; 153 154 /** 155 * Unicode category constant Lm. 156 */ 157 public static final byte MODIFIER_LETTER = 4; 158 159 /** 160 * Unicode category constant Lo. 161 */ 162 public static final byte OTHER_LETTER = 5; 163 164 /** 165 * Unicode category constant Mn. 166 */ 167 public static final byte NON_SPACING_MARK = 6; 168 169 /** 170 * Unicode category constant Me. 171 */ 172 public static final byte ENCLOSING_MARK = 7; 173 174 /** 175 * Unicode category constant Mc. 
176 */ 177 public static final byte COMBINING_SPACING_MARK = 8; 178 179 /** 180 * Unicode category constant Nd. 181 */ 182 public static final byte DECIMAL_DIGIT_NUMBER = 9; 183 184 /** 185 * Unicode category constant Nl. 186 */ 187 public static final byte LETTER_NUMBER = 10; 188 189 /** 190 * Unicode category constant No. 191 */ 192 public static final byte OTHER_NUMBER = 11; 193 194 /** 195 * Unicode category constant Zs. 196 */ 197 public static final byte SPACE_SEPARATOR = 12; 198 199 /** 200 * Unicode category constant Zl. 201 */ 202 public static final byte LINE_SEPARATOR = 13; 203 204 /** 205 * Unicode category constant Zp. 206 */ 207 public static final byte PARAGRAPH_SEPARATOR = 14; 208 209 /** 210 * Unicode category constant Cc. 211 */ 212 public static final byte CONTROL = 15; 213 214 /** 215 * Unicode category constant Cf. 216 */ 217 public static final byte FORMAT = 16; 218 219 /** 220 * Unicode category constant Co. 221 */ 222 public static final byte PRIVATE_USE = 18; 223 224 /** 225 * Unicode category constant Cs. 226 */ 227 public static final byte SURROGATE = 19; 228 229 /** 230 * Unicode category constant Pd. 231 */ 232 public static final byte DASH_PUNCTUATION = 20; 233 234 /** 235 * Unicode category constant Ps. 236 */ 237 public static final byte START_PUNCTUATION = 21; 238 239 /** 240 * Unicode category constant Pe. 241 */ 242 public static final byte END_PUNCTUATION = 22; 243 244 /** 245 * Unicode category constant Pc. 246 */ 247 public static final byte CONNECTOR_PUNCTUATION = 23; 248 249 /** 250 * Unicode category constant Po. 251 */ 252 public static final byte OTHER_PUNCTUATION = 24; 253 254 /** 255 * Unicode category constant Sm. 256 */ 257 public static final byte MATH_SYMBOL = 25; 258 259 /** 260 * Unicode category constant Sc. 261 */ 262 public static final byte CURRENCY_SYMBOL = 26; 263 264 /** 265 * Unicode category constant Sk. 
266 */ 267 public static final byte MODIFIER_SYMBOL = 27; 268 269 /** 270 * Unicode category constant So. 271 */ 272 public static final byte OTHER_SYMBOL = 28; 273 274 /** 275 * Unicode category constant Pi. 276 * 277 * @since 1.4 278 */ 279 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 280 281 /** 282 * Unicode category constant Pf. 283 * 284 * @since 1.4 285 */ 286 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 287 288 /** 289 * Unicode bidirectional constant. 290 * 291 * @since 1.4 292 */ 293 public static final byte DIRECTIONALITY_UNDEFINED = -1; 294 295 /** 296 * Unicode bidirectional constant L. 297 * 298 * @since 1.4 299 */ 300 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 301 302 /** 303 * Unicode bidirectional constant R. 304 * 305 * @since 1.4 306 */ 307 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 308 309 /** 310 * Unicode bidirectional constant AL. 311 * 312 * @since 1.4 313 */ 314 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 315 316 /** 317 * Unicode bidirectional constant EN. 318 * 319 * @since 1.4 320 */ 321 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 322 323 /** 324 * Unicode bidirectional constant ES. 325 * 326 * @since 1.4 327 */ 328 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 329 330 /** 331 * Unicode bidirectional constant ET. 332 * 333 * @since 1.4 334 */ 335 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 336 337 /** 338 * Unicode bidirectional constant AN. 339 * 340 * @since 1.4 341 */ 342 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 343 344 /** 345 * Unicode bidirectional constant CS. 346 * 347 * @since 1.4 348 */ 349 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 350 351 /** 352 * Unicode bidirectional constant NSM. 353 * 354 * @since 1.4 355 */ 356 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 357 358 /** 359 * Unicode bidirectional constant BN. 
360 * 361 * @since 1.4 362 */ 363 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 364 365 /** 366 * Unicode bidirectional constant B. 367 * 368 * @since 1.4 369 */ 370 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 371 372 /** 373 * Unicode bidirectional constant S. 374 * 375 * @since 1.4 376 */ 377 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 378 379 /** 380 * Unicode bidirectional constant WS. 381 * 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_WHITESPACE = 12; 385 386 /** 387 * Unicode bidirectional constant ON. 388 * 389 * @since 1.4 390 */ 391 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 392 393 /** 394 * Unicode bidirectional constant LRE. 395 * 396 * @since 1.4 397 */ 398 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 399 400 /** 401 * Unicode bidirectional constant LRO. 402 * 403 * @since 1.4 404 */ 405 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 406 407 /** 408 * Unicode bidirectional constant RLE. 409 * 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 413 414 /** 415 * Unicode bidirectional constant RLO. 416 * 417 * @since 1.4 418 */ 419 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 420 421 /** 422 * Unicode bidirectional constant PDF. 423 * 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 427 428 /** 429 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 430 * encoding, {@code '\uD800'}. 431 * 432 * @since 1.5 433 */ 434 public static final char MIN_HIGH_SURROGATE = '\uD800'; 435 436 /** 437 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 438 * encoding, {@code '\uDBFF'}. 
439 * 440 * @since 1.5 441 */ 442 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 443 444 /** 445 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 446 * encoding, {@code '\uDC00'}. 447 * 448 * @since 1.5 449 */ 450 public static final char MIN_LOW_SURROGATE = '\uDC00'; 451 452 /** 453 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 454 * encoding, {@code '\uDFFF'}. 455 * 456 * @since 1.5 457 */ 458 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 459 460 /** 461 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 462 * 463 * @since 1.5 464 */ 465 public static final char MIN_SURROGATE = '\uD800'; 466 467 /** 468 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 469 * 470 * @since 1.5 471 */ 472 public static final char MAX_SURROGATE = '\uDFFF'; 473 474 /** 475 * The minimum value of a supplementary code point, {@code U+010000}. 476 * 477 * @since 1.5 478 */ 479 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 480 481 /** 482 * The minimum code point value, {@code U+0000}. 483 * 484 * @since 1.5 485 */ 486 public static final int MIN_CODE_POINT = 0x000000; 487 488 /** 489 * The maximum code point value, {@code U+10FFFF}. 490 * 491 * @since 1.5 492 */ 493 public static final int MAX_CODE_POINT = 0x10FFFF; 494 495 /** 496 * The number of bits required to represent a {@code Character} value in 497 * unsigned binary form. 
498 * 499 * @since 1.5 500 */ 501 public static final int SIZE = 16; 502 503 private static final byte[] DIRECTIONALITY = new byte[] { 504 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 505 DIRECTIONALITY_EUROPEAN_NUMBER, 506 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 507 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 508 DIRECTIONALITY_ARABIC_NUMBER, 509 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 510 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 511 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 512 DIRECTIONALITY_OTHER_NEUTRALS, 513 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 514 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 515 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 516 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 517 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 518 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 519 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 520 521 /* 522 * Represents a subset of the Unicode character set. 523 */ 524 public static class Subset { 525 String name; 526 527 /** 528 * Constructs a new {@code Subset}. 529 * 530 * @param string 531 * this subset's name. 532 */ 533 protected Subset(String string) { 534 if (string == null) { 535 throw new NullPointerException("string == null"); 536 } 537 name = string; 538 } 539 540 /** 541 * Compares this character subset with the specified object. Uses 542 * {@link java.lang.Object#equals(Object)} to do the comparison. 543 * 544 * @param object 545 * the object to compare this character subset with. 546 * @return {@code true} if {@code object} is this subset, that is, if 547 * {@code object == this}; {@code false} otherwise. 548 */ 549 @Override 550 public final boolean equals(Object object) { 551 return super.equals(object); 552 } 553 554 /** 555 * Returns the integer hash code for this character subset. 556 * 557 * @return this subset's hash code, which is the hash code computed by 558 * {@link java.lang.Object#hashCode()}. 
559 */ 560 @Override 561 public final int hashCode() { 562 return super.hashCode(); 563 } 564 565 /** 566 * Returns the string representation of this subset. 567 * 568 * @return this subset's name. 569 */ 570 @Override 571 public final String toString() { 572 return name; 573 } 574 } 575 576 /** 577 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 578 * specification. 579 * 580 * @since 1.2 581 */ 582 public static final class UnicodeBlock extends Subset { 583 /** 584 * The "Surrogates Area" Unicode Block. 585 * 586 * @deprecated As of Java 5, this block has been replaced by 587 * {@link #HIGH_SURROGATES}, 588 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 589 * {@link #LOW_SURROGATES}. 590 */ 591 @Deprecated 592 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0); 593 /** 594 * The "Basic Latin" Unicode Block. 595 * 596 * @since 1.2 597 */ 598 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f); 599 /** 600 * The "Latin-1 Supplement" Unicode Block. 601 * 602 * @since 1.2 603 */ 604 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff); 605 /** 606 * The "Latin Extended-A" Unicode Block. 607 * 608 * @since 1.2 609 */ 610 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f); 611 /** 612 * The "Latin Extended-B" Unicode Block. 613 * 614 * @since 1.2 615 */ 616 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 617 /** 618 * The "IPA Extensions" Unicode Block. 619 * 620 * @since 1.2 621 */ 622 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 623 /** 624 * The "Spacing Modifier Letters" Unicode Block. 
625 * 626 * @since 1.2 627 */ 628 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 629 /** 630 * The "Combining Diacritical Marks" Unicode Block. 631 * 632 * @since 1.2 633 */ 634 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 635 /** 636 * The "Greek and Coptic" Unicode Block. Previously referred 637 * to as "Greek". 638 * 639 * @since 1.2 640 */ 641 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 642 /** 643 * The "Cyrillic" Unicode Block. 644 * 645 * @since 1.2 646 */ 647 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 648 /** 649 * The "Cyrillic Supplement" Unicode Block. Previously 650 * referred to as "Cyrillic Supplementary". 651 * 652 * @since 1.5 653 */ 654 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 655 /** 656 * The "Armenian" Unicode Block. 657 * 658 * @since 1.2 659 */ 660 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 661 /** 662 * The "Hebrew" Unicode Block. 663 * 664 * @since 1.2 665 */ 666 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 667 /** 668 * The "Arabic" Unicode Block. 669 * 670 * @since 1.2 671 */ 672 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 673 /** 674 * The "Syriac" Unicode Block. 675 * 676 * @since 1.4 677 */ 678 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 679 /** 680 * The "Thaana" Unicode Block. 681 * 682 * @since 1.4 683 */ 684 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 685 /** 686 * The "Devanagari" Unicode Block. 
687 * 688 * @since 1.2 689 */ 690 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 691 /** 692 * The "Bengali" Unicode Block. 693 * 694 * @since 1.2 695 */ 696 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 697 /** 698 * The "Gurmukhi" Unicode Block. 699 * 700 * @since 1.2 701 */ 702 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 703 /** 704 * The "Gujarati" Unicode Block. 705 * 706 * @since 1.2 707 */ 708 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 709 /** 710 * The "Oriya" Unicode Block. 711 * 712 * @since 1.2 713 */ 714 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 715 /** 716 * The "Tamil" Unicode Block. 717 * 718 * @since 1.2 719 */ 720 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 721 /** 722 * The "Telugu" Unicode Block. 723 * 724 * @since 1.2 725 */ 726 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 727 /** 728 * The "Kannada" Unicode Block. 729 * 730 * @since 1.2 731 */ 732 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 733 /** 734 * The "Malayalam" Unicode Block. 735 * 736 * @since 1.2 737 */ 738 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 739 /** 740 * The "Sinhala" Unicode Block. 741 * 742 * @since 1.4 743 */ 744 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 745 /** 746 * The "Thai" Unicode Block. 747 * 748 * @since 1.2 749 */ 750 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 751 /** 752 * The "Lao" Unicode Block. 753 * 754 * @since 1.2 755 */ 756 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 757 /** 758 * The "Tibetan" Unicode Block. 
759 * 760 * @since 1.2 761 */ 762 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 763 /** 764 * The "Myanmar" Unicode Block. 765 * 766 * @since 1.4 767 */ 768 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 769 /** 770 * The "Georgian" Unicode Block. 771 * 772 * @since 1.2 773 */ 774 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 775 /** 776 * The "Hangul Jamo" Unicode Block. 777 * 778 * @since 1.2 779 */ 780 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 781 /** 782 * The "Ethiopic" Unicode Block. 783 * 784 * @since 1.4 785 */ 786 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 787 /** 788 * The "Cherokee" Unicode Block. 789 * 790 * @since 1.4 791 */ 792 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 793 /** 794 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 795 * 796 * @since 1.4 797 */ 798 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 799 /** 800 * The "Ogham" Unicode Block. 801 * 802 * @since 1.4 803 */ 804 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 805 /** 806 * The "Runic" Unicode Block. 807 * 808 * @since 1.4 809 */ 810 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 811 /** 812 * The "Tagalog" Unicode Block. 813 * 814 * @since 1.5 815 */ 816 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 817 /** 818 * The "Hanunoo" Unicode Block. 819 * 820 * @since 1.5 821 */ 822 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 823 /** 824 * The "Buhid" Unicode Block. 
825 * 826 * @since 1.5 827 */ 828 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 829 /** 830 * The "Tagbanwa" Unicode Block. 831 * 832 * @since 1.5 833 */ 834 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 835 /** 836 * The "Khmer" Unicode Block. 837 * 838 * @since 1.4 839 */ 840 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 841 /** 842 * The "Mongolian" Unicode Block. 843 * 844 * @since 1.4 845 */ 846 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 847 /** 848 * The "Limbu" Unicode Block. 849 * 850 * @since 1.5 851 */ 852 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 853 /** 854 * The "Tai Le" Unicode Block. 855 * 856 * @since 1.5 857 */ 858 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 859 /** 860 * The "Khmer Symbols" Unicode Block. 861 * 862 * @since 1.5 863 */ 864 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 865 /** 866 * The "Phonetic Extensions" Unicode Block. 867 * 868 * @since 1.5 869 */ 870 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 871 /** 872 * The "Latin Extended Additional" Unicode Block. 873 * 874 * @since 1.2 875 */ 876 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 877 /** 878 * The "Greek Extended" Unicode Block. 879 * 880 * @since 1.2 881 */ 882 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 883 /** 884 * The "General Punctuation" Unicode Block. 885 * 886 * @since 1.2 887 */ 888 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 889 /** 890 * The "Superscripts and Subscripts" Unicode Block. 
891 * 892 * @since 1.2 893 */ 894 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 895 /** 896 * The "Currency Symbols" Unicode Block. 897 * 898 * @since 1.2 899 */ 900 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 901 /** 902 * The "Combining Diacritical Marks for Symbols" Unicode 903 * Block. Previously referred to as "Combining Marks for 904 * Symbols". 905 * 906 * @since 1.2 907 */ 908 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 909 /** 910 * The "Letterlike Symbols" Unicode Block. 911 * 912 * @since 1.2 913 */ 914 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 915 /** 916 * The "Number Forms" Unicode Block. 917 * 918 * @since 1.2 919 */ 920 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 921 /** 922 * The "Arrows" Unicode Block. 923 * 924 * @since 1.2 925 */ 926 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 927 /** 928 * The "Mathematical Operators" Unicode Block. 929 * 930 * @since 1.2 931 */ 932 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 933 /** 934 * The "Miscellaneous Technical" Unicode Block. 935 * 936 * @since 1.2 937 */ 938 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 939 /** 940 * The "Control Pictures" Unicode Block. 941 * 942 * @since 1.2 943 */ 944 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 945 /** 946 * The "Optical Character Recognition" Unicode Block. 
947 * 948 * @since 1.2 949 */ 950 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 951 /** 952 * The "Enclosed Alphanumerics" Unicode Block. 953 * 954 * @since 1.2 955 */ 956 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 957 /** 958 * The "Box Drawing" Unicode Block. 959 * 960 * @since 1.2 961 */ 962 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 963 /** 964 * The "Block Elements" Unicode Block. 965 * 966 * @since 1.2 967 */ 968 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 969 /** 970 * The "Geometric Shapes" Unicode Block. 971 * 972 * @since 1.2 973 */ 974 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 975 /** 976 * The "Miscellaneous Symbols" Unicode Block. 977 * 978 * @since 1.2 979 */ 980 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 981 /** 982 * The "Dingbats" Unicode Block. 983 * 984 * @since 1.2 985 */ 986 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 987 /** 988 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 989 * 990 * @since 1.5 991 */ 992 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef); 993 /** 994 * The "Supplemental Arrows-A" Unicode Block. 995 * 996 * @since 1.5 997 */ 998 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff); 999 /** 1000 * The "Braille Patterns" Unicode Block. 
1001 * 1002 * @since 1.4 1003 */ 1004 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff); 1005 /** 1006 * The "Supplemental Arrows-B" Unicode Block. 1007 * 1008 * @since 1.5 1009 */ 1010 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f); 1011 /** 1012 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 1013 * 1014 * @since 1.5 1015 */ 1016 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff); 1017 /** 1018 * The "Supplemental Mathematical Operators" Unicode Block. 1019 * 1020 * @since 1.5 1021 */ 1022 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff); 1023 /** 1024 * The "Miscellaneous Symbols and Arrows" Unicode Block. 1025 * 1026 * @since 1.2 1027 */ 1028 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff); 1029 /** 1030 * The "CJK Radicals Supplement" Unicode Block. 1031 * 1032 * @since 1.4 1033 */ 1034 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff); 1035 /** 1036 * The "Kangxi Radicals" Unicode Block. 1037 * 1038 * @since 1.4 1039 */ 1040 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf); 1041 /** 1042 * The "Ideographic Description Characters" Unicode Block. 1043 * 1044 * @since 1.4 1045 */ 1046 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff); 1047 /** 1048 * The "CJK Symbols and Punctuation" Unicode Block. 
1049 * 1050 * @since 1.2 1051 */ 1052 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f); 1053 /** 1054 * The "Hiragana" Unicode Block. 1055 * 1056 * @since 1.2 1057 */ 1058 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f); 1059 /** 1060 * The "Katakana" Unicode Block. 1061 * 1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff); 1065 /** 1066 * The "Bopomofo" Unicode Block. 1067 * 1068 * @since 1.2 1069 */ 1070 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f); 1071 /** 1072 * The "Hangul Compatibility Jamo" Unicode Block. 1073 * 1074 * @since 1.2 1075 */ 1076 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f); 1077 /** 1078 * The "Kanbun" Unicode Block. 1079 * 1080 * @since 1.2 1081 */ 1082 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f); 1083 /** 1084 * The "Bopomofo Extended" Unicode Block. 1085 * 1086 * @since 1.4 1087 */ 1088 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf); 1089 /** 1090 * The "Katakana Phonetic Extensions" Unicode Block. 1091 * 1092 * @since 1.5 1093 */ 1094 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff); 1095 /** 1096 * The "Enclosed CJK Letters and Months" Unicode Block. 1097 * 1098 * @since 1.2 1099 */ 1100 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff); 1101 /** 1102 * The "CJK Compatibility" Unicode Block. 
1103 * 1104 * @since 1.2 1105 */ 1106 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff); 1107 /** 1108 * The "CJK Unified Ideographs Extension A" Unicode Block. 1109 * 1110 * @since 1.4 1111 */ 1112 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf); 1113 /** 1114 * The "Yijing Hexagram Symbols" Unicode Block. 1115 * 1116 * @since 1.5 1117 */ 1118 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff); 1119 /** 1120 * The "CJK Unified Ideographs" Unicode Block. 1121 * 1122 * @since 1.2 1123 */ 1124 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff); 1125 /** 1126 * The "Yi Syllables" Unicode Block. 1127 * 1128 * @since 1.4 1129 */ 1130 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f); 1131 /** 1132 * The "Yi Radicals" Unicode Block. 1133 * 1134 * @since 1.4 1135 */ 1136 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf); 1137 /** 1138 * The "Hangul Syllables" Unicode Block. 1139 * 1140 * @since 1.2 1141 */ 1142 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af); 1143 /** 1144 * The "High Surrogates" Unicode Block. This block represents 1145 * code point values in the high surrogate range 0xD800 to 0xDB7F 1146 */ 1147 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f); 1148 /** 1149 * The "High Private Use Surrogates" Unicode Block. 
This block 1150 * represents code point values in the high surrogate range 0xDB80 to 1151 * 0xDBFF 1152 */ 1153 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff); 1154 /** 1155 * The "Low Surrogates" Unicode Block. This block represents 1156 * code point values in the low surrogate range 0xDC00 to 0xDFFF 1157 */ 1158 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff); 1159 /** 1160 * The "Private Use Area" Unicode Block. 1161 * 1162 * @since 1.2 1163 */ 1164 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff); 1165 /** 1166 * The "CJK Compatibility Ideographs" Unicode Block. 1167 * 1168 * @since 1.2 1169 */ 1170 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff); 1171 /** 1172 * The "Alphabetic Presentation Forms" Unicode Block. 1173 * 1174 * @since 1.2 1175 */ 1176 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f); 1177 /** 1178 * The "Arabic Presentation Forms-A" Unicode Block. 1179 * 1180 * @since 1.2 1181 */ 1182 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff); 1183 /** 1184 * The "Variation Selectors" Unicode Block. 1185 * 1186 * @since 1.5 1187 */ 1188 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f); 1189 /** 1190 * The "Combining Half Marks" Unicode Block. 1191 * 1192 * @since 1.2 1193 */ 1194 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f); 1195 /** 1196 * The "CJK Compatibility Forms" Unicode Block. 
1197 * 1198 * @since 1.2 1199 */ 1200 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f); 1201 /** 1202 * The "Small Form Variants" Unicode Block. 1203 * 1204 * @since 1.2 1205 */ 1206 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f); 1207 /** 1208 * The "Arabic Presentation Forms-B" Unicode Block. 1209 * 1210 * @since 1.2 1211 */ 1212 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff); 1213 /** 1214 * The "Halfwidth and Fullwidth Forms" Unicode Block. 1215 * 1216 * @since 1.2 1217 */ 1218 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef); 1219 /** 1220 * The "Specials" Unicode Block. 1221 * 1222 * @since 1.2 1223 */ 1224 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff); 1225 /** 1226 * The "Linear B Syllabary" Unicode Block. 1227 * 1228 * @since 1.2 1229 */ 1230 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f); 1231 /** 1232 * The "Linear B Ideograms" Unicode Block. 1233 * 1234 * @since 1.5 1235 */ 1236 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff); 1237 /** 1238 * The "Aegean Numbers" Unicode Block. 1239 * 1240 * @since 1.5 1241 */ 1242 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f); 1243 /** 1244 * The "Old Italic" Unicode Block. 1245 * 1246 * @since 1.5 1247 */ 1248 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f); 1249 /** 1250 * The "Gothic" Unicode Block. 
1251 * 1252 * @since 1.5 1253 */ 1254 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f); 1255 /** 1256 * The "Ugaritic" Unicode Block. 1257 * 1258 * @since 1.5 1259 */ 1260 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f); 1261 /** 1262 * The "Deseret" Unicode Block. 1263 * 1264 * @since 1.5 1265 */ 1266 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f); 1267 /** 1268 * The "Shavian" Unicode Block. 1269 * 1270 * @since 1.5 1271 */ 1272 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f); 1273 /** 1274 * The "Osmanya" Unicode Block. 1275 * 1276 * @since 1.5 1277 */ 1278 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af); 1279 /** 1280 * The "Cypriot Syllabary" Unicode Block. 1281 * 1282 * @since 1.5 1283 */ 1284 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f); 1285 /** 1286 * The "Byzantine Musical Symbols" Unicode Block. 1287 * 1288 * @since 1.5 1289 */ 1290 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff); 1291 /** 1292 * The "Musical Symbols" Unicode Block. 1293 * 1294 * @since 1.5 1295 */ 1296 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff); 1297 /** 1298 * The "Tai Xuan Jing Symbols" Unicode Block. 1299 * 1300 * @since 1.5 1301 */ 1302 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f); 1303 /** 1304 * The "Mathematical Alphanumeric Symbols" Unicode Block. 1305 * 1306 * @since 1.5 1307 */ 1308 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff); 1309 /** 1310 * The "CJK Unified Ideographs Extension B" Unicode Block. 
1311 * 1312 * @since 1.5 1313 */ 1314 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df); 1315 /** 1316 * The "CJK Compatibility Ideographs Supplement" Unicode Block. 1317 * 1318 * @since 1.5 1319 */ 1320 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f); 1321 /** 1322 * The "Tags" Unicode Block. 1323 * 1324 * @since 1.5 1325 */ 1326 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f); 1327 /** 1328 * The "Variation Selectors Supplement" Unicode Block. 1329 * 1330 * @since 1.5 1331 */ 1332 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef); 1333 /** 1334 * The "Supplementary Private Use Area-A" Unicode Block. 1335 * 1336 * @since 1.5 1337 */ 1338 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff); 1339 /** 1340 * The "Supplementary Private Use Area-B" Unicode Block. 1341 * 1342 * @since 1.5 1343 */ 1344 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff); 1345 1346 /* 1347 * All of the UnicodeBlocks with valid ranges in ascending order. 
1348 */ 1349 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1350 null, 1351 UnicodeBlock.BASIC_LATIN, 1352 UnicodeBlock.LATIN_1_SUPPLEMENT, 1353 UnicodeBlock.LATIN_EXTENDED_A, 1354 UnicodeBlock.LATIN_EXTENDED_B, 1355 UnicodeBlock.IPA_EXTENSIONS, 1356 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1357 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1358 UnicodeBlock.GREEK, 1359 UnicodeBlock.CYRILLIC, 1360 UnicodeBlock.ARMENIAN, 1361 UnicodeBlock.HEBREW, 1362 UnicodeBlock.ARABIC, 1363 UnicodeBlock.SYRIAC, 1364 UnicodeBlock.THAANA, 1365 UnicodeBlock.DEVANAGARI, 1366 UnicodeBlock.BENGALI, 1367 UnicodeBlock.GURMUKHI, 1368 UnicodeBlock.GUJARATI, 1369 UnicodeBlock.ORIYA, 1370 UnicodeBlock.TAMIL, 1371 UnicodeBlock.TELUGU, 1372 UnicodeBlock.KANNADA, 1373 UnicodeBlock.MALAYALAM, 1374 UnicodeBlock.SINHALA, 1375 UnicodeBlock.THAI, 1376 UnicodeBlock.LAO, 1377 UnicodeBlock.TIBETAN, 1378 UnicodeBlock.MYANMAR, 1379 UnicodeBlock.GEORGIAN, 1380 UnicodeBlock.HANGUL_JAMO, 1381 UnicodeBlock.ETHIOPIC, 1382 UnicodeBlock.CHEROKEE, 1383 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1384 UnicodeBlock.OGHAM, 1385 UnicodeBlock.RUNIC, 1386 UnicodeBlock.KHMER, 1387 UnicodeBlock.MONGOLIAN, 1388 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1389 UnicodeBlock.GREEK_EXTENDED, 1390 UnicodeBlock.GENERAL_PUNCTUATION, 1391 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1392 UnicodeBlock.CURRENCY_SYMBOLS, 1393 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1394 UnicodeBlock.LETTERLIKE_SYMBOLS, 1395 UnicodeBlock.NUMBER_FORMS, 1396 UnicodeBlock.ARROWS, 1397 UnicodeBlock.MATHEMATICAL_OPERATORS, 1398 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1399 UnicodeBlock.CONTROL_PICTURES, 1400 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1401 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1402 UnicodeBlock.BOX_DRAWING, 1403 UnicodeBlock.BLOCK_ELEMENTS, 1404 UnicodeBlock.GEOMETRIC_SHAPES, 1405 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1406 UnicodeBlock.DINGBATS, 1407 UnicodeBlock.BRAILLE_PATTERNS, 1408 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1409 
UnicodeBlock.KANGXI_RADICALS, 1410 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 1411 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1412 UnicodeBlock.HIRAGANA, 1413 UnicodeBlock.KATAKANA, 1414 UnicodeBlock.BOPOMOFO, 1415 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1416 UnicodeBlock.KANBUN, 1417 UnicodeBlock.BOPOMOFO_EXTENDED, 1418 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1419 UnicodeBlock.CJK_COMPATIBILITY, 1420 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1421 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1422 UnicodeBlock.YI_SYLLABLES, 1423 UnicodeBlock.YI_RADICALS, 1424 UnicodeBlock.HANGUL_SYLLABLES, 1425 UnicodeBlock.HIGH_SURROGATES, 1426 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1427 UnicodeBlock.LOW_SURROGATES, 1428 UnicodeBlock.PRIVATE_USE_AREA, 1429 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1430 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1431 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1432 UnicodeBlock.COMBINING_HALF_MARKS, 1433 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1434 UnicodeBlock.SMALL_FORM_VARIANTS, 1435 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1436 UnicodeBlock.SPECIALS, 1437 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1438 UnicodeBlock.OLD_ITALIC, 1439 UnicodeBlock.GOTHIC, 1440 UnicodeBlock.DESERET, 1441 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1442 UnicodeBlock.MUSICAL_SYMBOLS, 1443 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1444 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1445 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1446 UnicodeBlock.TAGS, 1447 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1448 UnicodeBlock.TAGALOG, 1449 UnicodeBlock.HANUNOO, 1450 UnicodeBlock.BUHID, 1451 UnicodeBlock.TAGBANWA, 1452 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1453 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1454 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1455 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1456 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1457 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1458 UnicodeBlock.VARIATION_SELECTORS, 1459 
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1460 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1461 UnicodeBlock.LIMBU, 1462 UnicodeBlock.TAI_LE, 1463 UnicodeBlock.KHMER_SYMBOLS, 1464 UnicodeBlock.PHONETIC_EXTENSIONS, 1465 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1466 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1467 UnicodeBlock.LINEAR_B_SYLLABARY, 1468 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1469 UnicodeBlock.AEGEAN_NUMBERS, 1470 UnicodeBlock.UGARITIC, 1471 UnicodeBlock.SHAVIAN, 1472 UnicodeBlock.OSMANYA, 1473 UnicodeBlock.CYPRIOT_SYLLABARY, 1474 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1475 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT 1476 }; 1477 1478 /** 1479 * Retrieves the constant that corresponds to the specified block name. 1480 * The block names are defined by the Unicode 4.0.1 specification in the 1481 * {@code Blocks-4.0.1.txt} file. 1482 * <p> 1483 * Block names may be one of the following: 1484 * <ul> 1485 * <li>Canonical block name, as defined by the Unicode specification; 1486 * case-insensitive.</li> 1487 * <li>Canonical block name without any spaces, as defined by the 1488 * Unicode specification; case-insensitive.</li> 1489 * <li>{@code UnicodeBlock} constant identifier. This is determined by 1490 * uppercasing the canonical name and replacing all spaces and hyphens 1491 * with underscores.</li> 1492 * </ul> 1493 * 1494 * @param blockName 1495 * the name of the block to retrieve. 1496 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1497 * @throws NullPointerException 1498 * if {@code blockName} is {@code null}. 1499 * @throws IllegalArgumentException 1500 * if {@code blockName} is not a valid block name. 
1501 * @since 1.5 1502 */ 1503 public static UnicodeBlock forName(String blockName) { 1504 if (blockName == null) { 1505 throw new NullPointerException("blockName == null"); 1506 } 1507 int block = forNameImpl(blockName); 1508 if (block == -1) { 1509 if (blockName.equals("SURROGATES_AREA")) { 1510 return SURROGATES_AREA; 1511 } else if(blockName.equalsIgnoreCase("greek")) { 1512 return GREEK; 1513 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1514 blockName.equals("Combining Marks for Symbols") || 1515 blockName.equals("CombiningMarksforSymbols")) { 1516 return COMBINING_MARKS_FOR_SYMBOLS; 1517 } 1518 throw new IllegalArgumentException("Bad block name: " + blockName); 1519 } 1520 return BLOCKS[block]; 1521 } 1522 1523 /** 1524 * Gets the constant for the Unicode block that contains the specified 1525 * character. 1526 * 1527 * @param c 1528 * the character for which to get the {@code UnicodeBlock} 1529 * constant. 1530 * @return the {@code UnicodeBlock} constant for the block that contains 1531 * {@code c}, or {@code null} if {@code c} does not belong to 1532 * any defined block. 1533 */ 1534 public static UnicodeBlock of(char c) { 1535 return of((int) c); 1536 } 1537 1538 /** 1539 * Gets the constant for the Unicode block that contains the specified 1540 * Unicode code point. 1541 * 1542 * @param codePoint 1543 * the Unicode code point for which to get the 1544 * {@code UnicodeBlock} constant. 1545 * @return the {@code UnicodeBlock} constant for the block that contains 1546 * {@code codePoint}, or {@code null} if {@code codePoint} does 1547 * not belong to any defined block. 1548 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
1549 * @since 1.5 1550 */ 1551 public static UnicodeBlock of(int codePoint) { 1552 checkValidCodePoint(codePoint); 1553 int block = ofImpl(codePoint); 1554 if (block == -1 || block >= BLOCKS.length) { 1555 return null; 1556 } 1557 return BLOCKS[block]; 1558 } 1559 1560 private UnicodeBlock(String blockName, int start, int end) { 1561 super(blockName); 1562 } 1563 } 1564 1565 private static native int forNameImpl(String blockName); 1566 1567 private static native int ofImpl(int codePoint); 1568 1569 /** 1570 * Constructs a new {@code Character} with the specified primitive char 1571 * value. 1572 * 1573 * @param value 1574 * the primitive char value to store in the new instance. 1575 */ 1576 public Character(char value) { 1577 this.value = value; 1578 } 1579 1580 /** 1581 * Gets the primitive value of this character. 1582 * 1583 * @return this object's primitive value. 1584 */ 1585 public char charValue() { 1586 return value; 1587 } 1588 1589 private static void checkValidCodePoint(int codePoint) { 1590 if (!isValidCodePoint(codePoint)) { 1591 throw new IllegalArgumentException("Invalid code point: " + codePoint); 1592 } 1593 } 1594 1595 /** 1596 * Compares this object to the specified character object to determine their 1597 * relative order. 1598 * 1599 * @param c 1600 * the character object to compare this object to. 1601 * @return {@code 0} if the value of this character and the value of 1602 * {@code c} are equal; a positive value if the value of this 1603 * character is greater than the value of {@code c}; a negative 1604 * value if the value of this character is less than the value of 1605 * {@code c}. 1606 * @see java.lang.Comparable 1607 * @since 1.2 1608 */ 1609 public int compareTo(Character c) { 1610 return compare(value, c.value); 1611 } 1612 1613 /** 1614 * Compares two {@code char} values. 1615 * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. 
1616 * @since 1.7 1617 * @hide 1.7 1618 */ 1619 public static int compare(char lhs, char rhs) { 1620 return lhs - rhs; 1621 } 1622 1623 /** 1624 * Returns a {@code Character} instance for the {@code char} value passed. 1625 * <p> 1626 * If it is not necessary to get a new {@code Character} instance, it is 1627 * recommended to use this method instead of the constructor, since it 1628 * maintains a cache of instances which may result in better performance. 1629 * 1630 * @param c 1631 * the char value for which to get a {@code Character} instance. 1632 * @return the {@code Character} instance for {@code c}. 1633 * @since 1.5 1634 */ 1635 public static Character valueOf(char c) { 1636 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1637 } 1638 1639 /** 1640 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1641 */ 1642 private static final Character[] SMALL_VALUES = new Character[128]; 1643 1644 static { 1645 for (int i = 0; i < 128; i++) { 1646 SMALL_VALUES[i] = new Character((char) i); 1647 } 1648 } 1649 /** 1650 * Indicates whether {@code codePoint} is a valid Unicode code point. 1651 * 1652 * @param codePoint 1653 * the code point to test. 1654 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1655 * {@code false} otherwise. 1656 * @since 1.5 1657 */ 1658 public static boolean isValidCodePoint(int codePoint) { 1659 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1660 } 1661 1662 /** 1663 * Indicates whether {@code codePoint} is within the supplementary code 1664 * point range. 1665 * 1666 * @param codePoint 1667 * the code point to test. 1668 * @return {@code true} if {@code codePoint} is within the supplementary 1669 * code point range; {@code false} otherwise. 
1670 * @since 1.5 1671 */ 1672 public static boolean isSupplementaryCodePoint(int codePoint) { 1673 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1674 } 1675 1676 /** 1677 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1678 * that is used for representing supplementary characters in UTF-16 1679 * encoding. 1680 * 1681 * @param ch 1682 * the character to test. 1683 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1684 * {@code false} otherwise. 1685 * @see #isLowSurrogate(char) 1686 * @since 1.5 1687 */ 1688 public static boolean isHighSurrogate(char ch) { 1689 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1690 } 1691 1692 /** 1693 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1694 * that is used for representing supplementary characters in UTF-16 1695 * encoding. 1696 * 1697 * @param ch 1698 * the character to test. 1699 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1700 * {@code false} otherwise. 1701 * @see #isHighSurrogate(char) 1702 * @since 1.5 1703 */ 1704 public static boolean isLowSurrogate(char ch) { 1705 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1706 } 1707 1708 /** 1709 * Tests whether the given character is a high or low surrogate. 1710 * @since 1.7 1711 * @hide 1.7 1712 */ 1713 public static boolean isSurrogate(char ch) { 1714 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 1715 } 1716 1717 /** 1718 * Indicates whether the specified character pair is a valid surrogate pair. 1719 * 1720 * @param high 1721 * the high surrogate unit to test. 1722 * @param low 1723 * the low surrogate unit to test. 1724 * @return {@code true} if {@code high} is a high-surrogate code unit and 1725 * {@code low} is a low-surrogate code unit; {@code false} 1726 * otherwise. 
1727 * @see #isHighSurrogate(char) 1728 * @see #isLowSurrogate(char) 1729 * @since 1.5 1730 */ 1731 public static boolean isSurrogatePair(char high, char low) { 1732 return (isHighSurrogate(high) && isLowSurrogate(low)); 1733 } 1734 1735 /** 1736 * Calculates the number of {@code char} values required to represent the 1737 * specified Unicode code point. This method checks if the {@code codePoint} 1738 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1739 * returned, otherwise {@code 1}. To test if the code point is valid, use 1740 * the {@link #isValidCodePoint(int)} method. 1741 * 1742 * @param codePoint 1743 * the code point for which to calculate the number of required 1744 * chars. 1745 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 1746 * @see #isValidCodePoint(int) 1747 * @see #isSupplementaryCodePoint(int) 1748 * @since 1.5 1749 */ 1750 public static int charCount(int codePoint) { 1751 return (codePoint >= 0x10000 ? 2 : 1); 1752 } 1753 1754 /** 1755 * Converts a surrogate pair into a Unicode code point. This method assumes 1756 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1757 * surrogates, then the result is indeterminate. The 1758 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1759 * method to validate the pair. 1760 * 1761 * @param high 1762 * the high surrogate unit. 1763 * @param low 1764 * the low surrogate unit. 1765 * @return the Unicode code point corresponding to the surrogate unit pair. 1766 * @see #isSurrogatePair(char, char) 1767 * @since 1.5 1768 */ 1769 public static int toCodePoint(char high, char low) { 1770 // See RFC 2781, Section 2.2 1771 // http://www.ietf.org/rfc/rfc2781.txt 1772 int h = (high & 0x3FF) << 10; 1773 int l = low & 0x3FF; 1774 return (h | l) + 0x10000; 1775 } 1776 1777 /** 1778 * Returns the code point at {@code index} in the specified sequence of 1779 * character units. 
If the unit at {@code index} is a high-surrogate unit, 1780 * {@code index + 1} is less than the length of the sequence and the unit at 1781 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1782 * point represented by the pair is returned; otherwise the {@code char} 1783 * value at {@code index} is returned. 1784 * 1785 * @param seq 1786 * the source sequence of {@code char} units. 1787 * @param index 1788 * the position in {@code seq} from which to retrieve the code 1789 * point. 1790 * @return the Unicode code point or {@code char} value at {@code index} in 1791 * {@code seq}. 1792 * @throws NullPointerException 1793 * if {@code seq} is {@code null}. 1794 * @throws IndexOutOfBoundsException 1795 * if the {@code index} is negative or greater than or equal to 1796 * the length of {@code seq}. 1797 * @since 1.5 1798 */ 1799 public static int codePointAt(CharSequence seq, int index) { 1800 if (seq == null) { 1801 throw new NullPointerException("seq == null"); 1802 } 1803 int len = seq.length(); 1804 if (index < 0 || index >= len) { 1805 throw new IndexOutOfBoundsException(); 1806 } 1807 1808 char high = seq.charAt(index++); 1809 if (index >= len) { 1810 return high; 1811 } 1812 char low = seq.charAt(index); 1813 if (isSurrogatePair(high, low)) { 1814 return toCodePoint(high, low); 1815 } 1816 return high; 1817 } 1818 1819 /** 1820 * Returns the code point at {@code index} in the specified array of 1821 * character units. If the unit at {@code index} is a high-surrogate unit, 1822 * {@code index + 1} is less than the length of the array and the unit at 1823 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1824 * point represented by the pair is returned; otherwise the {@code char} 1825 * value at {@code index} is returned. 1826 * 1827 * @param seq 1828 * the source array of {@code char} units. 1829 * @param index 1830 * the position in {@code seq} from which to retrieve the code 1831 * point. 
1832 * @return the Unicode code point or {@code char} value at {@code index} in 1833 * {@code seq}. 1834 * @throws NullPointerException 1835 * if {@code seq} is {@code null}. 1836 * @throws IndexOutOfBoundsException 1837 * if the {@code index} is negative or greater than or equal to 1838 * the length of {@code seq}. 1839 * @since 1.5 1840 */ 1841 public static int codePointAt(char[] seq, int index) { 1842 if (seq == null) { 1843 throw new NullPointerException("seq == null"); 1844 } 1845 int len = seq.length; 1846 if (index < 0 || index >= len) { 1847 throw new IndexOutOfBoundsException(); 1848 } 1849 1850 char high = seq[index++]; 1851 if (index >= len) { 1852 return high; 1853 } 1854 char low = seq[index]; 1855 if (isSurrogatePair(high, low)) { 1856 return toCodePoint(high, low); 1857 } 1858 return high; 1859 } 1860 1861 /** 1862 * Returns the code point at {@code index} in the specified array of 1863 * character units, where {@code index} has to be less than {@code limit}. 1864 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1865 * is less than {@code limit} and the unit at {@code index + 1} is a 1866 * low-surrogate unit, then the supplementary code point represented by the 1867 * pair is returned; otherwise the {@code char} value at {@code index} is 1868 * returned. 1869 * 1870 * @param seq 1871 * the source array of {@code char} units. 1872 * @param index 1873 * the position in {@code seq} from which to get the code point. 1874 * @param limit 1875 * the index after the last unit in {@code seq} that can be used. 1876 * @return the Unicode code point or {@code char} value at {@code index} in 1877 * {@code seq}. 1878 * @throws NullPointerException 1879 * if {@code seq} is {@code null}. 1880 * @throws IndexOutOfBoundsException 1881 * if {@code index < 0}, {@code index >= limit}, 1882 * {@code limit < 0} or if {@code limit} is greater than the 1883 * length of {@code seq}. 
1884 * @since 1.5 1885 */ 1886 public static int codePointAt(char[] seq, int index, int limit) { 1887 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1888 throw new IndexOutOfBoundsException(); 1889 } 1890 1891 char high = seq[index++]; 1892 if (index >= limit) { 1893 return high; 1894 } 1895 char low = seq[index]; 1896 if (isSurrogatePair(high, low)) { 1897 return toCodePoint(high, low); 1898 } 1899 return high; 1900 } 1901 1902 /** 1903 * Returns the code point that precedes {@code index} in the specified 1904 * sequence of character units. If the unit at {@code index - 1} is a 1905 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1906 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1907 * point represented by the pair is returned; otherwise the {@code char} 1908 * value at {@code index - 1} is returned. 1909 * 1910 * @param seq 1911 * the source sequence of {@code char} units. 1912 * @param index 1913 * the position in {@code seq} following the code 1914 * point that should be returned. 1915 * @return the Unicode code point or {@code char} value before {@code index} 1916 * in {@code seq}. 1917 * @throws NullPointerException 1918 * if {@code seq} is {@code null}. 1919 * @throws IndexOutOfBoundsException 1920 * if the {@code index} is less than 1 or greater than the 1921 * length of {@code seq}. 
1922 * @since 1.5 1923 */ 1924 public static int codePointBefore(CharSequence seq, int index) { 1925 if (seq == null) { 1926 throw new NullPointerException("seq == null"); 1927 } 1928 int len = seq.length(); 1929 if (index < 1 || index > len) { 1930 throw new IndexOutOfBoundsException(); 1931 } 1932 1933 char low = seq.charAt(--index); 1934 if (--index < 0) { 1935 return low; 1936 } 1937 char high = seq.charAt(index); 1938 if (isSurrogatePair(high, low)) { 1939 return toCodePoint(high, low); 1940 } 1941 return low; 1942 } 1943 1944 /** 1945 * Returns the code point that precedes {@code index} in the specified 1946 * array of character units. If the unit at {@code index - 1} is a 1947 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1948 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1949 * point represented by the pair is returned; otherwise the {@code char} 1950 * value at {@code index - 1} is returned. 1951 * 1952 * @param seq 1953 * the source array of {@code char} units. 1954 * @param index 1955 * the position in {@code seq} following the code 1956 * point that should be returned. 1957 * @return the Unicode code point or {@code char} value before {@code index} 1958 * in {@code seq}. 1959 * @throws NullPointerException 1960 * if {@code seq} is {@code null}. 1961 * @throws IndexOutOfBoundsException 1962 * if the {@code index} is less than 1 or greater than the 1963 * length of {@code seq}. 
1964 * @since 1.5 1965 */ 1966 public static int codePointBefore(char[] seq, int index) { 1967 if (seq == null) { 1968 throw new NullPointerException("seq == null"); 1969 } 1970 int len = seq.length; 1971 if (index < 1 || index > len) { 1972 throw new IndexOutOfBoundsException(); 1973 } 1974 1975 char low = seq[--index]; 1976 if (--index < 0) { 1977 return low; 1978 } 1979 char high = seq[index]; 1980 if (isSurrogatePair(high, low)) { 1981 return toCodePoint(high, low); 1982 } 1983 return low; 1984 } 1985 1986 /** 1987 * Returns the code point that precedes the {@code index} in the specified 1988 * array of character units and is not less than {@code start}. If the unit 1989 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1990 * less than {@code start} and the unit at {@code index - 2} is a 1991 * high-surrogate unit, then the supplementary code point represented by the 1992 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1993 * is returned. 1994 * 1995 * @param seq 1996 * the source array of {@code char} units. 1997 * @param index 1998 * the position in {@code seq} following the code point that 1999 * should be returned. 2000 * @param start 2001 * the index of the first element in {@code seq}. 2002 * @return the Unicode code point or {@code char} value before {@code index} 2003 * in {@code seq}. 2004 * @throws NullPointerException 2005 * if {@code seq} is {@code null}. 2006 * @throws IndexOutOfBoundsException 2007 * if the {@code index <= start}, {@code start < 0}, 2008 * {@code index} is greater than the length of {@code seq}, or 2009 * if {@code start} is equal or greater than the length of 2010 * {@code seq}. 
2011 * @since 1.5 2012 */ 2013 public static int codePointBefore(char[] seq, int index, int start) { 2014 if (seq == null) { 2015 throw new NullPointerException("seq == null"); 2016 } 2017 int len = seq.length; 2018 if (index <= start || index > len || start < 0 || start >= len) { 2019 throw new IndexOutOfBoundsException(); 2020 } 2021 2022 char low = seq[--index]; 2023 if (--index < start) { 2024 return low; 2025 } 2026 char high = seq[index]; 2027 if (isSurrogatePair(high, low)) { 2028 return toCodePoint(high, low); 2029 } 2030 return low; 2031 } 2032 2033 /** 2034 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2035 * and copies the value(s) into the char array {@code dst}, starting at 2036 * index {@code dstIndex}. 2037 * 2038 * @param codePoint 2039 * the Unicode code point to encode. 2040 * @param dst 2041 * the destination array to copy the encoded value into. 2042 * @param dstIndex 2043 * the index in {@code dst} from where to start copying. 2044 * @return the number of {@code char} value units copied into {@code dst}. 2045 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2046 * @throws NullPointerException 2047 * if {@code dst} is {@code null}. 2048 * @throws IndexOutOfBoundsException 2049 * if {@code dstIndex} is negative, greater than or equal to 2050 * {@code dst.length} or equals {@code dst.length - 1} when 2051 * {@code codePoint} is a 2052 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2053 * @since 1.5 2054 */ 2055 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2056 checkValidCodePoint(codePoint); 2057 if (dst == null) { 2058 throw new NullPointerException("dst == null"); 2059 } 2060 if (dstIndex < 0 || dstIndex >= dst.length) { 2061 throw new IndexOutOfBoundsException(); 2062 } 2063 2064 if (isSupplementaryCodePoint(codePoint)) { 2065 if (dstIndex == dst.length - 1) { 2066 throw new IndexOutOfBoundsException(); 2067 } 2068 // See RFC 2781, Section 2.1 2069 // http://www.ietf.org/rfc/rfc2781.txt 2070 int cpPrime = codePoint - 0x10000; 2071 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2072 int low = 0xDC00 | (cpPrime & 0x3FF); 2073 dst[dstIndex] = (char) high; 2074 dst[dstIndex + 1] = (char) low; 2075 return 2; 2076 } 2077 2078 dst[dstIndex] = (char) codePoint; 2079 return 1; 2080 } 2081 2082 /** 2083 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2084 * and returns it as a char array. 2085 * 2086 * @param codePoint 2087 * the Unicode code point to encode. 2088 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2089 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2090 * then the returned array contains two characters, otherwise it 2091 * contains just one character. 2092 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2093 * @since 1.5 2094 */ 2095 public static char[] toChars(int codePoint) { 2096 checkValidCodePoint(codePoint); 2097 if (isSupplementaryCodePoint(codePoint)) { 2098 int cpPrime = codePoint - 0x10000; 2099 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2100 int low = 0xDC00 | (cpPrime & 0x3FF); 2101 return new char[] { (char) high, (char) low }; 2102 } 2103 return new char[] { (char) codePoint }; 2104 } 2105 2106 /** 2107 * Counts the number of Unicode code points in the subsequence of the 2108 * specified character sequence, as delineated by {@code beginIndex} and 2109 * {@code endIndex}. 
Any surrogate values with missing pair values will be 2110 * counted as one code point. 2111 * 2112 * @param seq 2113 * the {@code CharSequence} to look through. 2114 * @param beginIndex 2115 * the inclusive index to begin counting at. 2116 * @param endIndex 2117 * the exclusive index to stop counting at. 2118 * @return the number of Unicode code points. 2119 * @throws NullPointerException 2120 * if {@code seq} is {@code null}. 2121 * @throws IndexOutOfBoundsException 2122 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2123 * if {@code endIndex} is greater than the length of {@code seq}. 2124 * @since 1.5 2125 */ 2126 public static int codePointCount(CharSequence seq, int beginIndex, 2127 int endIndex) { 2128 if (seq == null) { 2129 throw new NullPointerException("seq == null"); 2130 } 2131 int len = seq.length(); 2132 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2133 throw new IndexOutOfBoundsException(); 2134 } 2135 2136 int result = 0; 2137 for (int i = beginIndex; i < endIndex; i++) { 2138 char c = seq.charAt(i); 2139 if (isHighSurrogate(c)) { 2140 if (++i < endIndex) { 2141 c = seq.charAt(i); 2142 if (!isLowSurrogate(c)) { 2143 result++; 2144 } 2145 } 2146 } 2147 result++; 2148 } 2149 return result; 2150 } 2151 2152 /** 2153 * Counts the number of Unicode code points in the subsequence of the 2154 * specified char array, as delineated by {@code offset} and {@code count}. 2155 * Any surrogate values with missing pair values will be counted as one code 2156 * point. 2157 * 2158 * @param seq 2159 * the char array to look through 2160 * @param offset 2161 * the inclusive index to begin counting at. 2162 * @param count 2163 * the number of {@code char} values to look through in 2164 * {@code seq}. 2165 * @return the number of Unicode code points. 2166 * @throws NullPointerException 2167 * if {@code seq} is {@code null}. 
2168 * @throws IndexOutOfBoundsException 2169 * if {@code offset < 0}, {@code count < 0} or if 2170 * {@code offset + count} is greater than the length of 2171 * {@code seq}. 2172 * @since 1.5 2173 */ 2174 public static int codePointCount(char[] seq, int offset, int count) { 2175 Arrays.checkOffsetAndCount(seq.length, offset, count); 2176 int endIndex = offset + count; 2177 int result = 0; 2178 for (int i = offset; i < endIndex; i++) { 2179 char c = seq[i]; 2180 if (isHighSurrogate(c)) { 2181 if (++i < endIndex) { 2182 c = seq[i]; 2183 if (!isLowSurrogate(c)) { 2184 result++; 2185 } 2186 } 2187 } 2188 result++; 2189 } 2190 return result; 2191 } 2192 2193 /** 2194 * Determines the index in the specified character sequence that is offset 2195 * {@code codePointOffset} code points from {@code index}. 2196 * 2197 * @param seq 2198 * the character sequence to find the index in. 2199 * @param index 2200 * the start index in {@code seq}. 2201 * @param codePointOffset 2202 * the number of code points to look backwards or forwards; may 2203 * be a negative or positive value. 2204 * @return the index in {@code seq} that is {@code codePointOffset} code 2205 * points away from {@code index}. 2206 * @throws NullPointerException 2207 * if {@code seq} is {@code null}. 2208 * @throws IndexOutOfBoundsException 2209 * if {@code index < 0}, {@code index} is greater than the 2210 * length of {@code seq}, or if there are not enough values in 2211 * {@code seq} to skip {@code codePointOffset} code points 2212 * forwards or backwards (if {@code codePointOffset} is 2213 * negative) from {@code index}. 
2214 * @since 1.5 2215 */ 2216 public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { 2217 if (seq == null) { 2218 throw new NullPointerException("seq == null"); 2219 } 2220 int len = seq.length(); 2221 if (index < 0 || index > len) { 2222 throw new IndexOutOfBoundsException(); 2223 } 2224 2225 if (codePointOffset == 0) { 2226 return index; 2227 } 2228 2229 if (codePointOffset > 0) { 2230 int codePoints = codePointOffset; 2231 int i = index; 2232 while (codePoints > 0) { 2233 codePoints--; 2234 if (i >= len) { 2235 throw new IndexOutOfBoundsException(); 2236 } 2237 if (isHighSurrogate(seq.charAt(i))) { 2238 int next = i + 1; 2239 if (next < len && isLowSurrogate(seq.charAt(next))) { 2240 i++; 2241 } 2242 } 2243 i++; 2244 } 2245 return i; 2246 } 2247 2248 int codePoints = -codePointOffset; 2249 int i = index; 2250 while (codePoints > 0) { 2251 codePoints--; 2252 i--; 2253 if (i < 0) { 2254 throw new IndexOutOfBoundsException(); 2255 } 2256 if (isLowSurrogate(seq.charAt(i))) { 2257 int prev = i - 1; 2258 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2259 i--; 2260 } 2261 } 2262 } 2263 return i; 2264 } 2265 2266 /** 2267 * Determines the index in a subsequence of the specified character array 2268 * that is offset {@code codePointOffset} code points from {@code index}. 2269 * The subsequence is delineated by {@code start} and {@code count}. 2270 * 2271 * @param seq 2272 * the character array to find the index in. 2273 * @param start 2274 * the inclusive index that marks the beginning of the 2275 * subsequence. 2276 * @param count 2277 * the number of {@code char} values to include within the 2278 * subsequence. 2279 * @param index 2280 * the start index in the subsequence of the char array. 2281 * @param codePointOffset 2282 * the number of code points to look backwards or forwards; may 2283 * be a negative or positive value. 
2284 * @return the index in {@code seq} that is {@code codePointOffset} code 2285 * points away from {@code index}. 2286 * @throws NullPointerException 2287 * if {@code seq} is {@code null}. 2288 * @throws IndexOutOfBoundsException 2289 * if {@code start < 0}, {@code count < 0}, 2290 * {@code index < start}, {@code index > start + count}, 2291 * {@code start + count} is greater than the length of 2292 * {@code seq}, or if there are not enough values in 2293 * {@code seq} to skip {@code codePointOffset} code points 2294 * forward or backward (if {@code codePointOffset} is 2295 * negative) from {@code index}. 2296 * @since 1.5 2297 */ 2298 public static int offsetByCodePoints(char[] seq, int start, int count, 2299 int index, int codePointOffset) { 2300 Arrays.checkOffsetAndCount(seq.length, start, count); 2301 int end = start + count; 2302 if (index < start || index > end) { 2303 throw new IndexOutOfBoundsException(); 2304 } 2305 2306 if (codePointOffset == 0) { 2307 return index; 2308 } 2309 2310 if (codePointOffset > 0) { 2311 int codePoints = codePointOffset; 2312 int i = index; 2313 while (codePoints > 0) { 2314 codePoints--; 2315 if (i >= end) { 2316 throw new IndexOutOfBoundsException(); 2317 } 2318 if (isHighSurrogate(seq[i])) { 2319 int next = i + 1; 2320 if (next < end && isLowSurrogate(seq[next])) { 2321 i++; 2322 } 2323 } 2324 i++; 2325 } 2326 return i; 2327 } 2328 2329 int codePoints = -codePointOffset; 2330 int i = index; 2331 while (codePoints > 0) { 2332 codePoints--; 2333 i--; 2334 if (i < start) { 2335 throw new IndexOutOfBoundsException(); 2336 } 2337 if (isLowSurrogate(seq[i])) { 2338 int prev = i - 1; 2339 if (prev >= start && isHighSurrogate(seq[prev])) { 2340 i--; 2341 } 2342 } 2343 } 2344 return i; 2345 } 2346 2347 /** 2348 * Convenience method to determine the value of the specified character 2349 * {@code c} in the supplied radix. The value of {@code radix} must be 2350 * between MIN_RADIX and MAX_RADIX. 
2351 * 2352 * @param c 2353 * the character to determine the value of. 2354 * @param radix 2355 * the radix. 2356 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2357 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2358 */ 2359 public static int digit(char c, int radix) { 2360 return digit((int) c, radix); 2361 } 2362 2363 /** 2364 * Convenience method to determine the value of the character 2365 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2366 * be between MIN_RADIX and MAX_RADIX. 2367 * 2368 * @param codePoint 2369 * the character, including supplementary characters. 2370 * @param radix 2371 * the radix. 2372 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2373 * {@link #MAX_RADIX} then the value of the character in the radix; 2374 * -1 otherwise. 2375 */ 2376 public static int digit(int codePoint, int radix) { 2377 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2378 return -1; 2379 } 2380 if (codePoint < 128) { 2381 // Optimized for ASCII 2382 int result = -1; 2383 if ('0' <= codePoint && codePoint <= '9') { 2384 result = codePoint - '0'; 2385 } else if ('a' <= codePoint && codePoint <= 'z') { 2386 result = 10 + (codePoint - 'a'); 2387 } else if ('A' <= codePoint && codePoint <= 'Z') { 2388 result = 10 + (codePoint - 'A'); 2389 } 2390 return result < radix ? result : -1; 2391 } 2392 return digitImpl(codePoint, radix); 2393 } 2394 2395 private static native int digitImpl(int codePoint, int radix); 2396 2397 /** 2398 * Compares this object with the specified object and indicates if they are 2399 * equal. In order to be equal, {@code object} must be an instance of 2400 * {@code Character} and have the same char value as this object. 2401 * 2402 * @param object 2403 * the object to compare this double with. 2404 * @return {@code true} if the specified object is equal to this 2405 * {@code Character}; {@code false} otherwise. 
2406 */ 2407 @Override 2408 public boolean equals(Object object) { 2409 return (object instanceof Character) && (((Character) object).value == value); 2410 } 2411 2412 /** 2413 * Returns the character which represents the specified digit in the 2414 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2415 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2416 * smaller than {@code radix}. If any of these conditions does not hold, 0 2417 * is returned. 2418 * 2419 * @param digit 2420 * the integer value. 2421 * @param radix 2422 * the radix. 2423 * @return the character which represents the {@code digit} in the 2424 * {@code radix}. 2425 */ 2426 public static char forDigit(int digit, int radix) { 2427 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2428 if (digit >= 0 && digit < radix) { 2429 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2430 } 2431 } 2432 return 0; 2433 } 2434 2435 /** 2436 * Returns the name of the given code point, or null if the code point is unassigned. 2437 * 2438 * <p>As a fallback mechanism this method returns strings consisting of the Unicode 2439 * block name (with underscores replaced by spaces), a single space, and the uppercase 2440 * hex value of the code point, using as few digits as necessary. 2441 * 2442 * <p>Examples: 2443 * <ul> 2444 * <li>{@code Character.getName(0)} returns "NULL". 2445 * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". 2446 * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". 2447 * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". 2448 * </ul> 2449 * 2450 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
2451 * @since 1.7 2452 * @hide 1.7 2453 */ 2454 public static String getName(int codePoint) { 2455 checkValidCodePoint(codePoint); 2456 if (getType(codePoint) == Character.UNASSIGNED) { 2457 return null; 2458 } 2459 String result = getNameImpl(codePoint); 2460 if (result == null) { 2461 String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); 2462 result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0); 2463 } 2464 return result; 2465 } 2466 2467 private static native String getNameImpl(int codePoint); 2468 2469 /** 2470 * Returns the numeric value of the specified Unicode character. 2471 * See {@link #getNumericValue(int)}. 2472 * 2473 * @param c the character 2474 * @return a non-negative numeric integer value if a numeric value for 2475 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2476 * -2 if the numeric value can not be represented as an integer. 2477 */ 2478 public static int getNumericValue(char c) { 2479 return getNumericValue((int) c); 2480 } 2481 2482 /** 2483 * Gets the numeric value of the specified Unicode code point. For example, 2484 * the code point '\u216B' stands for the Roman number XII, which has the 2485 * numeric value 12. 2486 * 2487 * <p>There are two points of divergence between this method and the Unicode 2488 * specification. This method treats the letters a-z (in both upper and lower 2489 * cases, and their full-width variants) as numbers from 10 to 35. The 2490 * Unicode specification also supports the idea of code points with non-integer 2491 * numeric values; this method does not (except to the extent of returning -2 2492 * for such code points). 2493 * 2494 * @param codePoint the code point 2495 * @return a non-negative numeric integer value if a numeric value for 2496 * {@code codePoint} exists, -1 if there is no numeric value for 2497 * {@code codePoint}, -2 if the numeric value can not be 2498 * represented with an integer. 
2499 */ 2500 public static int getNumericValue(int codePoint) { 2501 // This is both an optimization and papers over differences between Java and ICU. 2502 if (codePoint < 128) { 2503 if (codePoint >= '0' && codePoint <= '9') { 2504 return codePoint - '0'; 2505 } 2506 if (codePoint >= 'a' && codePoint <= 'z') { 2507 return codePoint - ('a' - 10); 2508 } 2509 if (codePoint >= 'A' && codePoint <= 'Z') { 2510 return codePoint - ('A' - 10); 2511 } 2512 return -1; 2513 } 2514 // Full-width uppercase A-Z. 2515 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2516 return codePoint - 0xff17; 2517 } 2518 // Full-width lowercase a-z. 2519 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2520 return codePoint - 0xff37; 2521 } 2522 return getNumericValueImpl(codePoint); 2523 } 2524 2525 private static native int getNumericValueImpl(int codePoint); 2526 2527 /** 2528 * Gets the general Unicode category of the specified character. 2529 * 2530 * @param c 2531 * the character to get the category of. 2532 * @return the Unicode category of {@code c}. 2533 */ 2534 public static int getType(char c) { 2535 return getType((int) c); 2536 } 2537 2538 /** 2539 * Gets the general Unicode category of the specified code point. 2540 * 2541 * @param codePoint 2542 * the Unicode code point to get the category of. 2543 * @return the Unicode category of {@code codePoint}. 2544 */ 2545 public static int getType(int codePoint) { 2546 int type = getTypeImpl(codePoint); 2547 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2548 if (type <= Character.FORMAT) { 2549 return type; 2550 } 2551 return (type + 1); 2552 } 2553 2554 private static native int getTypeImpl(int codePoint); 2555 2556 /** 2557 * Gets the Unicode directionality of the specified character. 2558 * 2559 * @param c 2560 * the character to get the directionality of. 2561 * @return the Unicode directionality of {@code c}. 
2562 */ 2563 public static byte getDirectionality(char c) { 2564 return getDirectionality((int)c); 2565 } 2566 2567 /** 2568 * Gets the Unicode directionality of the specified character. 2569 * 2570 * @param codePoint 2571 * the Unicode code point to get the directionality of. 2572 * @return the Unicode directionality of {@code codePoint}. 2573 */ 2574 public static byte getDirectionality(int codePoint) { 2575 if (getType(codePoint) == Character.UNASSIGNED) { 2576 return Character.DIRECTIONALITY_UNDEFINED; 2577 } 2578 2579 byte directionality = getDirectionalityImpl(codePoint); 2580 if (directionality == -1) { 2581 return -1; 2582 } 2583 return DIRECTIONALITY[directionality]; 2584 } 2585 2586 private static native byte getDirectionalityImpl(int codePoint); 2587 2588 /** 2589 * Indicates whether the specified character is mirrored. 2590 * 2591 * @param c 2592 * the character to check. 2593 * @return {@code true} if {@code c} is mirrored; {@code false} 2594 * otherwise. 2595 */ 2596 public static boolean isMirrored(char c) { 2597 return isMirrored((int) c); 2598 } 2599 2600 /** 2601 * Indicates whether the specified code point is mirrored. 2602 * 2603 * @param codePoint 2604 * the code point to check. 2605 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2606 * otherwise. 2607 */ 2608 public static boolean isMirrored(int codePoint) { 2609 return isMirroredImpl(codePoint); 2610 } 2611 2612 private static native boolean isMirroredImpl(int codePoint); 2613 2614 @Override 2615 public int hashCode() { 2616 return value; 2617 } 2618 2619 /** 2620 * Returns the high surrogate for the given code point. The result is meaningless if 2621 * the given code point is not a supplementary character. 2622 * @since 1.7 2623 * @hide 1.7 2624 */ 2625 public static char highSurrogate(int codePoint) { 2626 return (char) ((codePoint >> 10) + 0xd7c0); 2627 } 2628 2629 /** 2630 * Returns the low surrogate for the given code point. 
The result is meaningless if 2631 * the given code point is not a supplementary character. 2632 * @since 1.7 2633 * @hide 1.7 2634 */ 2635 public static char lowSurrogate(int codePoint) { 2636 return (char) ((codePoint & 0x3ff) | 0xdc00); 2637 } 2638 2639 /** 2640 * Tests whether the given code point is in the Basic Multilingual Plane (BMP). 2641 * Such code points can be represented by a single {@code char}. 2642 * @since 1.7 2643 * @hide 1.7 2644 */ 2645 public static boolean isBmpCodePoint(int codePoint) { 2646 return codePoint >= 0 && codePoint <= 0xffff; 2647 } 2648 2649 /** 2650 * Indicates whether the specified character is defined in the Unicode 2651 * specification. 2652 * 2653 * @param c 2654 * the character to check. 2655 * @return {@code true} if the general Unicode category of the character is 2656 * not {@code UNASSIGNED}; {@code false} otherwise. 2657 */ 2658 public static boolean isDefined(char c) { 2659 return isDefinedImpl(c); 2660 } 2661 2662 /** 2663 * Indicates whether the specified code point is defined in the Unicode 2664 * specification. 2665 * 2666 * @param codePoint 2667 * the code point to check. 2668 * @return {@code true} if the general Unicode category of the code point is 2669 * not {@code UNASSIGNED}; {@code false} otherwise. 2670 */ 2671 public static boolean isDefined(int codePoint) { 2672 return isDefinedImpl(codePoint); 2673 } 2674 2675 private static native boolean isDefinedImpl(int codePoint); 2676 2677 /** 2678 * Indicates whether the specified character is a digit. 2679 * 2680 * @param c 2681 * the character to check. 2682 * @return {@code true} if {@code c} is a digit; {@code false} 2683 * otherwise. 2684 */ 2685 public static boolean isDigit(char c) { 2686 return isDigit((int) c); 2687 } 2688 2689 /** 2690 * Indicates whether the specified code point is a digit. 2691 * 2692 * @param codePoint 2693 * the code point to check. 2694 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2695 * otherwise. 
2696 */ 2697 public static boolean isDigit(int codePoint) { 2698 // Optimized case for ASCII 2699 if ('0' <= codePoint && codePoint <= '9') { 2700 return true; 2701 } 2702 if (codePoint < 1632) { 2703 return false; 2704 } 2705 return isDigitImpl(codePoint); 2706 } 2707 2708 private static native boolean isDigitImpl(int codePoint); 2709 2710 /** 2711 * Indicates whether the specified character is ignorable in a Java or 2712 * Unicode identifier. 2713 * 2714 * @param c 2715 * the character to check. 2716 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2717 */ 2718 public static boolean isIdentifierIgnorable(char c) { 2719 return isIdentifierIgnorable((int) c); 2720 } 2721 2722 /** 2723 * Indicates whether the specified code point is ignorable in a Java or 2724 * Unicode identifier. 2725 * 2726 * @param codePoint 2727 * the code point to check. 2728 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2729 * otherwise. 2730 */ 2731 public static boolean isIdentifierIgnorable(int codePoint) { 2732 // This is both an optimization and papers over differences between Java and ICU. 2733 if (codePoint < 0x600) { 2734 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2735 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2736 } 2737 return isIdentifierIgnorableImpl(codePoint); 2738 } 2739 2740 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2741 2742 /** 2743 * Indicates whether the specified character is an ISO control character. 2744 * 2745 * @param c 2746 * the character to check. 2747 * @return {@code true} if {@code c} is an ISO control character; 2748 * {@code false} otherwise. 2749 */ 2750 public static boolean isISOControl(char c) { 2751 return isISOControl((int) c); 2752 } 2753 2754 /** 2755 * Indicates whether the specified code point is an ISO control character. 2756 * 2757 * @param c 2758 * the code point to check. 
2759 * @return {@code true} if {@code c} is an ISO control character; 2760 * {@code false} otherwise. 2761 */ 2762 public static boolean isISOControl(int c) { 2763 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2764 } 2765 2766 /** 2767 * Indicates whether the specified character is a valid part of a Java 2768 * identifier other than the first character. 2769 * 2770 * @param c 2771 * the character to check. 2772 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2773 * {@code false} otherwise. 2774 */ 2775 public static boolean isJavaIdentifierPart(char c) { 2776 return isJavaIdentifierPart((int) c); 2777 } 2778 2779 /** 2780 * Indicates whether the specified code point is a valid part of a Java 2781 * identifier other than the first character. 2782 * 2783 * @param codePoint 2784 * the code point to check. 2785 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2786 * {@code false} otherwise. 2787 */ 2788 public static boolean isJavaIdentifierPart(int codePoint) { 2789 // Use precomputed bitmasks to optimize the ASCII range. 2790 if (codePoint < 64) { 2791 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2792 } else if (codePoint < 128) { 2793 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2794 } 2795 int type = getType(codePoint); 2796 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2797 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2798 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2799 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2800 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2801 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2802 } 2803 2804 /** 2805 * Indicates whether the specified character is a valid first character for 2806 * a Java identifier. 2807 * 2808 * @param c 2809 * the character to check. 
2810 * @return {@code true} if {@code c} is a valid first character of a Java 2811 * identifier; {@code false} otherwise. 2812 */ 2813 public static boolean isJavaIdentifierStart(char c) { 2814 return isJavaIdentifierStart((int) c); 2815 } 2816 2817 /** 2818 * Indicates whether the specified code point is a valid first character for 2819 * a Java identifier. 2820 * 2821 * @param codePoint 2822 * the code point to check. 2823 * @return {@code true} if {@code codePoint} is a valid start of a Java 2824 * identifier; {@code false} otherwise. 2825 */ 2826 public static boolean isJavaIdentifierStart(int codePoint) { 2827 // Use precomputed bitmasks to optimize the ASCII range. 2828 if (codePoint < 64) { 2829 return (codePoint == '$'); // There's only one character in this range. 2830 } else if (codePoint < 128) { 2831 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2832 } 2833 int type = getType(codePoint); 2834 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2835 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2836 } 2837 2838 /** 2839 * Indicates whether the specified character is a Java letter. 2840 * 2841 * @param c 2842 * the character to check. 2843 * @return {@code true} if {@code c} is a Java letter; {@code false} 2844 * otherwise. 2845 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2846 */ 2847 @Deprecated 2848 public static boolean isJavaLetter(char c) { 2849 return isJavaIdentifierStart(c); 2850 } 2851 2852 /** 2853 * Indicates whether the specified character is a Java letter or digit 2854 * character. 2855 * 2856 * @param c 2857 * the character to check. 2858 * @return {@code true} if {@code c} is a Java letter or digit; 2859 * {@code false} otherwise. 
2860 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2861 */ 2862 @Deprecated 2863 public static boolean isJavaLetterOrDigit(char c) { 2864 return isJavaIdentifierPart(c); 2865 } 2866 2867 /** 2868 * Indicates whether the specified character is a letter. 2869 * 2870 * @param c 2871 * the character to check. 2872 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2873 */ 2874 public static boolean isLetter(char c) { 2875 return isLetter((int) c); 2876 } 2877 2878 /** 2879 * Indicates whether the specified code point is a letter. 2880 * 2881 * @param codePoint 2882 * the code point to check. 2883 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2884 * otherwise. 2885 */ 2886 public static boolean isLetter(int codePoint) { 2887 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2888 return true; 2889 } 2890 if (codePoint < 128) { 2891 return false; 2892 } 2893 return isLetterImpl(codePoint); 2894 } 2895 2896 private static native boolean isLetterImpl(int codePoint); 2897 2898 /** 2899 * Indicates whether the specified character is a letter or a digit. 2900 * 2901 * @param c 2902 * the character to check. 2903 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2904 * otherwise. 2905 */ 2906 public static boolean isLetterOrDigit(char c) { 2907 return isLetterOrDigit((int) c); 2908 } 2909 2910 /** 2911 * Indicates whether the specified code point is a letter or a digit. 2912 * 2913 * @param codePoint 2914 * the code point to check. 2915 * @return {@code true} if {@code codePoint} is a letter or a digit; 2916 * {@code false} otherwise. 
2917 */ 2918 public static boolean isLetterOrDigit(int codePoint) { 2919 // Optimized case for ASCII 2920 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2921 return true; 2922 } 2923 if ('0' <= codePoint && codePoint <= '9') { 2924 return true; 2925 } 2926 if (codePoint < 128) { 2927 return false; 2928 } 2929 return isLetterOrDigitImpl(codePoint); 2930 } 2931 2932 private static native boolean isLetterOrDigitImpl(int codePoint); 2933 2934 /** 2935 * Indicates whether the specified character is a lower case letter. 2936 * 2937 * @param c 2938 * the character to check. 2939 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2940 * otherwise. 2941 */ 2942 public static boolean isLowerCase(char c) { 2943 return isLowerCase((int) c); 2944 } 2945 2946 /** 2947 * Indicates whether the specified code point is a lower case letter. 2948 * 2949 * @param codePoint 2950 * the code point to check. 2951 * @return {@code true} if {@code codePoint} is a lower case letter; 2952 * {@code false} otherwise. 2953 */ 2954 public static boolean isLowerCase(int codePoint) { 2955 // Optimized case for ASCII 2956 if ('a' <= codePoint && codePoint <= 'z') { 2957 return true; 2958 } 2959 if (codePoint < 128) { 2960 return false; 2961 } 2962 return isLowerCaseImpl(codePoint); 2963 } 2964 2965 private static native boolean isLowerCaseImpl(int codePoint); 2966 2967 /** 2968 * Indicates whether the specified character is a Java space. 2969 * 2970 * @param c 2971 * the character to check. 2972 * @return {@code true} if {@code c} is a Java space; {@code false} 2973 * otherwise. 2974 * @deprecated Use {@link #isWhitespace(char)} 2975 */ 2976 @Deprecated 2977 public static boolean isSpace(char c) { 2978 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2979 } 2980 2981 /** 2982 * Indicates whether the specified character is a Unicode space character. 
2983 * That is, if it is a member of one of the Unicode categories Space 2984 * Separator, Line Separator, or Paragraph Separator. 2985 * 2986 * @param c 2987 * the character to check. 2988 * @return {@code true} if {@code c} is a Unicode space character, 2989 * {@code false} otherwise. 2990 */ 2991 public static boolean isSpaceChar(char c) { 2992 return isSpaceChar((int) c); 2993 } 2994 2995 /** 2996 * Indicates whether the specified code point is a Unicode space character. 2997 * That is, if it is a member of one of the Unicode categories Space 2998 * Separator, Line Separator, or Paragraph Separator. 2999 * 3000 * @param codePoint 3001 * the code point to check. 3002 * @return {@code true} if {@code codePoint} is a Unicode space character, 3003 * {@code false} otherwise. 3004 */ 3005 public static boolean isSpaceChar(int codePoint) { 3006 if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) { 3007 return true; 3008 } 3009 if (codePoint < 0x2000) { 3010 return false; 3011 } 3012 if (codePoint <= 0xffff) { 3013 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3014 codePoint == 0x202f || codePoint == 0x3000; 3015 } 3016 return isSpaceCharImpl(codePoint); 3017 } 3018 3019 private static native boolean isSpaceCharImpl(int codePoint); 3020 3021 /** 3022 * Indicates whether the specified character is a titlecase character. 3023 * 3024 * @param c 3025 * the character to check. 3026 * @return {@code true} if {@code c} is a titlecase character, {@code false} 3027 * otherwise. 3028 */ 3029 public static boolean isTitleCase(char c) { 3030 return isTitleCaseImpl(c); 3031 } 3032 3033 /** 3034 * Indicates whether the specified code point is a titlecase character. 3035 * 3036 * @param codePoint 3037 * the code point to check. 3038 * @return {@code true} if {@code codePoint} is a titlecase character, 3039 * {@code false} otherwise. 
3040 */ 3041 public static boolean isTitleCase(int codePoint) { 3042 return isTitleCaseImpl(codePoint); 3043 } 3044 3045 private static native boolean isTitleCaseImpl(int codePoint); 3046 3047 /** 3048 * Indicates whether the specified character is valid as part of a Unicode 3049 * identifier other than the first character. 3050 * 3051 * @param c 3052 * the character to check. 3053 * @return {@code true} if {@code c} is valid as part of a Unicode 3054 * identifier; {@code false} otherwise. 3055 */ 3056 public static boolean isUnicodeIdentifierPart(char c) { 3057 return isUnicodeIdentifierPartImpl(c); 3058 } 3059 3060 /** 3061 * Indicates whether the specified code point is valid as part of a Unicode 3062 * identifier other than the first character. 3063 * 3064 * @param codePoint 3065 * the code point to check. 3066 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3067 * identifier; {@code false} otherwise. 3068 */ 3069 public static boolean isUnicodeIdentifierPart(int codePoint) { 3070 return isUnicodeIdentifierPartImpl(codePoint); 3071 } 3072 3073 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3074 3075 /** 3076 * Indicates whether the specified character is a valid initial character 3077 * for a Unicode identifier. 3078 * 3079 * @param c 3080 * the character to check. 3081 * @return {@code true} if {@code c} is a valid first character for a 3082 * Unicode identifier; {@code false} otherwise. 3083 */ 3084 public static boolean isUnicodeIdentifierStart(char c) { 3085 return isUnicodeIdentifierStartImpl(c); 3086 } 3087 3088 /** 3089 * Indicates whether the specified code point is a valid initial character 3090 * for a Unicode identifier. 3091 * 3092 * @param codePoint 3093 * the code point to check. 3094 * @return {@code true} if {@code codePoint} is a valid first character for 3095 * a Unicode identifier; {@code false} otherwise. 
3096 */ 3097 public static boolean isUnicodeIdentifierStart(int codePoint) { 3098 return isUnicodeIdentifierStartImpl(codePoint); 3099 } 3100 3101 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3102 3103 /** 3104 * Indicates whether the specified character is an upper case letter. 3105 * 3106 * @param c 3107 * the character to check. 3108 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3109 * otherwise. 3110 */ 3111 public static boolean isUpperCase(char c) { 3112 return isUpperCase((int) c); 3113 } 3114 3115 /** 3116 * Indicates whether the specified code point is an upper case letter. 3117 * 3118 * @param codePoint 3119 * the code point to check. 3120 * @return {@code true} if {@code codePoint} is a upper case letter; 3121 * {@code false} otherwise. 3122 */ 3123 public static boolean isUpperCase(int codePoint) { 3124 // Optimized case for ASCII 3125 if ('A' <= codePoint && codePoint <= 'Z') { 3126 return true; 3127 } 3128 if (codePoint < 128) { 3129 return false; 3130 } 3131 return isUpperCaseImpl(codePoint); 3132 } 3133 3134 private static native boolean isUpperCaseImpl(int codePoint); 3135 3136 /** 3137 * Indicates whether the specified character is a whitespace character in 3138 * Java. 3139 * 3140 * @param c 3141 * the character to check. 3142 * @return {@code true} if the supplied {@code c} is a whitespace character 3143 * in Java; {@code false} otherwise. 3144 */ 3145 public static boolean isWhitespace(char c) { 3146 return isWhitespace((int) c); 3147 } 3148 3149 /** 3150 * Indicates whether the specified code point is a whitespace character in 3151 * Java. 3152 * 3153 * @param codePoint 3154 * the code point to check. 3155 * @return {@code true} if the supplied {@code c} is a whitespace character 3156 * in Java; {@code false} otherwise. 3157 */ 3158 public static boolean isWhitespace(int codePoint) { 3159 // This is both an optimization and papers over differences between Java and ICU. 
3160 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) { 3161 return true; 3162 } 3163 if (codePoint == 0x1680) { 3164 return true; 3165 } 3166 if (codePoint < 0x2000 || codePoint == 0x2007) { 3167 return false; 3168 } 3169 if (codePoint <= 0xffff) { 3170 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3171 codePoint == 0x3000; 3172 } 3173 return isWhitespaceImpl(codePoint); 3174 } 3175 3176 private static native boolean isWhitespaceImpl(int codePoint); 3177 3178 /** 3179 * Reverses the order of the first and second byte in the specified 3180 * character. 3181 * 3182 * @param c 3183 * the character to reverse. 3184 * @return the character with reordered bytes. 3185 */ 3186 public static char reverseBytes(char c) { 3187 return (char)((c<<8) | (c>>8)); 3188 } 3189 3190 /** 3191 * Returns the lower case equivalent for the specified character if the 3192 * character is an upper case letter. Otherwise, the specified character is 3193 * returned unchanged. 3194 * 3195 * @param c 3196 * the character 3197 * @return if {@code c} is an upper case character then its lower case 3198 * counterpart, otherwise just {@code c}. 3199 */ 3200 public static char toLowerCase(char c) { 3201 return (char) toLowerCase((int) c); 3202 } 3203 3204 /** 3205 * Returns the lower case equivalent for the specified code point if it is 3206 * an upper case letter. Otherwise, the specified code point is returned 3207 * unchanged. 3208 * 3209 * @param codePoint 3210 * the code point to check. 3211 * @return if {@code codePoint} is an upper case character then its lower 3212 * case counterpart, otherwise just {@code codePoint}. 
3213 */ 3214 public static int toLowerCase(int codePoint) { 3215 // Optimized case for ASCII 3216 if ('A' <= codePoint && codePoint <= 'Z') { 3217 return (char) (codePoint + ('a' - 'A')); 3218 } 3219 if (codePoint < 192) { 3220 return codePoint; 3221 } 3222 return toLowerCaseImpl(codePoint); 3223 } 3224 3225 private static native int toLowerCaseImpl(int codePoint); 3226 3227 @Override 3228 public String toString() { 3229 return String.valueOf(value); 3230 } 3231 3232 /** 3233 * Converts the specified character to its string representation. 3234 * 3235 * @param value 3236 * the character to convert. 3237 * @return the character converted to a string. 3238 */ 3239 public static String toString(char value) { 3240 return String.valueOf(value); 3241 } 3242 3243 /** 3244 * Returns the title case equivalent for the specified character if it 3245 * exists. Otherwise, the specified character is returned unchanged. 3246 * 3247 * @param c 3248 * the character to convert. 3249 * @return the title case equivalent of {@code c} if it exists, otherwise 3250 * {@code c}. 3251 */ 3252 public static char toTitleCase(char c) { 3253 return (char) toTitleCaseImpl(c); 3254 } 3255 3256 /** 3257 * Returns the title case equivalent for the specified code point if it 3258 * exists. Otherwise, the specified code point is returned unchanged. 3259 * 3260 * @param codePoint 3261 * the code point to convert. 3262 * @return the title case equivalent of {@code codePoint} if it exists, 3263 * otherwise {@code codePoint}. 3264 */ 3265 public static int toTitleCase(int codePoint) { 3266 return toTitleCaseImpl(codePoint); 3267 } 3268 3269 private static native int toTitleCaseImpl(int codePoint); 3270 3271 /** 3272 * Returns the upper case equivalent for the specified character if the 3273 * character is a lower case letter. Otherwise, the specified character is 3274 * returned unchanged. 3275 * 3276 * @param c 3277 * the character to convert. 
* @return if {@code c} is a lower case character then its upper case
     *         counterpart, otherwise just {@code c}.
     */
    public static char toUpperCase(char c) {
        final int mapped = toUpperCase((int) c);
        return (char) mapped;
    }

    /**
     * Maps a code point to its upper case equivalent. ASCII {@code a-z} is
     * converted locally, other code points below 181 are returned unchanged,
     * and everything from 181 upwards is handed to the native mapping.
     *
     * @param codePoint
     *            the code point to convert.
     * @return if {@code codePoint} is a lower case character then its upper
     *         case counterpart, otherwise just {@code codePoint}.
     */
    public static int toUpperCase(int codePoint) {
        // Only code points from 181 upwards are passed to the native mapping.
        if (codePoint >= 181) {
            return toUpperCaseImpl(codePoint);
        }
        // Below that, only ASCII a-z changes: shift it onto A-Z.
        if (codePoint >= 'a' && codePoint <= 'z') {
            final int caseOffset = 'a' - 'A';
            return (char) (codePoint - caseOffset);
        }
        return codePoint;
    }

    private static native int toUpperCaseImpl(int codePoint);
}