1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.lang; 19 20 import java.io.Serializable; 21 22 /** 23 * The wrapper for the primitive type {@code char}. This class also provides a 24 * number of utility methods for working with characters. 25 * 26 * <p>Character data is kept up to date as Unicode evolves. 27 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 28 * the {@code Locale} documentation for details of the Unicode versions implemented by current 29 * and historical Android releases. 30 * 31 * <p>The Unicode specification, character tables, and other information are available at 32 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 33 * 34 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 35 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 36 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 37 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 38 * encoding and {@code char} pairs are used to represent code points in the 39 * supplementary range. 
A pair of {@code char} values that represents a 40 * supplementary character is made up of a <i>high surrogate</i> with a value 41 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 42 * 0xDC00 to 0xDFFF. 43 * <p> 44 * On the Java platform a {@code char} value represents either a single BMP code 45 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 46 * is used to represent all Unicode code points. 47 * 48 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 49 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 50 * grouped semantically to provide a convenient overview. This table is also useful in 51 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 52 * <span class="datatable"> 53 * <style type="text/css"> 54 * .datatable td { padding-right: 20px; } 55 * </style> 56 * <p><table> 57 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 58 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 59 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 60 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 61 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 62 * <tr> <td><br></td> </tr> 63 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 64 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 65 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 66 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 67 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 68 * <tr> <td><br></td> </tr> 69 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 70 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 71 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 72 * <tr> <td><br></td> </tr> 73 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 74 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 75 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 76 * <tr> <td><br></td> </tr> 77 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 78 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 79 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 84 * <tr> <td><br></td> </tr> 85 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 86 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 87 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 88 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 89 * <tr> <td><br></td> </tr> 90 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 91 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 93 * </table> 94 * </span> 95 * 96 * @since 1.0 97 */ 98 public final class Character implements Serializable, Comparable<Character> { 99 
private static final long serialVersionUID = 3786198910865385080L; 100 101 private final char value; 102 103 /** 104 * The minimum {@code Character} value. 105 */ 106 public static final char MIN_VALUE = '\u0000'; 107 108 /** 109 * The maximum {@code Character} value. 110 */ 111 public static final char MAX_VALUE = '\uffff'; 112 113 /** 114 * The minimum radix used for conversions between characters and integers. 115 */ 116 public static final int MIN_RADIX = 2; 117 118 /** 119 * The maximum radix used for conversions between characters and integers. 120 */ 121 public static final int MAX_RADIX = 36; 122 123 /** 124 * The {@link Class} object that represents the primitive type {@code char}. 125 */ 126 @SuppressWarnings("unchecked") 127 public static final Class<Character> TYPE 128 = (Class<Character>) char[].class.getComponentType(); 129 130 // Note: This can't be set to "char.class", since *that* is 131 // defined to be "java.lang.Character.TYPE"; 132 133 /** 134 * Unicode category constant Cn. 135 */ 136 public static final byte UNASSIGNED = 0; 137 138 /** 139 * Unicode category constant Lu. 140 */ 141 public static final byte UPPERCASE_LETTER = 1; 142 143 /** 144 * Unicode category constant Ll. 145 */ 146 public static final byte LOWERCASE_LETTER = 2; 147 148 /** 149 * Unicode category constant Lt. 150 */ 151 public static final byte TITLECASE_LETTER = 3; 152 153 /** 154 * Unicode category constant Lm. 155 */ 156 public static final byte MODIFIER_LETTER = 4; 157 158 /** 159 * Unicode category constant Lo. 160 */ 161 public static final byte OTHER_LETTER = 5; 162 163 /** 164 * Unicode category constant Mn. 165 */ 166 public static final byte NON_SPACING_MARK = 6; 167 168 /** 169 * Unicode category constant Me. 170 */ 171 public static final byte ENCLOSING_MARK = 7; 172 173 /** 174 * Unicode category constant Mc. 175 */ 176 public static final byte COMBINING_SPACING_MARK = 8; 177 178 /** 179 * Unicode category constant Nd. 
180 */ 181 public static final byte DECIMAL_DIGIT_NUMBER = 9; 182 183 /** 184 * Unicode category constant Nl. 185 */ 186 public static final byte LETTER_NUMBER = 10; 187 188 /** 189 * Unicode category constant No. 190 */ 191 public static final byte OTHER_NUMBER = 11; 192 193 /** 194 * Unicode category constant Zs. 195 */ 196 public static final byte SPACE_SEPARATOR = 12; 197 198 /** 199 * Unicode category constant Zl. 200 */ 201 public static final byte LINE_SEPARATOR = 13; 202 203 /** 204 * Unicode category constant Zp. 205 */ 206 public static final byte PARAGRAPH_SEPARATOR = 14; 207 208 /** 209 * Unicode category constant Cc. 210 */ 211 public static final byte CONTROL = 15; 212 213 /** 214 * Unicode category constant Cf. 215 */ 216 public static final byte FORMAT = 16; 217 218 /** 219 * Unicode category constant Co. 220 */ 221 public static final byte PRIVATE_USE = 18; 222 223 /** 224 * Unicode category constant Cs. 225 */ 226 public static final byte SURROGATE = 19; 227 228 /** 229 * Unicode category constant Pd. 230 */ 231 public static final byte DASH_PUNCTUATION = 20; 232 233 /** 234 * Unicode category constant Ps. 235 */ 236 public static final byte START_PUNCTUATION = 21; 237 238 /** 239 * Unicode category constant Pe. 240 */ 241 public static final byte END_PUNCTUATION = 22; 242 243 /** 244 * Unicode category constant Pc. 245 */ 246 public static final byte CONNECTOR_PUNCTUATION = 23; 247 248 /** 249 * Unicode category constant Po. 250 */ 251 public static final byte OTHER_PUNCTUATION = 24; 252 253 /** 254 * Unicode category constant Sm. 255 */ 256 public static final byte MATH_SYMBOL = 25; 257 258 /** 259 * Unicode category constant Sc. 260 */ 261 public static final byte CURRENCY_SYMBOL = 26; 262 263 /** 264 * Unicode category constant Sk. 265 */ 266 public static final byte MODIFIER_SYMBOL = 27; 267 268 /** 269 * Unicode category constant So. 270 */ 271 public static final byte OTHER_SYMBOL = 28; 272 273 /** 274 * Unicode category constant Pi. 
275 * 276 * @since 1.4 277 */ 278 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 279 280 /** 281 * Unicode category constant Pf. 282 * 283 * @since 1.4 284 */ 285 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 286 287 /** 288 * Unicode bidirectional constant. 289 * 290 * @since 1.4 291 */ 292 public static final byte DIRECTIONALITY_UNDEFINED = -1; 293 294 /** 295 * Unicode bidirectional constant L. 296 * 297 * @since 1.4 298 */ 299 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 300 301 /** 302 * Unicode bidirectional constant R. 303 * 304 * @since 1.4 305 */ 306 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 307 308 /** 309 * Unicode bidirectional constant AL. 310 * 311 * @since 1.4 312 */ 313 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 314 315 /** 316 * Unicode bidirectional constant EN. 317 * 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 321 322 /** 323 * Unicode bidirectional constant ES. 324 * 325 * @since 1.4 326 */ 327 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 328 329 /** 330 * Unicode bidirectional constant ET. 331 * 332 * @since 1.4 333 */ 334 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 335 336 /** 337 * Unicode bidirectional constant AN. 338 * 339 * @since 1.4 340 */ 341 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 342 343 /** 344 * Unicode bidirectional constant CS. 345 * 346 * @since 1.4 347 */ 348 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 349 350 /** 351 * Unicode bidirectional constant NSM. 352 * 353 * @since 1.4 354 */ 355 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 356 357 /** 358 * Unicode bidirectional constant BN. 359 * 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 363 364 /** 365 * Unicode bidirectional constant B. 
366 * 367 * @since 1.4 368 */ 369 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 370 371 /** 372 * Unicode bidirectional constant S. 373 * 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 377 378 /** 379 * Unicode bidirectional constant WS. 380 * 381 * @since 1.4 382 */ 383 public static final byte DIRECTIONALITY_WHITESPACE = 12; 384 385 /** 386 * Unicode bidirectional constant ON. 387 * 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 391 392 /** 393 * Unicode bidirectional constant LRE. 394 * 395 * @since 1.4 396 */ 397 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 398 399 /** 400 * Unicode bidirectional constant LRO. 401 * 402 * @since 1.4 403 */ 404 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 405 406 /** 407 * Unicode bidirectional constant RLE. 408 * 409 * @since 1.4 410 */ 411 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 412 413 /** 414 * Unicode bidirectional constant RLO. 415 * 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 419 420 /** 421 * Unicode bidirectional constant PDF. 422 * 423 * @since 1.4 424 */ 425 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 426 427 /** 428 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 429 * encoding, {@code '\uD800'}. 430 * 431 * @since 1.5 432 */ 433 public static final char MIN_HIGH_SURROGATE = '\uD800'; 434 435 /** 436 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 437 * encoding, {@code '\uDBFF'}. 438 * 439 * @since 1.5 440 */ 441 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 442 443 /** 444 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 445 * encoding, {@code '\uDC00'}. 
446 * 447 * @since 1.5 448 */ 449 public static final char MIN_LOW_SURROGATE = '\uDC00'; 450 451 /** 452 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 453 * encoding, {@code '\uDFFF'}. 454 * 455 * @since 1.5 456 */ 457 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 458 459 /** 460 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 461 * 462 * @since 1.5 463 */ 464 public static final char MIN_SURROGATE = '\uD800'; 465 466 /** 467 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 468 * 469 * @since 1.5 470 */ 471 public static final char MAX_SURROGATE = '\uDFFF'; 472 473 /** 474 * The minimum value of a supplementary code point, {@code U+010000}. 475 * 476 * @since 1.5 477 */ 478 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 479 480 /** 481 * The minimum code point value, {@code U+0000}. 482 * 483 * @since 1.5 484 */ 485 public static final int MIN_CODE_POINT = 0x000000; 486 487 /** 488 * The maximum code point value, {@code U+10FFFF}. 489 * 490 * @since 1.5 491 */ 492 public static final int MAX_CODE_POINT = 0x10FFFF; 493 494 /** 495 * The number of bits required to represent a {@code Character} value in 496 * unsigned form. 497 * 498 * @since 1.5 499 */ 500 public static final int SIZE = 16; 501 502 // BEGIN android-removed 503 // Unicode 3.0.1 (same as Unicode 3.0.0) 504 // private static final String bidiKeys = ... 505 506 // private static final char[] bidiValues = ... 507 508 // private static final char[] mirrored = ... 509 510 // Unicode 3.0.1 (same as Unicode 3.0.0) 511 // private static final String typeKeys = ... 512 513 // private static final char[] typeValues = ... 514 515 // private static final int[] typeValuesCache = ... 516 517 // Unicode 3.0.1 (same as Unicode 3.0.0) 518 // private static final String uppercaseKeys = ... 519 520 // private static final char[] uppercaseValues = ... 
521 522 // private static final int[] uppercaseValuesCache = ... 523 524 // private static final String lowercaseKeys = ... 525 526 // private static final char[] lowercaseValues = ... 527 528 // private static final int[] lowercaseValuesCache = ... 529 530 // private static final String digitKeys = ... 531 532 // private static final char[] digitValues = ... 533 534 // private static final char[] typeTags = ... 535 // END android-removed 536 537 // BEGIN android-note 538 // put this in a helper class so that it's only initialized on demand? 539 // END android-note /* Lookup table whose elements are the DIRECTIONALITY_* constants. The element order is NOT the numeric order of those constants (for example, index 2 holds DIRECTIONALITY_EUROPEAN_NUMBER, whose value is 3). NOTE(review): the order presumably mirrors the directionality codes produced by the native/ICU layer that indexes this array - confirm against the caller before reordering. */ 540 private static final byte[] DIRECTIONALITY = new byte[] { 541 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 542 DIRECTIONALITY_EUROPEAN_NUMBER, 543 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 544 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 545 DIRECTIONALITY_ARABIC_NUMBER, 546 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 547 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 548 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 549 DIRECTIONALITY_OTHER_NEUTRALS, 550 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 551 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 552 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 553 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 554 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 555 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 556 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 557 558 // BEGIN android-removed 559 // private static final int ISJAVASTART = 1; 560 561 // private static final int ISJAVAPART = 2; 562 563 // Unicode 3.0.1 (same as Unicode 3.0.0) 564 // private static final String titlecaseKeys = ... 565 566 // private static final char[] titlecaseValues = ... 567 568 // Unicode 3.0.0 (NOT the same as Unicode 3.0.1) 569 // private static final String numericKeys = ... 570 571 // private static final char[] numericValues = ... 572 // END android-removed 573 574 /** 575 * Represents a subset of the Unicode character set. 
576 */ 577 public static class Subset { /** This subset's name as passed to the constructor; returned by {@link #toString()}. */ 578 String name; 579 580 /** 581 * Constructs a new {@code Subset}. 582 * 583 * @param string 584 * this subset's name; must not be {@code null}. * @throws NullPointerException * if {@code string} is {@code null}. 585 */ 586 protected Subset(String string) { 587 if (string == null) { 588 throw new NullPointerException(); 589 } 590 name = string; 591 } 592 593 /** 594 * Compares this character subset with the specified object. Uses 595 * {@link java.lang.Object#equals(Object)} to do the comparison. This method is 596 * {@code final}, so subclasses cannot replace the identity comparison. * 597 * @param object 598 * the object to compare this character subset with. 599 * @return {@code true} if {@code object} is this subset, that is, if 600 * {@code object == this}; {@code false} otherwise. 601 */ 602 @Override 603 public final boolean equals(Object object) { 604 return super.equals(object); 605 } 606 607 /** 608 * Returns the integer hash code for this character subset. This method is 609 * {@code final}, which keeps it consistent with the {@code final} {@link #equals(Object)}. * 610 * @return this subset's hash code, which is the hash code computed by 611 * {@link java.lang.Object#hashCode()}. 612 */ 613 @Override 614 public final int hashCode() { 615 return super.hashCode(); 616 } 617 618 /** 619 * Returns the string representation of this subset. 620 * 621 * @return this subset's name. 622 */ 623 @Override 624 public final String toString() { 625 return name; 626 } 627 } 628 629 /** 630 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 631 * specification. 632 * 633 * @since 1.2 634 */ 635 public static final class UnicodeBlock extends Subset { 636 /** 637 * The "Surrogates Area" Unicode Block. 638 * 639 * @deprecated As of Java 5, this block has been replaced by 640 * {@link #HIGH_SURROGATES}, 641 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 642 * {@link #LOW_SURROGATES}. 643 */ 644 @Deprecated /* NOTE(review): registered with the range 0x0-0x0, unlike the other blocks; presumably a placeholder because the deprecated block no longer maps to a single range - confirm against the UnicodeBlock constructor and lookup code. */ 645 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0); 646 /** 647 * The "Basic Latin" Unicode Block. 
648 * 649 * @since 1.2 650 */ 651 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f); 652 /** 653 * The "Latin-1 Supplement" Unicode Block. 654 * 655 * @since 1.2 656 */ 657 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff); 658 /** 659 * The "Latin Extended-A" Unicode Block. 660 * 661 * @since 1.2 662 */ 663 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f); 664 /** 665 * The "Latin Extended-B" Unicode Block. 666 * 667 * @since 1.2 668 */ 669 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 670 /** 671 * The "IPA Extensions" Unicode Block. 672 * 673 * @since 1.2 674 */ 675 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 676 /** 677 * The "Spacing Modifier Letters" Unicode Block. 678 * 679 * @since 1.2 680 */ 681 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 682 /** 683 * The "Combining Diacritical Marks" Unicode Block. 684 * 685 * @since 1.2 686 */ 687 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 688 /** 689 * The "Greek and Coptic" Unicode Block. Previously referred 690 * to as "Greek". 691 * 692 * @since 1.2 693 */ 694 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 695 /** 696 * The "Cyrillic" Unicode Block. 697 * 698 * @since 1.2 699 */ 700 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 701 /** 702 * The "Cyrillic Supplement" Unicode Block. Previously 703 * referred to as "Cyrillic Supplementary". 
704 * 705 * @since 1.5 706 */ 707 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 708 /** 709 * The "Armenian" Unicode Block. 710 * 711 * @since 1.2 712 */ 713 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 714 /** 715 * The "Hebrew" Unicode Block. 716 * 717 * @since 1.2 718 */ 719 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 720 /** 721 * The "Arabic" Unicode Block. 722 * 723 * @since 1.2 724 */ 725 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 726 /** 727 * The "Syriac" Unicode Block. 728 * 729 * @since 1.4 730 */ 731 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 732 /** 733 * The "Thaana" Unicode Block. 734 * 735 * @since 1.4 736 */ 737 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 738 /** 739 * The "Devanagari" Unicode Block. 740 * 741 * @since 1.2 742 */ 743 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 744 /** 745 * The "Bengali" Unicode Block. 746 * 747 * @since 1.2 748 */ 749 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 750 /** 751 * The "Gurmukhi" Unicode Block. 752 * 753 * @since 1.2 754 */ 755 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 756 /** 757 * The "Gujarati" Unicode Block. 758 * 759 * @since 1.2 760 */ 761 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 762 /** 763 * The "Oriya" Unicode Block. 764 * 765 * @since 1.2 766 */ 767 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 768 /** 769 * The "Tamil" Unicode Block. 770 * 771 * @since 1.2 772 */ 773 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 774 /** 775 * The "Telugu" Unicode Block. 
776 * 777 * @since 1.2 778 */ 779 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 780 /** 781 * The "Kannada" Unicode Block. 782 * 783 * @since 1.2 784 */ 785 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 786 /** 787 * The "Malayalam" Unicode Block. 788 * 789 * @since 1.2 790 */ 791 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 792 /** 793 * The "Sinhala" Unicode Block. 794 * 795 * @since 1.4 796 */ 797 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 798 /** 799 * The "Thai" Unicode Block. 800 * 801 * @since 1.2 802 */ 803 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 804 /** 805 * The "Lao" Unicode Block. 806 * 807 * @since 1.2 808 */ 809 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 810 /** 811 * The "Tibetan" Unicode Block. 812 * 813 * @since 1.2 814 */ 815 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 816 /** 817 * The "Myanmar" Unicode Block. 818 * 819 * @since 1.4 820 */ 821 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 822 /** 823 * The "Georgian" Unicode Block. 824 * 825 * @since 1.2 826 */ 827 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 828 /** 829 * The "Hangul Jamo" Unicode Block. 830 * 831 * @since 1.2 832 */ 833 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 834 /** 835 * The "Ethiopic" Unicode Block. 836 * 837 * @since 1.4 838 */ 839 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 840 /** 841 * The "Cherokee" Unicode Block. 842 * 843 * @since 1.4 844 */ 845 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 846 /** 847 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 
848 * 849 * @since 1.4 850 */ 851 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 852 /** 853 * The "Ogham" Unicode Block. 854 * 855 * @since 1.4 856 */ 857 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 858 /** 859 * The "Runic" Unicode Block. 860 * 861 * @since 1.4 862 */ 863 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 864 /** 865 * The "Tagalog" Unicode Block. 866 * 867 * @since 1.5 868 */ 869 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 870 /** 871 * The "Hanunoo" Unicode Block. 872 * 873 * @since 1.5 874 */ 875 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 876 /** 877 * The "Buhid" Unicode Block. 878 * 879 * @since 1.5 880 */ 881 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 882 /** 883 * The "Tagbanwa" Unicode Block. 884 * 885 * @since 1.5 886 */ 887 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 888 /** 889 * The "Khmer" Unicode Block. 890 * 891 * @since 1.4 892 */ 893 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 894 /** 895 * The "Mongolian" Unicode Block. 896 * 897 * @since 1.4 898 */ 899 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 900 /** 901 * The "Limbu" Unicode Block. 902 * 903 * @since 1.5 904 */ 905 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 906 /** 907 * The "Tai Le" Unicode Block. 908 * 909 * @since 1.5 910 */ 911 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 912 /** 913 * The "Khmer Symbols" Unicode Block. 
914 * 915 * @since 1.5 916 */ 917 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 918 /** 919 * The "Phonetic Extensions" Unicode Block. 920 * 921 * @since 1.5 922 */ 923 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 924 /** 925 * The "Latin Extended Additional" Unicode Block. 926 * 927 * @since 1.2 928 */ 929 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 930 /** 931 * The "Greek Extended" Unicode Block. 932 * 933 * @since 1.2 934 */ 935 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 936 /** 937 * The "General Punctuation" Unicode Block. 938 * 939 * @since 1.2 940 */ 941 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 942 /** 943 * The "Superscripts and Subscripts" Unicode Block. 944 * 945 * @since 1.2 946 */ 947 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 948 /** 949 * The "Currency Symbols" Unicode Block. 950 * 951 * @since 1.2 952 */ 953 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 954 /** 955 * The "Combining Diacritical Marks for Symbols" Unicode 956 * Block. Previously referred to as "Combining Marks for 957 * Symbols". 958 * 959 * @since 1.2 960 */ 961 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 962 /** 963 * The "Letterlike Symbols" Unicode Block. 964 * 965 * @since 1.2 966 */ 967 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 968 /** 969 * The "Number Forms" Unicode Block. 
970 * 971 * @since 1.2 972 */ 973 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 974 /** 975 * The "Arrows" Unicode Block (U+2190 to U+21FF). 976 * 977 * @since 1.2 978 */ 979 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 980 /** 981 * The "Mathematical Operators" Unicode Block (U+2200 to U+22FF). 982 * 983 * @since 1.2 984 */ 985 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 986 /** 987 * The "Miscellaneous Technical" Unicode Block (U+2300 to U+23FF). 988 * 989 * @since 1.2 990 */ 991 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 992 /** 993 * The "Control Pictures" Unicode Block (U+2400 to U+243F). 994 * 995 * @since 1.2 996 */ 997 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 998 /** 999 * The "Optical Character Recognition" Unicode Block (U+2440 to U+245F). 1000 * 1001 * @since 1.2 1002 */ 1003 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 1004 /** 1005 * The "Enclosed Alphanumerics" Unicode Block (U+2460 to U+24FF). 1006 * 1007 * @since 1.2 1008 */ 1009 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 1010 /** 1011 * The "Box Drawing" Unicode Block (U+2500 to U+257F). 1012 * 1013 * @since 1.2 1014 */ 1015 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 1016 /** 1017 * The "Block Elements" Unicode Block (U+2580 to U+259F). 1018 * 1019 * @since 1.2 1020 */ 1021 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 1022 /** 1023 * The "Geometric Shapes" Unicode Block (U+25A0 to U+25FF). 1024 * 1025 * @since 1.2 1026 */ 1027 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 1028 /** 1029 * The "Miscellaneous Symbols" Unicode Block (U+2600 to U+26FF).
1030 * 1031 * @since 1.2 1032 */ 1033 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 1034 /** 1035 * The "Dingbats" Unicode Block (U+2700 to U+27BF). 1036 * 1037 * @since 1.2 1038 */ 1039 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 1040 /** 1041 * The "Miscellaneous Mathematical Symbols-A" Unicode Block (U+27C0 to U+27EF). 1042 * 1043 * @since 1.5 1044 */ 1045 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef); 1046 /** 1047 * The "Supplemental Arrows-A" Unicode Block (U+27F0 to U+27FF). 1048 * 1049 * @since 1.5 1050 */ 1051 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff); 1052 /** 1053 * The "Braille Patterns" Unicode Block (U+2800 to U+28FF). 1054 * 1055 * @since 1.4 1056 */ 1057 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff); 1058 /** 1059 * The "Supplemental Arrows-B" Unicode Block (U+2900 to U+297F). 1060 * 1061 * @since 1.5 1062 */ 1063 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f); 1064 /** 1065 * The "Miscellaneous Mathematical Symbols-B" Unicode Block (U+2980 to U+29FF). 1066 * 1067 * @since 1.5 1068 */ 1069 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff); 1070 /** 1071 * The "Supplemental Mathematical Operators" Unicode Block (U+2A00 to U+2AFF). 1072 * 1073 * @since 1.5 1074 */ 1075 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff); 1076 /** 1077 * The "Miscellaneous Symbols and Arrows" Unicode Block (U+2B00 to U+2BFF).
1078 * 1079 * @since 1.5 1080 */ 1081 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff); 1082 /** 1083 * The "CJK Radicals Supplement" Unicode Block (U+2E80 to U+2EFF). 1084 * 1085 * @since 1.4 1086 */ 1087 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff); 1088 /** 1089 * The "Kangxi Radicals" Unicode Block (U+2F00 to U+2FDF). 1090 * 1091 * @since 1.4 1092 */ 1093 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf); 1094 /** 1095 * The "Ideographic Description Characters" Unicode Block (U+2FF0 to U+2FFF). 1096 * 1097 * @since 1.4 1098 */ 1099 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff); 1100 /** 1101 * The "CJK Symbols and Punctuation" Unicode Block (U+3000 to U+303F). 1102 * 1103 * @since 1.2 1104 */ 1105 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f); 1106 /** 1107 * The "Hiragana" Unicode Block (U+3040 to U+309F). 1108 * 1109 * @since 1.2 1110 */ 1111 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f); 1112 /** 1113 * The "Katakana" Unicode Block (U+30A0 to U+30FF). 1114 * 1115 * @since 1.2 1116 */ 1117 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff); 1118 /** 1119 * The "Bopomofo" Unicode Block (U+3100 to U+312F). 1120 * 1121 * @since 1.2 1122 */ 1123 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f); 1124 /** 1125 * The "Hangul Compatibility Jamo" Unicode Block (U+3130 to U+318F). 1126 * 1127 * @since 1.2 1128 */ 1129 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f); 1130 /** 1131 * The "Kanbun" Unicode Block (U+3190 to U+319F).
1132 * 1133 * @since 1.2 1134 */ 1135 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f); 1136 /** 1137 * The "Bopomofo Extended" Unicode Block (U+31A0 to U+31BF). 1138 * 1139 * @since 1.4 1140 */ 1141 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf); 1142 /** 1143 * The "Katakana Phonetic Extensions" Unicode Block (U+31F0 to U+31FF). 1144 * 1145 * @since 1.5 1146 */ 1147 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff); 1148 /** 1149 * The "Enclosed CJK Letters and Months" Unicode Block (U+3200 to U+32FF). 1150 * 1151 * @since 1.2 1152 */ 1153 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff); 1154 /** 1155 * The "CJK Compatibility" Unicode Block (U+3300 to U+33FF). 1156 * 1157 * @since 1.2 1158 */ 1159 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff); 1160 /** 1161 * The "CJK Unified Ideographs Extension A" Unicode Block (U+3400 to U+4DBF). 1162 * 1163 * @since 1.4 1164 */ 1165 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf); 1166 /** 1167 * The "Yijing Hexagram Symbols" Unicode Block (U+4DC0 to U+4DFF). 1168 * 1169 * @since 1.5 1170 */ 1171 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff); 1172 /** 1173 * The "CJK Unified Ideographs" Unicode Block (U+4E00 to U+9FFF). 1174 * 1175 * @since 1.2 1176 */ 1177 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff); 1178 /** 1179 * The "Yi Syllables" Unicode Block (U+A000 to U+A48F). 1180 * 1181 * @since 1.4 1182 */ 1183 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f); 1184 /** 1185 * The "Yi Radicals" Unicode Block (U+A490 to U+A4CF).
1186 * 1187 * @since 1.4 1188 */ 1189 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf); 1190 /** 1191 * The "Hangul Syllables" Unicode Block (U+AC00 to U+D7AF). 1192 * 1193 * @since 1.2 1194 */ 1195 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af); 1196 /** 1197 * The "High Surrogates" Unicode Block. This block represents 1198 * code point values in the high surrogate range 0xD800 to 0xDB7F. 1199 */ 1200 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f); 1201 /** 1202 * The "High Private Use Surrogates" Unicode Block. This block 1203 * represents code point values in the high surrogate range 0xDB80 to 1204 * 0xDBFF. 1205 */ 1206 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff); 1207 /** 1208 * The "Low Surrogates" Unicode Block. This block represents 1209 * code point values in the low surrogate range 0xDC00 to 0xDFFF. 1210 */ 1211 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff); 1212 /** 1213 * The "Private Use Area" Unicode Block (U+E000 to U+F8FF). 1214 * 1215 * @since 1.2 1216 */ 1217 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff); 1218 /** 1219 * The "CJK Compatibility Ideographs" Unicode Block (U+F900 to U+FAFF). 1220 * 1221 * @since 1.2 1222 */ 1223 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff); 1224 /** 1225 * The "Alphabetic Presentation Forms" Unicode Block (U+FB00 to U+FB4F). 1226 * 1227 * @since 1.2 1228 */ 1229 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f); 1230 /** 1231 * The "Arabic Presentation Forms-A" Unicode Block (U+FB50 to U+FDFF).
1232 * 1233 * @since 1.2 1234 */ 1235 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff); 1236 /** 1237 * The "Variation Selectors" Unicode Block (U+FE00 to U+FE0F). 1238 * 1239 * @since 1.5 1240 */ 1241 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f); 1242 /** 1243 * The "Combining Half Marks" Unicode Block (U+FE20 to U+FE2F). 1244 * 1245 * @since 1.2 1246 */ 1247 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f); 1248 /** 1249 * The "CJK Compatibility Forms" Unicode Block (U+FE30 to U+FE4F). 1250 * 1251 * @since 1.2 1252 */ 1253 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f); 1254 /** 1255 * The "Small Form Variants" Unicode Block (U+FE50 to U+FE6F). 1256 * 1257 * @since 1.2 1258 */ 1259 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f); 1260 /** 1261 * The "Arabic Presentation Forms-B" Unicode Block (U+FE70 to U+FEFF). 1262 * 1263 * @since 1.2 1264 */ 1265 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff); 1266 /** 1267 * The "Halfwidth and Fullwidth Forms" Unicode Block (U+FF00 to U+FFEF). 1268 * 1269 * @since 1.2 1270 */ 1271 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef); 1272 /** 1273 * The "Specials" Unicode Block (U+FFF0 to U+FFFF). 1274 * 1275 * @since 1.2 1276 */ 1277 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff); 1278 /** 1279 * The "Linear B Syllabary" Unicode Block (U+10000 to U+1007F). 1280 * 1281 * @since 1.5 1282 */ 1283 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f); 1284 /** 1285 * The "Linear B Ideograms" Unicode Block (U+10080 to U+100FF).
1286 * 1287 * @since 1.5 1288 */ 1289 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff); 1290 /** 1291 * The "Aegean Numbers" Unicode Block (U+10100 to U+1013F). 1292 * 1293 * @since 1.5 1294 */ 1295 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f); 1296 /** 1297 * The "Old Italic" Unicode Block (U+10300 to U+1032F). 1298 * 1299 * @since 1.5 1300 */ 1301 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f); 1302 /** 1303 * The "Gothic" Unicode Block (U+10330 to U+1034F). 1304 * 1305 * @since 1.5 1306 */ 1307 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f); 1308 /** 1309 * The "Ugaritic" Unicode Block (U+10380 to U+1039F). 1310 * 1311 * @since 1.5 1312 */ 1313 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f); 1314 /** 1315 * The "Deseret" Unicode Block (U+10400 to U+1044F). 1316 * 1317 * @since 1.5 1318 */ 1319 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f); 1320 /** 1321 * The "Shavian" Unicode Block (U+10450 to U+1047F). 1322 * 1323 * @since 1.5 1324 */ 1325 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f); 1326 /** 1327 * The "Osmanya" Unicode Block (U+10480 to U+104AF). 1328 * 1329 * @since 1.5 1330 */ 1331 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af); 1332 /** 1333 * The "Cypriot Syllabary" Unicode Block (U+10800 to U+1083F). 1334 * 1335 * @since 1.5 1336 */ 1337 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f); 1338 /** 1339 * The "Byzantine Musical Symbols" Unicode Block (U+1D000 to U+1D0FF). 1340 * 1341 * @since 1.5 1342 */ 1343 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff); 1344 /** 1345 * The "Musical Symbols" Unicode Block (U+1D100 to U+1D1FF).
1346 * 1347 * @since 1.5 1348 */ 1349 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff); 1350 /** 1351 * The "Tai Xuan Jing Symbols" Unicode Block (U+1D300 to U+1D35F). 1352 * 1353 * @since 1.5 1354 */ 1355 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f); 1356 /** 1357 * The "Mathematical Alphanumeric Symbols" Unicode Block (U+1D400 to U+1D7FF). 1358 * 1359 * @since 1.5 1360 */ 1361 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff); 1362 /** 1363 * The "CJK Unified Ideographs Extension B" Unicode Block (U+20000 to U+2A6DF). 1364 * 1365 * @since 1.5 1366 */ 1367 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df); 1368 /** 1369 * The "CJK Compatibility Ideographs Supplement" Unicode Block (U+2F800 to U+2FA1F). 1370 * 1371 * @since 1.5 1372 */ 1373 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f); 1374 /** 1375 * The "Tags" Unicode Block (U+E0000 to U+E007F). 1376 * 1377 * @since 1.5 1378 */ 1379 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f); 1380 /** 1381 * The "Variation Selectors Supplement" Unicode Block (U+E0100 to U+E01EF). 1382 * 1383 * @since 1.5 1384 */ 1385 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef); 1386 /** 1387 * The "Supplementary Private Use Area-A" Unicode Block (U+F0000 to U+FFFFF). 1388 * 1389 * @since 1.5 1390 */ 1391 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff); 1392 /** 1393 * The "Supplementary Private Use Area-B" Unicode Block.
1394 * 1395 * @since 1.5 1396 */ 1397 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff); 1398 1399 /* 1400 * All of the UnicodeBlocks with valid ranges in ascending order. 1401 */ 1402 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1403 null, 1404 UnicodeBlock.BASIC_LATIN, 1405 UnicodeBlock.LATIN_1_SUPPLEMENT, 1406 UnicodeBlock.LATIN_EXTENDED_A, 1407 UnicodeBlock.LATIN_EXTENDED_B, 1408 UnicodeBlock.IPA_EXTENSIONS, 1409 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1410 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1411 UnicodeBlock.GREEK, 1412 UnicodeBlock.CYRILLIC, 1413 UnicodeBlock.ARMENIAN, 1414 UnicodeBlock.HEBREW, 1415 UnicodeBlock.ARABIC, 1416 UnicodeBlock.SYRIAC, 1417 UnicodeBlock.THAANA, 1418 UnicodeBlock.DEVANAGARI, 1419 UnicodeBlock.BENGALI, 1420 UnicodeBlock.GURMUKHI, 1421 UnicodeBlock.GUJARATI, 1422 UnicodeBlock.ORIYA, 1423 UnicodeBlock.TAMIL, 1424 UnicodeBlock.TELUGU, 1425 UnicodeBlock.KANNADA, 1426 UnicodeBlock.MALAYALAM, 1427 UnicodeBlock.SINHALA, 1428 UnicodeBlock.THAI, 1429 UnicodeBlock.LAO, 1430 UnicodeBlock.TIBETAN, 1431 UnicodeBlock.MYANMAR, 1432 UnicodeBlock.GEORGIAN, 1433 UnicodeBlock.HANGUL_JAMO, 1434 UnicodeBlock.ETHIOPIC, 1435 UnicodeBlock.CHEROKEE, 1436 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1437 UnicodeBlock.OGHAM, 1438 UnicodeBlock.RUNIC, 1439 UnicodeBlock.KHMER, 1440 UnicodeBlock.MONGOLIAN, 1441 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1442 UnicodeBlock.GREEK_EXTENDED, 1443 UnicodeBlock.GENERAL_PUNCTUATION, 1444 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1445 UnicodeBlock.CURRENCY_SYMBOLS, 1446 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1447 UnicodeBlock.LETTERLIKE_SYMBOLS, 1448 UnicodeBlock.NUMBER_FORMS, 1449 UnicodeBlock.ARROWS, 1450 UnicodeBlock.MATHEMATICAL_OPERATORS, 1451 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1452 UnicodeBlock.CONTROL_PICTURES, 1453 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1454 
UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1455 UnicodeBlock.BOX_DRAWING, 1456 UnicodeBlock.BLOCK_ELEMENTS, 1457 UnicodeBlock.GEOMETRIC_SHAPES, 1458 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1459 UnicodeBlock.DINGBATS, 1460 UnicodeBlock.BRAILLE_PATTERNS, 1461 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1462 UnicodeBlock.KANGXI_RADICALS, 1463 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 1464 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1465 UnicodeBlock.HIRAGANA, 1466 UnicodeBlock.KATAKANA, 1467 UnicodeBlock.BOPOMOFO, 1468 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1469 UnicodeBlock.KANBUN, 1470 UnicodeBlock.BOPOMOFO_EXTENDED, 1471 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1472 UnicodeBlock.CJK_COMPATIBILITY, 1473 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1474 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1475 UnicodeBlock.YI_SYLLABLES, 1476 UnicodeBlock.YI_RADICALS, 1477 UnicodeBlock.HANGUL_SYLLABLES, 1478 UnicodeBlock.HIGH_SURROGATES, 1479 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1480 UnicodeBlock.LOW_SURROGATES, 1481 UnicodeBlock.PRIVATE_USE_AREA, 1482 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1483 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1484 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1485 UnicodeBlock.COMBINING_HALF_MARKS, 1486 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1487 UnicodeBlock.SMALL_FORM_VARIANTS, 1488 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1489 UnicodeBlock.SPECIALS, 1490 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1491 UnicodeBlock.OLD_ITALIC, 1492 UnicodeBlock.GOTHIC, 1493 UnicodeBlock.DESERET, 1494 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1495 UnicodeBlock.MUSICAL_SYMBOLS, 1496 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1497 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1498 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1499 UnicodeBlock.TAGS, 1500 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1501 UnicodeBlock.TAGALOG, 1502 UnicodeBlock.HANUNOO, 1503 UnicodeBlock.BUHID, 1504 UnicodeBlock.TAGBANWA, 1505 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 
1506 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1507 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1508 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1509 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1510 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1511 UnicodeBlock.VARIATION_SELECTORS, 1512 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1513 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1514 UnicodeBlock.LIMBU, 1515 UnicodeBlock.TAI_LE, 1516 UnicodeBlock.KHMER_SYMBOLS, 1517 UnicodeBlock.PHONETIC_EXTENSIONS, 1518 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1519 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1520 UnicodeBlock.LINEAR_B_SYLLABARY, 1521 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1522 UnicodeBlock.AEGEAN_NUMBERS, 1523 UnicodeBlock.UGARITIC, 1524 UnicodeBlock.SHAVIAN, 1525 UnicodeBlock.OSMANYA, 1526 UnicodeBlock.CYPRIOT_SYLLABARY, 1527 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1528 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT 1529 }; 1530 1531 /** 1532 * Retrieves the constant that corresponds to the specified block name. 1533 * The block names are defined by the Unicode 4.0.1 specification in the 1534 * {@code Blocks-4.0.1.txt} file. 1535 * <p> 1536 * Block names may be one of the following: 1537 * <ul> 1538 * <li>Canonical block name, as defined by the Unicode specification; 1539 * case-insensitive.</li> 1540 * <li>Canonical block name without any spaces, as defined by the 1541 * Unicode specification; case-insensitive.</li> 1542 * <li>{@code UnicodeBlock} constant identifier. This is determined by 1543 * uppercasing the canonical name and replacing all spaces and hyphens 1544 * with underscores.</li> 1545 * </ul> 1546 * 1547 * @param blockName 1548 * the name of the block to retrieve. 1549 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1550 * @throws NullPointerException 1551 * if {@code blockName} is {@code null}. 1552 * @throws IllegalArgumentException 1553 * if {@code blockName} is not a valid block name. 
1554 * @since 1.5 1555 */ 1556 public static final UnicodeBlock forName(String blockName) { 1557 if (blockName == null) { 1558 throw new NullPointerException(); 1559 } 1560 int block = forNameImpl(blockName); 1561 if (block == -1) { 1562 if(blockName.equals("SURROGATES_AREA")) { 1563 return SURROGATES_AREA; 1564 } else if(blockName.equalsIgnoreCase("greek")) { 1565 return GREEK; 1566 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1567 blockName.equals("Combining Marks for Symbols") || 1568 blockName.equals("CombiningMarksforSymbols")) { 1569 return COMBINING_MARKS_FOR_SYMBOLS; 1570 } 1571 throw new IllegalArgumentException(); 1572 } 1573 return BLOCKS[block]; 1574 } 1575 1576 /** 1577 * Gets the constant for the Unicode block that contains the specified 1578 * character. 1579 * 1580 * @param c 1581 * the character for which to get the {@code UnicodeBlock} 1582 * constant. 1583 * @return the {@code UnicodeBlock} constant for the block that contains 1584 * {@code c}, or {@code null} if {@code c} does not belong to 1585 * any defined block. 1586 */ 1587 public static UnicodeBlock of(char c) { 1588 return of((int) c); 1589 } 1590 1591 /** 1592 * Gets the constant for the Unicode block that contains the specified 1593 * Unicode code point. 1594 * 1595 * @param codePoint 1596 * the Unicode code point for which to get the 1597 * {@code UnicodeBlock} constant. 1598 * @return the {@code UnicodeBlock} constant for the block that contains 1599 * {@code codePoint}, or {@code null} if {@code codePoint} does 1600 * not belong to any defined block. 1601 * @throws IllegalArgumentException 1602 * if {@code codePoint} is not a valid Unicode code point. 
1603 * @since 1.5 1604 */ 1605 public static UnicodeBlock of(int codePoint) { 1606 if (!isValidCodePoint(codePoint)) { 1607 throw new IllegalArgumentException(); 1608 } 1609 int block = ofImpl(codePoint); 1610 if(block == -1 || block >= BLOCKS.length) { 1611 return null; 1612 } 1613 return BLOCKS[block]; 1614 } 1615 1616 private UnicodeBlock(String blockName, int start, int end) { 1617 super(blockName); 1618 } 1619 } 1620 1621 private static native int forNameImpl(String blockName); 1622 1623 private static native int ofImpl(int codePoint); 1624 1625 /** 1626 * Constructs a new {@code Character} with the specified primitive char 1627 * value. 1628 * 1629 * @param value 1630 * the primitive char value to store in the new instance. 1631 */ 1632 public Character(char value) { 1633 this.value = value; 1634 } 1635 1636 /** 1637 * Gets the primitive value of this character. 1638 * 1639 * @return this object's primitive value. 1640 */ 1641 public char charValue() { 1642 return value; 1643 } 1644 1645 /** 1646 * Compares this object to the specified character object to determine their 1647 * relative order. 1648 * 1649 * @param c 1650 * the character object to compare this object to. 1651 * @return {@code 0} if the value of this character and the value of 1652 * {@code c} are equal; a positive value if the value of this 1653 * character is greater than the value of {@code c}; a negative 1654 * value if the value of this character is less than the value of 1655 * {@code c}. 1656 * @see java.lang.Comparable 1657 * @since 1.2 1658 */ 1659 public int compareTo(Character c) { 1660 return value - c.value; 1661 } 1662 1663 /** 1664 * Returns a {@code Character} instance for the {@code char} value passed. 1665 * <p> 1666 * If it is not necessary to get a new {@code Character} instance, it is 1667 * recommended to use this method instead of the constructor, since it 1668 * maintains a cache of instances which may result in better performance. 
1669 * 1670 * @param c 1671 * the char value for which to get a {@code Character} instance. 1672 * @return the {@code Character} instance for {@code c}. 1673 * @since 1.5 1674 */ 1675 public static Character valueOf(char c) { 1676 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1677 } 1678 1679 /** 1680 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1681 */ 1682 private static final Character[] SMALL_VALUES = new Character[128]; 1683 1684 static { 1685 for(int i = 0; i < 128; i++) { 1686 SMALL_VALUES[i] = new Character((char) i); 1687 } 1688 } 1689 /** 1690 * Indicates whether {@code codePoint} is a valid Unicode code point. 1691 * 1692 * @param codePoint 1693 * the code point to test. 1694 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1695 * {@code false} otherwise. 1696 * @since 1.5 1697 */ 1698 public static boolean isValidCodePoint(int codePoint) { 1699 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1700 } 1701 1702 /** 1703 * Indicates whether {@code codePoint} is within the supplementary code 1704 * point range. 1705 * 1706 * @param codePoint 1707 * the code point to test. 1708 * @return {@code true} if {@code codePoint} is within the supplementary 1709 * code point range; {@code false} otherwise. 1710 * @since 1.5 1711 */ 1712 public static boolean isSupplementaryCodePoint(int codePoint) { 1713 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1714 } 1715 1716 /** 1717 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1718 * that is used for representing supplementary characters in UTF-16 1719 * encoding. 1720 * 1721 * @param ch 1722 * the character to test. 1723 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1724 * {@code false} otherwise. 
1725 * @see #isLowSurrogate(char) 1726 * @since 1.5 1727 */ 1728 public static boolean isHighSurrogate(char ch) { 1729 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1730 } 1731 1732 /** 1733 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1734 * that is used for representing supplementary characters in UTF-16 1735 * encoding. 1736 * 1737 * @param ch 1738 * the character to test. 1739 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1740 * {@code false} otherwise. 1741 * @see #isHighSurrogate(char) 1742 * @since 1.5 1743 */ 1744 public static boolean isLowSurrogate(char ch) { 1745 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1746 } 1747 1748 /** 1749 * Indicates whether the specified character pair is a valid surrogate pair. 1750 * 1751 * @param high 1752 * the high surrogate unit to test. 1753 * @param low 1754 * the low surrogate unit to test. 1755 * @return {@code true} if {@code high} is a high-surrogate code unit and 1756 * {@code low} is a low-surrogate code unit; {@code false} 1757 * otherwise. 1758 * @see #isHighSurrogate(char) 1759 * @see #isLowSurrogate(char) 1760 * @since 1.5 1761 */ 1762 public static boolean isSurrogatePair(char high, char low) { 1763 return (isHighSurrogate(high) && isLowSurrogate(low)); 1764 } 1765 1766 /** 1767 * Calculates the number of {@code char} values required to represent the 1768 * specified Unicode code point. This method checks if the {@code codePoint} 1769 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1770 * returned, otherwise {@code 1}. To test if the code point is valid, use 1771 * the {@link #isValidCodePoint(int)} method. 1772 * 1773 * @param codePoint 1774 * the code point for which to calculate the number of required 1775 * chars. 1776 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 
1777 * @see #isValidCodePoint(int) 1778 * @see #isSupplementaryCodePoint(int) 1779 * @since 1.5 1780 */ 1781 public static int charCount(int codePoint) { 1782 return (codePoint >= 0x10000 ? 2 : 1); 1783 } 1784 1785 /** 1786 * Converts a surrogate pair into a Unicode code point. This method assumes 1787 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1788 * surrogates, then the result is indeterminate. The 1789 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1790 * method to validate the pair. 1791 * 1792 * @param high 1793 * the high surrogate unit. 1794 * @param low 1795 * the low surrogate unit. 1796 * @return the Unicode code point corresponding to the surrogate unit pair. 1797 * @see #isSurrogatePair(char, char) 1798 * @since 1.5 1799 */ 1800 public static int toCodePoint(char high, char low) { 1801 // See RFC 2781, Section 2.2 1802 // http://www.ietf.org/rfc/rfc2781.txt 1803 int h = (high & 0x3FF) << 10; 1804 int l = low & 0x3FF; 1805 return (h | l) + 0x10000; 1806 } 1807 1808 /** 1809 * Returns the code point at {@code index} in the specified sequence of 1810 * character units. If the unit at {@code index} is a high-surrogate unit, 1811 * {@code index + 1} is less than the length of the sequence and the unit at 1812 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1813 * point represented by the pair is returned; otherwise the {@code char} 1814 * value at {@code index} is returned. 1815 * 1816 * @param seq 1817 * the source sequence of {@code char} units. 1818 * @param index 1819 * the position in {@code seq} from which to retrieve the code 1820 * point. 1821 * @return the Unicode code point or {@code char} value at {@code index} in 1822 * {@code seq}. 1823 * @throws NullPointerException 1824 * if {@code seq} is {@code null}. 1825 * @throws IndexOutOfBoundsException 1826 * if the {@code index} is negative or greater than or equal to 1827 * the length of {@code seq}. 
1828 * @since 1.5 1829 */ 1830 public static int codePointAt(CharSequence seq, int index) { 1831 if (seq == null) { 1832 throw new NullPointerException(); 1833 } 1834 int len = seq.length(); 1835 if (index < 0 || index >= len) { 1836 throw new IndexOutOfBoundsException(); 1837 } 1838 1839 char high = seq.charAt(index++); 1840 if (index >= len) { 1841 return high; 1842 } 1843 char low = seq.charAt(index); 1844 if (isSurrogatePair(high, low)) { 1845 return toCodePoint(high, low); 1846 } 1847 return high; 1848 } 1849 1850 /** 1851 * Returns the code point at {@code index} in the specified array of 1852 * character units. If the unit at {@code index} is a high-surrogate unit, 1853 * {@code index + 1} is less than the length of the array and the unit at 1854 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1855 * point represented by the pair is returned; otherwise the {@code char} 1856 * value at {@code index} is returned. 1857 * 1858 * @param seq 1859 * the source array of {@code char} units. 1860 * @param index 1861 * the position in {@code seq} from which to retrieve the code 1862 * point. 1863 * @return the Unicode code point or {@code char} value at {@code index} in 1864 * {@code seq}. 1865 * @throws NullPointerException 1866 * if {@code seq} is {@code null}. 1867 * @throws IndexOutOfBoundsException 1868 * if the {@code index} is negative or greater than or equal to 1869 * the length of {@code seq}. 
1870 * @since 1.5 1871 */ 1872 public static int codePointAt(char[] seq, int index) { 1873 if (seq == null) { 1874 throw new NullPointerException(); 1875 } 1876 int len = seq.length; 1877 if (index < 0 || index >= len) { 1878 throw new IndexOutOfBoundsException(); 1879 } 1880 1881 char high = seq[index++]; 1882 if (index >= len) { 1883 return high; 1884 } 1885 char low = seq[index]; 1886 if (isSurrogatePair(high, low)) { 1887 return toCodePoint(high, low); 1888 } 1889 return high; 1890 } 1891 1892 /** 1893 * Returns the code point at {@code index} in the specified array of 1894 * character units, where {@code index} has to be less than {@code limit}. 1895 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1896 * is less than {@code limit} and the unit at {@code index + 1} is a 1897 * low-surrogate unit, then the supplementary code point represented by the 1898 * pair is returned; otherwise the {@code char} value at {@code index} is 1899 * returned. 1900 * 1901 * @param seq 1902 * the source array of {@code char} units. 1903 * @param index 1904 * the position in {@code seq} from which to get the code point. 1905 * @param limit 1906 * the index after the last unit in {@code seq} that can be used. 1907 * @return the Unicode code point or {@code char} value at {@code index} in 1908 * {@code seq}. 1909 * @throws NullPointerException 1910 * if {@code seq} is {@code null}. 1911 * @throws IndexOutOfBoundsException 1912 * if {@code index < 0}, {@code index >= limit}, 1913 * {@code limit < 0} or if {@code limit} is greater than the 1914 * length of {@code seq}. 
1915 * @since 1.5 1916 */ 1917 public static int codePointAt(char[] seq, int index, int limit) { 1918 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1919 throw new IndexOutOfBoundsException(); 1920 } 1921 1922 char high = seq[index++]; 1923 if (index >= limit) { 1924 return high; 1925 } 1926 char low = seq[index]; 1927 if (isSurrogatePair(high, low)) { 1928 return toCodePoint(high, low); 1929 } 1930 return high; 1931 } 1932 1933 /** 1934 * Returns the code point that precedes {@code index} in the specified 1935 * sequence of character units. If the unit at {@code index - 1} is a 1936 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1937 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1938 * point represented by the pair is returned; otherwise the {@code char} 1939 * value at {@code index - 1} is returned. 1940 * 1941 * @param seq 1942 * the source sequence of {@code char} units. 1943 * @param index 1944 * the position in {@code seq} following the code 1945 * point that should be returned. 1946 * @return the Unicode code point or {@code char} value before {@code index} 1947 * in {@code seq}. 1948 * @throws NullPointerException 1949 * if {@code seq} is {@code null}. 1950 * @throws IndexOutOfBoundsException 1951 * if the {@code index} is less than 1 or greater than the 1952 * length of {@code seq}. 
1953 * @since 1.5 1954 */ 1955 public static int codePointBefore(CharSequence seq, int index) { 1956 if (seq == null) { 1957 throw new NullPointerException(); 1958 } 1959 int len = seq.length(); 1960 if (index < 1 || index > len) { 1961 throw new IndexOutOfBoundsException(); 1962 } 1963 1964 char low = seq.charAt(--index); 1965 if (--index < 0) { 1966 return low; 1967 } 1968 char high = seq.charAt(index); 1969 if (isSurrogatePair(high, low)) { 1970 return toCodePoint(high, low); 1971 } 1972 return low; 1973 } 1974 1975 /** 1976 * Returns the code point that precedes {@code index} in the specified 1977 * array of character units. If the unit at {@code index - 1} is a 1978 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1979 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1980 * point represented by the pair is returned; otherwise the {@code char} 1981 * value at {@code index - 1} is returned. 1982 * 1983 * @param seq 1984 * the source array of {@code char} units. 1985 * @param index 1986 * the position in {@code seq} following the code 1987 * point that should be returned. 1988 * @return the Unicode code point or {@code char} value before {@code index} 1989 * in {@code seq}. 1990 * @throws NullPointerException 1991 * if {@code seq} is {@code null}. 1992 * @throws IndexOutOfBoundsException 1993 * if the {@code index} is less than 1 or greater than the 1994 * length of {@code seq}. 
1995 * @since 1.5 1996 */ 1997 public static int codePointBefore(char[] seq, int index) { 1998 if (seq == null) { 1999 throw new NullPointerException(); 2000 } 2001 int len = seq.length; 2002 if (index < 1 || index > len) { 2003 throw new IndexOutOfBoundsException(); 2004 } 2005 2006 char low = seq[--index]; 2007 if (--index < 0) { 2008 return low; 2009 } 2010 char high = seq[index]; 2011 if (isSurrogatePair(high, low)) { 2012 return toCodePoint(high, low); 2013 } 2014 return low; 2015 } 2016 2017 /** 2018 * Returns the code point that precedes the {@code index} in the specified 2019 * array of character units and is not less than {@code start}. If the unit 2020 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 2021 * less than {@code start} and the unit at {@code index - 2} is a 2022 * high-surrogate unit, then the supplementary code point represented by the 2023 * pair is returned; otherwise the {@code char} value at {@code index - 1} 2024 * is returned. 2025 * 2026 * @param seq 2027 * the source array of {@code char} units. 2028 * @param index 2029 * the position in {@code seq} following the code point that 2030 * should be returned. 2031 * @param start 2032 * the index of the first element in {@code seq}. 2033 * @return the Unicode code point or {@code char} value before {@code index} 2034 * in {@code seq}. 2035 * @throws NullPointerException 2036 * if {@code seq} is {@code null}. 2037 * @throws IndexOutOfBoundsException 2038 * if the {@code index <= start}, {@code start < 0}, 2039 * {@code index} is greater than the length of {@code seq}, or 2040 * if {@code start} is equal or greater than the length of 2041 * {@code seq}. 
2042 * @since 1.5 2043 */ 2044 public static int codePointBefore(char[] seq, int index, int start) { 2045 if (seq == null) { 2046 throw new NullPointerException(); 2047 } 2048 int len = seq.length; 2049 if (index <= start || index > len || start < 0 || start >= len) { 2050 throw new IndexOutOfBoundsException(); 2051 } 2052 2053 char low = seq[--index]; 2054 if (--index < start) { 2055 return low; 2056 } 2057 char high = seq[index]; 2058 if (isSurrogatePair(high, low)) { 2059 return toCodePoint(high, low); 2060 } 2061 return low; 2062 } 2063 2064 /** 2065 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2066 * and copies the value(s) into the char array {@code dst}, starting at 2067 * index {@code dstIndex}. 2068 * 2069 * @param codePoint 2070 * the Unicode code point to encode. 2071 * @param dst 2072 * the destination array to copy the encoded value into. 2073 * @param dstIndex 2074 * the index in {@code dst} from where to start copying. 2075 * @return the number of {@code char} value units copied into {@code dst}. 2076 * @throws IllegalArgumentException 2077 * if {@code codePoint} is not a valid Unicode code point. 2078 * @throws NullPointerException 2079 * if {@code dst} is {@code null}. 2080 * @throws IndexOutOfBoundsException 2081 * if {@code dstIndex} is negative, greater than or equal to 2082 * {@code dst.length} or equals {@code dst.length - 1} when 2083 * {@code codePoint} is a 2084 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2085 * @since 1.5 2086 */ 2087 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2088 if (!isValidCodePoint(codePoint)) { 2089 throw new IllegalArgumentException(); 2090 } 2091 if (dst == null) { 2092 throw new NullPointerException(); 2093 } 2094 if (dstIndex < 0 || dstIndex >= dst.length) { 2095 throw new IndexOutOfBoundsException(); 2096 } 2097 2098 if (isSupplementaryCodePoint(codePoint)) { 2099 if (dstIndex == dst.length - 1) { 2100 throw new IndexOutOfBoundsException(); 2101 } 2102 // See RFC 2781, Section 2.1 2103 // http://www.ietf.org/rfc/rfc2781.txt 2104 int cpPrime = codePoint - 0x10000; 2105 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2106 int low = 0xDC00 | (cpPrime & 0x3FF); 2107 dst[dstIndex] = (char) high; 2108 dst[dstIndex + 1] = (char) low; 2109 return 2; 2110 } 2111 2112 dst[dstIndex] = (char) codePoint; 2113 return 1; 2114 } 2115 2116 /** 2117 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2118 * and returns it as a char array. 2119 * 2120 * @param codePoint 2121 * the Unicode code point to encode. 2122 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2123 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2124 * then the returned array contains two characters, otherwise it 2125 * contains just one character. 2126 * @throws IllegalArgumentException 2127 * if {@code codePoint} is not a valid Unicode code point. 
2128 * @since 1.5 2129 */ 2130 public static char[] toChars(int codePoint) { 2131 if (!isValidCodePoint(codePoint)) { 2132 throw new IllegalArgumentException(); 2133 } 2134 2135 if (isSupplementaryCodePoint(codePoint)) { 2136 int cpPrime = codePoint - 0x10000; 2137 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2138 int low = 0xDC00 | (cpPrime & 0x3FF); 2139 return new char[] { (char) high, (char) low }; 2140 } 2141 return new char[] { (char) codePoint }; 2142 } 2143 2144 /** 2145 * Counts the number of Unicode code points in the subsequence of the 2146 * specified character sequence, as delineated by {@code beginIndex} and 2147 * {@code endIndex}. Any surrogate values with missing pair values will be 2148 * counted as one code point. 2149 * 2150 * @param seq 2151 * the {@code CharSequence} to look through. 2152 * @param beginIndex 2153 * the inclusive index to begin counting at. 2154 * @param endIndex 2155 * the exclusive index to stop counting at. 2156 * @return the number of Unicode code points. 2157 * @throws NullPointerException 2158 * if {@code seq} is {@code null}. 2159 * @throws IndexOutOfBoundsException 2160 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2161 * if {@code endIndex} is greater than the length of {@code seq}. 
2162 * @since 1.5 2163 */ 2164 public static int codePointCount(CharSequence seq, int beginIndex, 2165 int endIndex) { 2166 if (seq == null) { 2167 throw new NullPointerException(); 2168 } 2169 int len = seq.length(); 2170 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2171 throw new IndexOutOfBoundsException(); 2172 } 2173 2174 int result = 0; 2175 for (int i = beginIndex; i < endIndex; i++) { 2176 char c = seq.charAt(i); 2177 if (isHighSurrogate(c)) { 2178 if (++i < endIndex) { 2179 c = seq.charAt(i); 2180 if (!isLowSurrogate(c)) { 2181 result++; 2182 } 2183 } 2184 } 2185 result++; 2186 } 2187 return result; 2188 } 2189 2190 /** 2191 * Counts the number of Unicode code points in the subsequence of the 2192 * specified char array, as delineated by {@code offset} and {@code count}. 2193 * Any surrogate values with missing pair values will be counted as one code 2194 * point. 2195 * 2196 * @param seq 2197 * the char array to look through 2198 * @param offset 2199 * the inclusive index to begin counting at. 2200 * @param count 2201 * the number of {@code char} values to look through in 2202 * {@code seq}. 2203 * @return the number of Unicode code points. 2204 * @throws NullPointerException 2205 * if {@code seq} is {@code null}. 2206 * @throws IndexOutOfBoundsException 2207 * if {@code offset < 0}, {@code count < 0} or if 2208 * {@code offset + count} is greater than the length of 2209 * {@code seq}. 
2210 * @since 1.5 2211 */ 2212 public static int codePointCount(char[] seq, int offset, int count) { 2213 if (seq == null) { 2214 throw new NullPointerException(); 2215 } 2216 int len = seq.length; 2217 int endIndex = offset + count; 2218 if (offset < 0 || count < 0 || endIndex > len) { 2219 throw new IndexOutOfBoundsException(); 2220 } 2221 2222 int result = 0; 2223 for (int i = offset; i < endIndex; i++) { 2224 char c = seq[i]; 2225 if (isHighSurrogate(c)) { 2226 if (++i < endIndex) { 2227 c = seq[i]; 2228 if (!isLowSurrogate(c)) { 2229 result++; 2230 } 2231 } 2232 } 2233 result++; 2234 } 2235 return result; 2236 } 2237 2238 /** 2239 * Determines the index in the specified character sequence that is offset 2240 * {@code codePointOffset} code points from {@code index}. 2241 * 2242 * @param seq 2243 * the character sequence to find the index in. 2244 * @param index 2245 * the start index in {@code seq}. 2246 * @param codePointOffset 2247 * the number of code points to look backwards or forwards; may 2248 * be a negative or positive value. 2249 * @return the index in {@code seq} that is {@code codePointOffset} code 2250 * points away from {@code index}. 2251 * @throws NullPointerException 2252 * if {@code seq} is {@code null}. 2253 * @throws IndexOutOfBoundsException 2254 * if {@code index < 0}, {@code index} is greater than the 2255 * length of {@code seq}, or if there are not enough values in 2256 * {@code seq} to skip {@code codePointOffset} code points 2257 * forwards or backwards (if {@code codePointOffset} is 2258 * negative) from {@code index}. 
2259 * @since 1.5 2260 */ 2261 public static int offsetByCodePoints(CharSequence seq, int index, 2262 int codePointOffset) { 2263 if (seq == null) { 2264 throw new NullPointerException(); 2265 } 2266 int len = seq.length(); 2267 if (index < 0 || index > len) { 2268 throw new IndexOutOfBoundsException(); 2269 } 2270 2271 if (codePointOffset == 0) { 2272 return index; 2273 } 2274 2275 if (codePointOffset > 0) { 2276 int codePoints = codePointOffset; 2277 int i = index; 2278 while (codePoints > 0) { 2279 codePoints--; 2280 if (i >= len) { 2281 throw new IndexOutOfBoundsException(); 2282 } 2283 if (isHighSurrogate(seq.charAt(i))) { 2284 int next = i + 1; 2285 if (next < len && isLowSurrogate(seq.charAt(next))) { 2286 i++; 2287 } 2288 } 2289 i++; 2290 } 2291 return i; 2292 } 2293 2294 assert codePointOffset < 0; 2295 int codePoints = -codePointOffset; 2296 int i = index; 2297 while (codePoints > 0) { 2298 codePoints--; 2299 i--; 2300 if (i < 0) { 2301 throw new IndexOutOfBoundsException(); 2302 } 2303 if (isLowSurrogate(seq.charAt(i))) { 2304 int prev = i - 1; 2305 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2306 i--; 2307 } 2308 } 2309 } 2310 return i; 2311 } 2312 2313 /** 2314 * Determines the index in a subsequence of the specified character array 2315 * that is offset {@code codePointOffset} code points from {@code index}. 2316 * The subsequence is delineated by {@code start} and {@code count}. 2317 * 2318 * @param seq 2319 * the character array to find the index in. 2320 * @param start 2321 * the inclusive index that marks the beginning of the 2322 * subsequence. 2323 * @param count 2324 * the number of {@code char} values to include within the 2325 * subsequence. 2326 * @param index 2327 * the start index in the subsequence of the char array. 2328 * @param codePointOffset 2329 * the number of code points to look backwards or forwards; may 2330 * be a negative or positive value. 
2331 * @return the index in {@code seq} that is {@code codePointOffset} code 2332 * points away from {@code index}. 2333 * @throws NullPointerException 2334 * if {@code seq} is {@code null}. 2335 * @throws IndexOutOfBoundsException 2336 * if {@code start < 0}, {@code count < 0}, 2337 * {@code index < start}, {@code index > start + count}, 2338 * {@code start + count} is greater than the length of 2339 * {@code seq}, or if there are not enough values in 2340 * {@code seq} to skip {@code codePointOffset} code points 2341 * forward or backward (if {@code codePointOffset} is 2342 * negative) from {@code index}. 2343 * @since 1.5 2344 */ 2345 public static int offsetByCodePoints(char[] seq, int start, int count, 2346 int index, int codePointOffset) { 2347 if (seq == null) { 2348 throw new NullPointerException(); 2349 } 2350 int end = start + count; 2351 if (start < 0 || count < 0 || end > seq.length || index < start 2352 || index > end) { 2353 throw new IndexOutOfBoundsException(); 2354 } 2355 2356 if (codePointOffset == 0) { 2357 return index; 2358 } 2359 2360 if (codePointOffset > 0) { 2361 int codePoints = codePointOffset; 2362 int i = index; 2363 while (codePoints > 0) { 2364 codePoints--; 2365 if (i >= end) { 2366 throw new IndexOutOfBoundsException(); 2367 } 2368 if (isHighSurrogate(seq[i])) { 2369 int next = i + 1; 2370 if (next < end && isLowSurrogate(seq[next])) { 2371 i++; 2372 } 2373 } 2374 i++; 2375 } 2376 return i; 2377 } 2378 2379 assert codePointOffset < 0; 2380 int codePoints = -codePointOffset; 2381 int i = index; 2382 while (codePoints > 0) { 2383 codePoints--; 2384 i--; 2385 if (i < start) { 2386 throw new IndexOutOfBoundsException(); 2387 } 2388 if (isLowSurrogate(seq[i])) { 2389 int prev = i - 1; 2390 if (prev >= start && isHighSurrogate(seq[prev])) { 2391 i--; 2392 } 2393 } 2394 } 2395 return i; 2396 } 2397 2398 /** 2399 * Convenience method to determine the value of the specified character 2400 * {@code c} in the supplied radix. 
The value of {@code radix} must be 2401 * between MIN_RADIX and MAX_RADIX. 2402 * 2403 * @param c 2404 * the character to determine the value of. 2405 * @param radix 2406 * the radix. 2407 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2408 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2409 */ 2410 public static int digit(char c, int radix) { 2411 return digit((int) c, radix); 2412 } 2413 2414 /** 2415 * Convenience method to determine the value of the character 2416 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2417 * be between MIN_RADIX and MAX_RADIX. 2418 * 2419 * @param codePoint 2420 * the character, including supplementary characters. 2421 * @param radix 2422 * the radix. 2423 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2424 * {@link #MAX_RADIX} then the value of the character in the radix; 2425 * -1 otherwise. 2426 */ 2427 public static int digit(int codePoint, int radix) { 2428 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2429 return -1; 2430 } 2431 if (codePoint < 128) { 2432 // Optimized for ASCII 2433 int result = -1; 2434 if ('0' <= codePoint && codePoint <= '9') { 2435 result = codePoint - '0'; 2436 } else if ('a' <= codePoint && codePoint <= 'z') { 2437 result = 10 + (codePoint - 'a'); 2438 } else if ('A' <= codePoint && codePoint <= 'Z') { 2439 result = 10 + (codePoint - 'A'); 2440 } 2441 return result < radix ? result : -1; 2442 } 2443 return digitImpl(codePoint, radix); 2444 } 2445 2446 private static native int digitImpl(int codePoint, int radix); 2447 2448 /** 2449 * Compares this object with the specified object and indicates if they are 2450 * equal. In order to be equal, {@code object} must be an instance of 2451 * {@code Character} and have the same char value as this object. 2452 * 2453 * @param object 2454 * the object to compare this double with. 
2455 * @return {@code true} if the specified object is equal to this 2456 * {@code Character}; {@code false} otherwise. 2457 */ 2458 @Override 2459 public boolean equals(Object object) { 2460 return (object instanceof Character) && (value == ((Character) object).value); 2461 } 2462 2463 /** 2464 * Returns the character which represents the specified digit in the 2465 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2466 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2467 * smaller than {@code radix}. If any of these conditions does not hold, 0 2468 * is returned. 2469 * 2470 * @param digit 2471 * the integer value. 2472 * @param radix 2473 * the radix. 2474 * @return the character which represents the {@code digit} in the 2475 * {@code radix}. 2476 */ 2477 public static char forDigit(int digit, int radix) { 2478 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2479 if (0 <= digit && digit < radix) { 2480 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2481 } 2482 } 2483 return 0; 2484 } 2485 2486 /** 2487 * Gets the numeric value of the specified Unicode character. 2488 * 2489 * @param c 2490 * the Unicode character to get the numeric value of. 2491 * @return a non-negative numeric integer value if a numeric value for 2492 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2493 * -2 if the numeric value can not be represented with an integer. 2494 */ 2495 public static int getNumericValue(char c) { 2496 return getNumericValue((int) c); 2497 } 2498 2499 /** 2500 * Gets the numeric value of the specified Unicode code point. For example, 2501 * the code point '\u216B' stands for the Roman number XII, which has the 2502 * numeric value 12. 2503 * 2504 * @param codePoint 2505 * the Unicode code point to get the numeric value of. 
2506 * @return a non-negative numeric integer value if a numeric value for 2507 * {@code codePoint} exists, -1 if there is no numeric value for 2508 * {@code codePoint}, -2 if the numeric value can not be 2509 * represented with an integer. 2510 */ 2511 public static int getNumericValue(int codePoint) { 2512 if (codePoint < 128) { 2513 // Optimized for ASCII 2514 if (codePoint >= '0' && codePoint <= '9') { 2515 return codePoint - '0'; 2516 } 2517 if (codePoint >= 'a' && codePoint <= 'z') { 2518 return codePoint - ('a' - 10); 2519 } 2520 if (codePoint >= 'A' && codePoint <= 'Z') { 2521 return codePoint - ('A' - 10); 2522 } 2523 return -1; 2524 } 2525 return getNumericValueImpl(codePoint); 2526 } 2527 2528 private static native int getNumericValueImpl(int codePoint); 2529 2530 /** 2531 * Gets the general Unicode category of the specified character. 2532 * 2533 * @param c 2534 * the character to get the category of. 2535 * @return the Unicode category of {@code c}. 2536 */ 2537 public static int getType(char c) { 2538 return getType((int) c); 2539 } 2540 2541 /** 2542 * Gets the general Unicode category of the specified code point. 2543 * 2544 * @param codePoint 2545 * the Unicode code point to get the category of. 2546 * @return the Unicode category of {@code codePoint}. 2547 */ 2548 public static int getType(int codePoint) { 2549 int type = getTypeImpl(codePoint); 2550 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2551 if (type <= Character.FORMAT) { 2552 return type; 2553 } 2554 return (type + 1); 2555 } 2556 2557 private static native int getTypeImpl(int codePoint); 2558 2559 /** 2560 * Gets the Unicode directionality of the specified character. 2561 * 2562 * @param c 2563 * the character to get the directionality of. 2564 * @return the Unicode directionality of {@code c}. 
2565 */ 2566 public static byte getDirectionality(char c) { 2567 return getDirectionality((int)c); 2568 } 2569 2570 /** 2571 * Gets the Unicode directionality of the specified character. 2572 * 2573 * @param codePoint 2574 * the Unicode code point to get the directionality of. 2575 * @return the Unicode directionality of {@code codePoint}. 2576 */ 2577 public static byte getDirectionality(int codePoint) { 2578 if (getType(codePoint) == Character.UNASSIGNED) { 2579 return Character.DIRECTIONALITY_UNDEFINED; 2580 } 2581 2582 byte directionality = getDirectionalityImpl(codePoint); 2583 if (directionality == -1) { 2584 return -1; 2585 } 2586 return DIRECTIONALITY[directionality]; 2587 } 2588 2589 private static native byte getDirectionalityImpl(int codePoint); 2590 2591 /** 2592 * Indicates whether the specified character is mirrored. 2593 * 2594 * @param c 2595 * the character to check. 2596 * @return {@code true} if {@code c} is mirrored; {@code false} 2597 * otherwise. 2598 */ 2599 public static boolean isMirrored(char c) { 2600 return isMirrored((int) c); 2601 } 2602 2603 /** 2604 * Indicates whether the specified code point is mirrored. 2605 * 2606 * @param codePoint 2607 * the code point to check. 2608 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2609 * otherwise. 2610 */ 2611 public static boolean isMirrored(int codePoint) { 2612 return isMirroredImpl(codePoint); 2613 } 2614 2615 private static native boolean isMirroredImpl(int codePoint); 2616 2617 @Override 2618 public int hashCode() { 2619 return value; 2620 } 2621 2622 /** 2623 * Indicates whether the specified character is defined in the Unicode 2624 * specification. 2625 * 2626 * @param c 2627 * the character to check. 2628 * @return {@code true} if the general Unicode category of the character is 2629 * not {@code UNASSIGNED}; {@code false} otherwise. 
2630 */ 2631 public static boolean isDefined(char c) { 2632 return isDefinedImpl(c); 2633 } 2634 2635 /** 2636 * Indicates whether the specified code point is defined in the Unicode 2637 * specification. 2638 * 2639 * @param codePoint 2640 * the code point to check. 2641 * @return {@code true} if the general Unicode category of the code point is 2642 * not {@code UNASSIGNED}; {@code false} otherwise. 2643 */ 2644 public static boolean isDefined(int codePoint) { 2645 return isDefinedImpl(codePoint); 2646 } 2647 2648 private static native boolean isDefinedImpl(int codePoint); 2649 2650 /** 2651 * Indicates whether the specified character is a digit. 2652 * 2653 * @param c 2654 * the character to check. 2655 * @return {@code true} if {@code c} is a digit; {@code false} 2656 * otherwise. 2657 */ 2658 public static boolean isDigit(char c) { 2659 return isDigit((int) c); 2660 } 2661 2662 /** 2663 * Indicates whether the specified code point is a digit. 2664 * 2665 * @param codePoint 2666 * the code point to check. 2667 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2668 * otherwise. 2669 */ 2670 public static boolean isDigit(int codePoint) { 2671 // Optimized case for ASCII 2672 if ('0' <= codePoint && codePoint <= '9') { 2673 return true; 2674 } 2675 if (codePoint < 1632) { 2676 return false; 2677 } 2678 return isDigitImpl(codePoint); 2679 } 2680 2681 private static native boolean isDigitImpl(int codePoint); 2682 2683 /** 2684 * Indicates whether the specified character is ignorable in a Java or 2685 * Unicode identifier. 2686 * 2687 * @param c 2688 * the character to check. 2689 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2690 */ 2691 public static boolean isIdentifierIgnorable(char c) { 2692 return isIdentifierIgnorable((int) c); 2693 } 2694 2695 /** 2696 * Indicates whether the specified code point is ignorable in a Java or 2697 * Unicode identifier. 2698 * 2699 * @param codePoint 2700 * the code point to check. 
2701 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2702 * otherwise. 2703 */ 2704 public static boolean isIdentifierIgnorable(int codePoint) { 2705 if (codePoint < 0x600) { 2706 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2707 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2708 } 2709 return isIdentifierIgnorableImpl(codePoint); 2710 } 2711 2712 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2713 2714 /** 2715 * Indicates whether the specified character is an ISO control character. 2716 * 2717 * @param c 2718 * the character to check. 2719 * @return {@code true} if {@code c} is an ISO control character; 2720 * {@code false} otherwise. 2721 */ 2722 public static boolean isISOControl(char c) { 2723 return isISOControl((int) c); 2724 } 2725 2726 /** 2727 * Indicates whether the specified code point is an ISO control character. 2728 * 2729 * @param c 2730 * the code point to check. 2731 * @return {@code true} if {@code c} is an ISO control character; 2732 * {@code false} otherwise. 2733 */ 2734 public static boolean isISOControl(int c) { 2735 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2736 } 2737 2738 /** 2739 * Indicates whether the specified character is a valid part of a Java 2740 * identifier other than the first character. 2741 * 2742 * @param c 2743 * the character to check. 2744 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2745 * {@code false} otherwise. 2746 */ 2747 public static boolean isJavaIdentifierPart(char c) { 2748 // BEGIN android-changed 2749 return isJavaIdentifierPart((int) c); 2750 // END android-changed 2751 } 2752 2753 /** 2754 * Indicates whether the specified code point is a valid part of a Java 2755 * identifier other than the first character. 2756 * 2757 * @param codePoint 2758 * the code point to check. 
2759 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2760 * {@code false} otherwise. 2761 */ 2762 public static boolean isJavaIdentifierPart(int codePoint) { 2763 // BEGIN android-changed: use precomputed bitmasks for the ASCII range. 2764 // Optimized case for ASCII 2765 if (codePoint < 64) { 2766 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2767 } else if (codePoint < 128) { 2768 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2769 } 2770 int type = getType(codePoint); 2771 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2772 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2773 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2774 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2775 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2776 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2777 // END android-changed 2778 } 2779 2780 /** 2781 * Indicates whether the specified character is a valid first character for 2782 * a Java identifier. 2783 * 2784 * @param c 2785 * the character to check. 2786 * @return {@code true} if {@code c} is a valid first character of a Java 2787 * identifier; {@code false} otherwise. 2788 */ 2789 public static boolean isJavaIdentifierStart(char c) { 2790 // BEGIN android-changed 2791 return isJavaIdentifierStart((int) c); 2792 // END android-changed 2793 } 2794 2795 /** 2796 * Indicates whether the specified code point is a valid first character for 2797 * a Java identifier. 2798 * 2799 * @param codePoint 2800 * the code point to check. 2801 * @return {@code true} if {@code codePoint} is a valid start of a Java 2802 * identifier; {@code false} otherwise. 2803 */ 2804 public static boolean isJavaIdentifierStart(int codePoint) { 2805 // BEGIN android-changed: use precomputed bitmasks for the ASCII range. 
2806 // Optimized case for ASCII 2807 if (codePoint < 64) { 2808 return (codePoint == '$'); // There's only one character in this range. 2809 } else if (codePoint < 128) { 2810 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2811 } 2812 int type = getType(codePoint); 2813 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2814 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2815 // END android-changed 2816 } 2817 2818 /** 2819 * Indicates whether the specified character is a Java letter. 2820 * 2821 * @param c 2822 * the character to check. 2823 * @return {@code true} if {@code c} is a Java letter; {@code false} 2824 * otherwise. 2825 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2826 */ 2827 @Deprecated 2828 public static boolean isJavaLetter(char c) { 2829 return isJavaIdentifierStart(c); 2830 } 2831 2832 /** 2833 * Indicates whether the specified character is a Java letter or digit 2834 * character. 2835 * 2836 * @param c 2837 * the character to check. 2838 * @return {@code true} if {@code c} is a Java letter or digit; 2839 * {@code false} otherwise. 2840 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2841 */ 2842 @Deprecated 2843 public static boolean isJavaLetterOrDigit(char c) { 2844 return isJavaIdentifierPart(c); 2845 } 2846 2847 /** 2848 * Indicates whether the specified character is a letter. 2849 * 2850 * @param c 2851 * the character to check. 2852 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2853 */ 2854 public static boolean isLetter(char c) { 2855 return isLetter((int) c); 2856 } 2857 2858 /** 2859 * Indicates whether the specified code point is a letter. 2860 * 2861 * @param codePoint 2862 * the code point to check. 2863 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2864 * otherwise. 
2865 */ 2866 public static boolean isLetter(int codePoint) { 2867 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2868 return true; 2869 } 2870 if (codePoint < 128) { 2871 return false; 2872 } 2873 return isLetterImpl(codePoint); 2874 } 2875 2876 private static native boolean isLetterImpl(int codePoint); 2877 2878 /** 2879 * Indicates whether the specified character is a letter or a digit. 2880 * 2881 * @param c 2882 * the character to check. 2883 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2884 * otherwise. 2885 */ 2886 public static boolean isLetterOrDigit(char c) { 2887 return isLetterOrDigit((int) c); 2888 } 2889 2890 /** 2891 * Indicates whether the specified code point is a letter or a digit. 2892 * 2893 * @param codePoint 2894 * the code point to check. 2895 * @return {@code true} if {@code codePoint} is a letter or a digit; 2896 * {@code false} otherwise. 2897 */ 2898 public static boolean isLetterOrDigit(int codePoint) { 2899 // Optimized case for ASCII 2900 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2901 return true; 2902 } 2903 if ('0' <= codePoint && codePoint <= '9') { 2904 return true; 2905 } 2906 if (codePoint < 128) { 2907 return false; 2908 } 2909 return isLetterOrDigitImpl(codePoint); 2910 } 2911 2912 private static native boolean isLetterOrDigitImpl(int codePoint); 2913 2914 /** 2915 * Indicates whether the specified character is a lower case letter. 2916 * 2917 * @param c 2918 * the character to check. 2919 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2920 * otherwise. 2921 */ 2922 public static boolean isLowerCase(char c) { 2923 return isLowerCase((int) c); 2924 } 2925 2926 /** 2927 * Indicates whether the specified code point is a lower case letter. 2928 * 2929 * @param codePoint 2930 * the code point to check. 
2931 * @return {@code true} if {@code codePoint} is a lower case letter; 2932 * {@code false} otherwise. 2933 */ 2934 public static boolean isLowerCase(int codePoint) { 2935 // Optimized case for ASCII 2936 if ('a' <= codePoint && codePoint <= 'z') { 2937 return true; 2938 } 2939 if (codePoint < 128) { 2940 return false; 2941 } 2942 return isLowerCaseImpl(codePoint); 2943 } 2944 2945 private static native boolean isLowerCaseImpl(int codePoint); 2946 2947 /** 2948 * Indicates whether the specified character is a Java space. 2949 * 2950 * @param c 2951 * the character to check. 2952 * @return {@code true} if {@code c} is a Java space; {@code false} 2953 * otherwise. 2954 * @deprecated Use {@link #isWhitespace(char)} 2955 */ 2956 @Deprecated 2957 public static boolean isSpace(char c) { 2958 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2959 } 2960 2961 /** 2962 * Indicates whether the specified character is a Unicode space character. 2963 * That is, if it is a member of one of the Unicode categories Space 2964 * Separator, Line Separator, or Paragraph Separator. 2965 * 2966 * @param c 2967 * the character to check. 2968 * @return {@code true} if {@code c} is a Unicode space character, 2969 * {@code false} otherwise. 2970 */ 2971 public static boolean isSpaceChar(char c) { 2972 return isSpaceChar((int) c); 2973 } 2974 2975 /** 2976 * Indicates whether the specified code point is a Unicode space character. 2977 * That is, if it is a member of one of the Unicode categories Space 2978 * Separator, Line Separator, or Paragraph Separator. 2979 * 2980 * @param codePoint 2981 * the code point to check. 2982 * @return {@code true} if {@code codePoint} is a Unicode space character, 2983 * {@code false} otherwise. 
2984 */ 2985 public static boolean isSpaceChar(int codePoint) { 2986 if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) { 2987 return true; 2988 } 2989 if (codePoint < 0x2000) { 2990 return false; 2991 } 2992 if (codePoint <= 0xffff) { 2993 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 2994 codePoint == 0x202f || codePoint == 0x3000; 2995 } 2996 return isSpaceCharImpl(codePoint); 2997 } 2998 2999 private static native boolean isSpaceCharImpl(int codePoint); 3000 3001 /** 3002 * Indicates whether the specified character is a titlecase character. 3003 * 3004 * @param c 3005 * the character to check. 3006 * @return {@code true} if {@code c} is a titlecase character, {@code false} 3007 * otherwise. 3008 */ 3009 public static boolean isTitleCase(char c) { 3010 return isTitleCaseImpl(c); 3011 } 3012 3013 /** 3014 * Indicates whether the specified code point is a titlecase character. 3015 * 3016 * @param codePoint 3017 * the code point to check. 3018 * @return {@code true} if {@code codePoint} is a titlecase character, 3019 * {@code false} otherwise. 3020 */ 3021 public static boolean isTitleCase(int codePoint) { 3022 return isTitleCaseImpl(codePoint); 3023 } 3024 3025 private static native boolean isTitleCaseImpl(int codePoint); 3026 3027 /** 3028 * Indicates whether the specified character is valid as part of a Unicode 3029 * identifier other than the first character. 3030 * 3031 * @param c 3032 * the character to check. 3033 * @return {@code true} if {@code c} is valid as part of a Unicode 3034 * identifier; {@code false} otherwise. 3035 */ 3036 public static boolean isUnicodeIdentifierPart(char c) { 3037 return isUnicodeIdentifierPartImpl(c); 3038 } 3039 3040 /** 3041 * Indicates whether the specified code point is valid as part of a Unicode 3042 * identifier other than the first character. 3043 * 3044 * @param codePoint 3045 * the code point to check. 
3046 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3047 * identifier; {@code false} otherwise. 3048 */ 3049 public static boolean isUnicodeIdentifierPart(int codePoint) { 3050 return isUnicodeIdentifierPartImpl(codePoint); 3051 } 3052 3053 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3054 3055 /** 3056 * Indicates whether the specified character is a valid initial character 3057 * for a Unicode identifier. 3058 * 3059 * @param c 3060 * the character to check. 3061 * @return {@code true} if {@code c} is a valid first character for a 3062 * Unicode identifier; {@code false} otherwise. 3063 */ 3064 public static boolean isUnicodeIdentifierStart(char c) { 3065 return isUnicodeIdentifierStartImpl(c); 3066 } 3067 3068 /** 3069 * Indicates whether the specified code point is a valid initial character 3070 * for a Unicode identifier. 3071 * 3072 * @param codePoint 3073 * the code point to check. 3074 * @return {@code true} if {@code codePoint} is a valid first character for 3075 * a Unicode identifier; {@code false} otherwise. 3076 */ 3077 public static boolean isUnicodeIdentifierStart(int codePoint) { 3078 return isUnicodeIdentifierStartImpl(codePoint); 3079 } 3080 3081 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3082 3083 /** 3084 * Indicates whether the specified character is an upper case letter. 3085 * 3086 * @param c 3087 * the character to check. 3088 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3089 * otherwise. 3090 */ 3091 public static boolean isUpperCase(char c) { 3092 return isUpperCase((int) c); 3093 } 3094 3095 /** 3096 * Indicates whether the specified code point is an upper case letter. 3097 * 3098 * @param codePoint 3099 * the code point to check. 3100 * @return {@code true} if {@code codePoint} is a upper case letter; 3101 * {@code false} otherwise. 
3102 */ 3103 public static boolean isUpperCase(int codePoint) { 3104 // Optimized case for ASCII 3105 if ('A' <= codePoint && codePoint <= 'Z') { 3106 return true; 3107 } 3108 if (codePoint < 128) { 3109 return false; 3110 } 3111 return isUpperCaseImpl(codePoint); 3112 } 3113 3114 private static native boolean isUpperCaseImpl(int codePoint); 3115 3116 /** 3117 * Indicates whether the specified character is a whitespace character in 3118 * Java. 3119 * 3120 * @param c 3121 * the character to check. 3122 * @return {@code true} if the supplied {@code c} is a whitespace character 3123 * in Java; {@code false} otherwise. 3124 */ 3125 public static boolean isWhitespace(char c) { 3126 return isWhitespace((int) c); 3127 } 3128 3129 /** 3130 * Indicates whether the specified code point is a whitespace character in 3131 * Java. 3132 * 3133 * @param codePoint 3134 * the code point to check. 3135 * @return {@code true} if the supplied {@code c} is a whitespace character 3136 * in Java; {@code false} otherwise. 3137 */ 3138 public static boolean isWhitespace(int codePoint) { 3139 // Optimized case for ASCII 3140 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) { 3141 return true; 3142 } 3143 if (codePoint == 0x1680) { 3144 return true; 3145 } 3146 if (codePoint < 0x2000 || codePoint == 0x2007) { 3147 return false; 3148 } 3149 if (codePoint <= 0xffff) { 3150 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3151 codePoint == 0x3000; 3152 } 3153 return isWhitespaceImpl(codePoint); 3154 } 3155 3156 private static native boolean isWhitespaceImpl(int codePoint); 3157 3158 /** 3159 * Reverses the order of the first and second byte in the specified 3160 * character. 3161 * 3162 * @param c 3163 * the character to reverse. 3164 * @return the character with reordered bytes. 
3165 */ 3166 public static char reverseBytes(char c) { 3167 return (char)((c<<8) | (c>>8)); 3168 } 3169 3170 /** 3171 * Returns the lower case equivalent for the specified character if the 3172 * character is an upper case letter. Otherwise, the specified character is 3173 * returned unchanged. 3174 * 3175 * @param c 3176 * the character 3177 * @return if {@code c} is an upper case character then its lower case 3178 * counterpart, otherwise just {@code c}. 3179 */ 3180 public static char toLowerCase(char c) { 3181 return (char) toLowerCase((int) c); 3182 } 3183 3184 /** 3185 * Returns the lower case equivalent for the specified code point if it is 3186 * an upper case letter. Otherwise, the specified code point is returned 3187 * unchanged. 3188 * 3189 * @param codePoint 3190 * the code point to check. 3191 * @return if {@code codePoint} is an upper case character then its lower 3192 * case counterpart, otherwise just {@code codePoint}. 3193 */ 3194 public static int toLowerCase(int codePoint) { 3195 // Optimized case for ASCII 3196 if ('A' <= codePoint && codePoint <= 'Z') { 3197 return (char) (codePoint + ('a' - 'A')); 3198 } 3199 if (codePoint < 192) { 3200 return codePoint; 3201 } 3202 return toLowerCaseImpl(codePoint); 3203 } 3204 3205 private static native int toLowerCaseImpl(int codePoint); 3206 3207 @Override 3208 public String toString() { 3209 return String.valueOf(value); 3210 } 3211 3212 /** 3213 * Converts the specified character to its string representation. 3214 * 3215 * @param value 3216 * the character to convert. 3217 * @return the character converted to a string. 3218 */ 3219 public static String toString(char value) { 3220 return String.valueOf(value); 3221 } 3222 3223 /** 3224 * Returns the title case equivalent for the specified character if it 3225 * exists. Otherwise, the specified character is returned unchanged. 3226 * 3227 * @param c 3228 * the character to convert. 
3229 * @return the title case equivalent of {@code c} if it exists, otherwise 3230 * {@code c}. 3231 */ 3232 public static char toTitleCase(char c) { 3233 return (char) toTitleCaseImpl(c); 3234 } 3235 3236 /** 3237 * Returns the title case equivalent for the specified code point if it 3238 * exists. Otherwise, the specified code point is returned unchanged. 3239 * 3240 * @param codePoint 3241 * the code point to convert. 3242 * @return the title case equivalent of {@code codePoint} if it exists, 3243 * otherwise {@code codePoint}. 3244 */ 3245 public static int toTitleCase(int codePoint) { 3246 return toTitleCaseImpl(codePoint); 3247 } 3248 3249 private static native int toTitleCaseImpl(int codePoint); 3250 3251 /** 3252 * Returns the upper case equivalent for the specified character if the 3253 * character is a lower case letter. Otherwise, the specified character is 3254 * returned unchanged. 3255 * 3256 * @param c 3257 * the character to convert. 3258 * @return if {@code c} is a lower case character then its upper case 3259 * counterpart, otherwise just {@code c}. 3260 */ 3261 public static char toUpperCase(char c) { 3262 return (char) toUpperCase((int) c); 3263 } 3264 3265 /** 3266 * Returns the upper case equivalent for the specified code point if the 3267 * code point is a lower case letter. Otherwise, the specified code point is 3268 * returned unchanged. 3269 * 3270 * @param codePoint 3271 * the code point to convert. 3272 * @return if {@code codePoint} is a lower case character then its upper 3273 * case counterpart, otherwise just {@code codePoint}. 3274 */ 3275 public static int toUpperCase(int codePoint) { 3276 // Optimized case for ASCII 3277 if ('a' <= codePoint && codePoint <= 'z') { 3278 return (char) (codePoint - ('a' - 'A')); 3279 } 3280 if (codePoint < 181) { 3281 return codePoint; 3282 } 3283 return toUpperCaseImpl(codePoint); 3284 } 3285 3286 private static native int toUpperCaseImpl(int codePoint); 3287 } 3288