1 /* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.harmony.tests.java.util.regex; 18 19 import java.util.regex.Matcher; 20 import java.util.regex.Pattern; 21 import java.util.regex.PatternSyntaxException; 22 23 import junit.framework.TestCase; 24 25 /** 26 * Tests simple Pattern compilation and Matcher methods 27 */ 28 @SuppressWarnings("nls") 29 public class Pattern2Test extends TestCase { 30 public void testSimpleMatch() throws PatternSyntaxException { 31 Pattern p = Pattern.compile("foo.*"); 32 33 Matcher m1 = p.matcher("foo123"); 34 assertTrue(m1.matches()); 35 assertTrue(m1.find(0)); 36 assertTrue(m1.lookingAt()); 37 38 Matcher m2 = p.matcher("fox"); 39 assertFalse(m2.matches()); 40 assertFalse(m2.find(0)); 41 assertFalse(m2.lookingAt()); 42 43 assertTrue(Pattern.matches("foo.*", "foo123")); 44 assertFalse(Pattern.matches("foo.*", "fox")); 45 46 assertFalse(Pattern.matches("bar", "foobar")); 47 48 assertTrue(Pattern.matches("", "")); 49 } 50 51 public void testCursors() { 52 Pattern p; 53 Matcher m; 54 55 try { 56 p = Pattern.compile("foo"); 57 58 m = p.matcher("foobar"); 59 assertTrue(m.find()); 60 assertEquals(0, m.start()); 61 assertEquals(3, m.end()); 62 assertFalse(m.find()); 63 64 
// Note: also testing reset here 65 m.reset(); 66 assertTrue(m.find()); 67 assertEquals(0, m.start()); 68 assertEquals(3, m.end()); 69 assertFalse(m.find()); 70 71 m.reset("barfoobar"); 72 assertTrue(m.find()); 73 assertEquals(3, m.start()); 74 assertEquals(6, m.end()); 75 assertFalse(m.find()); 76 77 m.reset("barfoo"); 78 assertTrue(m.find()); 79 assertEquals(3, m.start()); 80 assertEquals(6, m.end()); 81 assertFalse(m.find()); 82 83 m.reset("foobarfoobarfoo"); 84 assertTrue(m.find()); 85 assertEquals(0, m.start()); 86 assertEquals(3, m.end()); 87 assertTrue(m.find()); 88 assertEquals(6, m.start()); 89 assertEquals(9, m.end()); 90 assertTrue(m.find()); 91 assertEquals(12, m.start()); 92 assertEquals(15, m.end()); 93 assertFalse(m.find()); 94 assertTrue(m.find(0)); 95 assertEquals(0, m.start()); 96 assertEquals(3, m.end()); 97 assertTrue(m.find(4)); 98 assertEquals(6, m.start()); 99 assertEquals(9, m.end()); 100 } catch (PatternSyntaxException e) { 101 System.out.println(e.getMessage()); 102 fail(); 103 } 104 } 105 106 public void testGroups() throws PatternSyntaxException { 107 Pattern p; 108 Matcher m; 109 110 p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)"); 111 112 m = p.matcher("p1#q3p2q42p5p71p63#q888"); 113 assertTrue(m.find()); 114 assertEquals(0, m.start()); 115 assertEquals(5, m.end()); 116 assertEquals(2, m.groupCount()); 117 assertEquals(0, m.start(0)); 118 assertEquals(5, m.end(0)); 119 assertEquals(0, m.start(1)); 120 assertEquals(2, m.end(1)); 121 assertEquals(3, m.start(2)); 122 assertEquals(5, m.end(2)); 123 assertEquals("p1#q3", m.group()); 124 assertEquals("p1#q3", m.group(0)); 125 assertEquals("p1", m.group(1)); 126 assertEquals("q3", m.group(2)); 127 128 assertTrue(m.find()); 129 assertEquals(5, m.start()); 130 assertEquals(10, m.end()); 131 assertEquals(2, m.groupCount()); 132 assertEquals(10, m.end(0)); 133 assertEquals(5, m.start(1)); 134 assertEquals(7, m.end(1)); 135 assertEquals(7, m.start(2)); 136 assertEquals(10, m.end(2)); 137 
assertEquals("p2q42", m.group()); 138 assertEquals("p2q42", m.group(0)); 139 assertEquals("p2", m.group(1)); 140 assertEquals("q42", m.group(2)); 141 142 assertTrue(m.find()); 143 assertEquals(15, m.start()); 144 assertEquals(23, m.end()); 145 assertEquals(2, m.groupCount()); 146 assertEquals(15, m.start(0)); 147 assertEquals(23, m.end(0)); 148 assertEquals(15, m.start(1)); 149 assertEquals(18, m.end(1)); 150 assertEquals(19, m.start(2)); 151 assertEquals(23, m.end(2)); 152 assertEquals("p63#q888", m.group()); 153 assertEquals("p63#q888", m.group(0)); 154 assertEquals("p63", m.group(1)); 155 assertEquals("q888", m.group(2)); 156 assertFalse(m.find()); 157 } 158 159 public void testReplace() throws PatternSyntaxException { 160 Pattern p; 161 Matcher m; 162 163 // Note: examples from book, 164 // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171 165 p = Pattern.compile("a*b"); 166 167 m = p.matcher("aabfooaabfooabfoob"); 168 assertTrue(m.replaceAll("-").equals("-foo-foo-foo-")); 169 assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob")); 170 171 /* 172 * p = Pattern.compile ("\\p{Blank}"); 173 * 174 * m = p.matcher ("fee fie foe fum"); assertTrue 175 * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue 176 * (m.replaceAll("-").equals ("fee-fie-foe-fum")); 177 */ 178 179 p = Pattern.compile("([bB])yte"); 180 181 m = p.matcher("Byte for byte"); 182 assertTrue(m.replaceFirst("$1ite").equals("Bite for byte")); 183 assertTrue(m.replaceAll("$1ite").equals("Bite for bite")); 184 185 p = Pattern.compile("\\d\\d\\d\\d([- ])"); 186 187 m = p.matcher("card #1234-5678-1234"); 188 assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234")); 189 assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234")); 190 191 p = Pattern.compile("(up|left)( *)(right|down)"); 192 193 m = p.matcher("left right, up down"); 194 assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down")); 195 assertTrue(m.replaceAll("$3$2$1").equals("right left, down up")); 196 
197 p = Pattern.compile("([CcPp][hl]e[ea]se)"); 198 199 m = p.matcher("I want cheese. Please."); 200 assertTrue(m.replaceFirst("<b> $1 </b>").equals( 201 "I want <b> cheese </b>. Please.")); 202 assertTrue(m.replaceAll("<b> $1 </b>").equals( 203 "I want <b> cheese </b>. <b> Please </b>.")); 204 } 205 206 public void testEscapes() throws PatternSyntaxException { 207 Pattern p; 208 Matcher m; 209 210 // Test \\ sequence 211 p = Pattern.compile("([a-z]+)\\\\([a-z]+);"); 212 m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;"); 213 assertTrue(m.find()); 214 assertEquals("fred", m.group(1)); 215 assertEquals("ginger", m.group(2)); 216 assertTrue(m.find()); 217 assertEquals("abbott", m.group(1)); 218 assertEquals("costello", m.group(2)); 219 assertTrue(m.find()); 220 assertEquals("jekell", m.group(1)); 221 assertEquals("hyde", m.group(2)); 222 assertFalse(m.find()); 223 224 // Test \n, \t, \r, \f, \e, \a sequences 225 p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)"); 226 m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh"); 227 assertTrue(m.find()); 228 assertEquals("aa", m.group(1)); 229 assertEquals("bb", m.group(2)); 230 assertTrue(m.find()); 231 assertEquals("cc", m.group(1)); 232 assertEquals("dd", m.group(2)); 233 assertTrue(m.find()); 234 assertEquals("ee", m.group(1)); 235 assertEquals("ff", m.group(2)); 236 assertTrue(m.find()); 237 assertEquals("gg", m.group(1)); 238 assertEquals("hh", m.group(2)); 239 assertFalse(m.find()); 240 241 // Test \\u and \\x sequences 242 p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); 243 m = p.matcher("11:;22 ;33-;44!;"); 244 assertTrue(m.find()); 245 assertEquals("11", m.group(1)); 246 assertTrue(m.find()); 247 assertEquals("22", m.group(1)); 248 assertTrue(m.find()); 249 assertEquals("44", m.group(1)); 250 assertFalse(m.find()); 251 252 // Test invalid unicode sequences 253 try { 254 p = Pattern.compile("\\u"); 255 fail("PatternSyntaxException expected"); 256 } catch (PatternSyntaxException e) { 
257 } 258 259 try { 260 p = Pattern.compile("\\u;"); 261 fail("PatternSyntaxException expected"); 262 } catch (PatternSyntaxException e) { 263 } 264 265 try { 266 p = Pattern.compile("\\u002"); 267 fail("PatternSyntaxException expected"); 268 } catch (PatternSyntaxException e) { 269 } 270 271 try { 272 p = Pattern.compile("\\u002;"); 273 fail("PatternSyntaxException expected"); 274 } catch (PatternSyntaxException e) { 275 } 276 277 // Test invalid hex sequences 278 try { 279 p = Pattern.compile("\\x"); 280 fail("PatternSyntaxException expected"); 281 } catch (PatternSyntaxException e) { 282 } 283 284 try { 285 p = Pattern.compile("\\x;"); 286 fail("PatternSyntaxException expected"); 287 } catch (PatternSyntaxException e) { 288 } 289 290 // icu4c allows 1 to 6 hex digits in \x escapes. 291 p = Pattern.compile("\\xa"); 292 p = Pattern.compile("\\xab"); 293 p = Pattern.compile("\\xabc"); 294 p = Pattern.compile("\\xabcd"); 295 p = Pattern.compile("\\xabcde"); 296 p = Pattern.compile("\\xabcdef"); 297 // (Further digits would just be treated as characters after the escape.) 
298 try { 299 p = Pattern.compile("\\xg"); 300 fail(); 301 } catch (PatternSyntaxException expected) { 302 } 303 304 // Test \0 (octal) sequences (1, 2 and 3 digit) 305 p = Pattern.compile("([0-9]+)[\\07\\040\\0160];"); 306 m = p.matcher("11\u0007;22:;33 ;44p;"); 307 assertTrue(m.find()); 308 assertEquals("11", m.group(1)); 309 assertTrue(m.find()); 310 assertEquals("33", m.group(1)); 311 assertTrue(m.find()); 312 assertEquals("44", m.group(1)); 313 assertFalse(m.find()); 314 315 // Test invalid octal sequences 316 try { 317 p = Pattern.compile("\\08"); 318 fail("PatternSyntaxException expected"); 319 } catch (PatternSyntaxException e) { 320 } 321 322 // originally contributed test did not check the result 323 // TODO: check what RI does here 324 // try { 325 // p = Pattern.compile("\\0477"); 326 // fail("PatternSyntaxException expected"); 327 // } catch (PatternSyntaxException e) { 328 // } 329 330 try { 331 p = Pattern.compile("\\0"); 332 fail("PatternSyntaxException expected"); 333 } catch (PatternSyntaxException e) { 334 } 335 336 try { 337 p = Pattern.compile("\\0;"); 338 fail("PatternSyntaxException expected"); 339 } catch (PatternSyntaxException e) { 340 } 341 342 // Test \c (control character) sequence 343 p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];"); 344 m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;"); 345 assertTrue(m.find()); 346 assertEquals("11", m.group(1)); 347 assertTrue(m.find()); 348 assertEquals("33", m.group(1)); 349 assertTrue(m.find()); 350 assertEquals("55", m.group(1)); 351 assertTrue(m.find()); 352 assertEquals("66", m.group(1)); 353 assertFalse(m.find()); 354 355 // More thorough control escape test 356 // Ensure that each escape matches exactly the corresponding 357 // character 358 // code and no others (well, from 0-255 at least) 359 int i, j; 360 for (i = 0; i < 26; i++) { 361 p = Pattern.compile("\\c" + Character.toString((char) ('A' + i))); 362 int match_char = -1; 363 for (j = 0; j < 255; j++) { 364 m = 
p.matcher(Character.toString((char) j)); 365 if (m.matches()) { 366 assertEquals(-1, match_char); 367 match_char = j; 368 } 369 } 370 assertTrue(match_char == i + 1); 371 } 372 373 // Test invalid control escapes 374 // icu4c 50 accepts this pattern, and treats it as a literal. 375 //try { 376 p = Pattern.compile("\\c"); 377 assertTrue(p.matcher("x\\cy").find()); 378 // fail(p.matcher("").toString()); 379 //} catch (PatternSyntaxException e) { 380 //} 381 382 // But \cH works. 383 p = Pattern.compile("\\cH"); 384 assertTrue(p.matcher("x\u0008y").find()); 385 assertFalse(p.matcher("x\\cHy").find()); 386 387 // originally contributed test did not check the result 388 // TODO: check what RI does here 389 // try { 390 // p = Pattern.compile("\\c;"); 391 // fail("PatternSyntaxException expected"); 392 // } catch (PatternSyntaxException e) { 393 // } 394 // 395 // try { 396 // p = Pattern.compile("\\ca;"); 397 // fail("PatternSyntaxException expected"); 398 // } catch (PatternSyntaxException e) { 399 // } 400 // 401 // try { 402 // p = Pattern.compile("\\c4;"); 403 // fail("PatternSyntaxException expected"); 404 // } catch (PatternSyntaxException e) { 405 // } 406 } 407 408 public void testCharacterClasses() throws PatternSyntaxException { 409 Pattern p; 410 Matcher m; 411 412 // Test one character range 413 p = Pattern.compile("[p].*[l]"); 414 m = p.matcher("paul"); 415 assertTrue(m.matches()); 416 m = p.matcher("pool"); 417 assertTrue(m.matches()); 418 m = p.matcher("pong"); 419 assertFalse(m.matches()); 420 m = p.matcher("pl"); 421 assertTrue(m.matches()); 422 423 // Test two character range 424 p = Pattern.compile("[pm].*[lp]"); 425 m = p.matcher("prop"); 426 assertTrue(m.matches()); 427 m = p.matcher("mall"); 428 assertTrue(m.matches()); 429 m = p.matcher("pong"); 430 assertFalse(m.matches()); 431 m = p.matcher("pill"); 432 assertTrue(m.matches()); 433 434 // Test range including [ and ] 435 p = Pattern.compile("[<\\[].*[\\]>]"); 436 m = p.matcher("<foo>"); 437 
assertTrue(m.matches()); 438 m = p.matcher("[bar]"); 439 assertTrue(m.matches()); 440 m = p.matcher("{foobar]"); 441 assertFalse(m.matches()); 442 m = p.matcher("<pill]"); 443 assertTrue(m.matches()); 444 445 // Test range using ^ 446 p = Pattern.compile("[^bc][a-z]+[tr]"); 447 m = p.matcher("pat"); 448 assertTrue(m.matches()); 449 m = p.matcher("liar"); 450 assertTrue(m.matches()); 451 m = p.matcher("car"); 452 assertFalse(m.matches()); 453 m = p.matcher("gnat"); 454 assertTrue(m.matches()); 455 456 // Test character range using - 457 p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]"); 458 m = p.matcher("d__F-8"); 459 assertTrue(m.matches()); 460 m = p.matcher("c_a-q"); 461 assertTrue(m.matches()); 462 m = p.matcher("a__R-a"); 463 assertFalse(m.matches()); 464 m = p.matcher("r_____d-----5"); 465 assertTrue(m.matches()); 466 467 // Test range using unicode characters and unicode and hex escapes 468 p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]"); 469 m = p.matcher("\u2000_q-\u0007"); 470 assertTrue(m.matches()); 471 m = p.matcher("\u1234_z-\u0001"); 472 assertTrue(m.matches()); 473 m = p.matcher("r_p-q"); 474 assertFalse(m.matches()); 475 m = p.matcher("\u2345_____d-----\n"); 476 assertTrue(m.matches()); 477 478 // Test ranges including the "-" character 479 // "---" collides with icu4c's "--" operator, and likely to be user error anyway. 
480 if (false) { 481 p = Pattern.compile("[\\*-/]_+[---]!+[--AP]"); 482 m = p.matcher("-_-!!A"); 483 assertTrue(m.matches()); 484 m = p.matcher("\u002b_-!!!-"); 485 assertTrue(m.matches()); 486 m = p.matcher("!_-!@"); 487 assertFalse(m.matches()); 488 m = p.matcher(",______-!!!!!!!P"); 489 assertTrue(m.matches()); 490 } 491 492 // Test nested ranges 493 p = Pattern.compile("[pm[t]][a-z]+[[r]lp]"); 494 m = p.matcher("prop"); 495 assertTrue(m.matches()); 496 m = p.matcher("tsar"); 497 assertTrue(m.matches()); 498 m = p.matcher("pong"); 499 assertFalse(m.matches()); 500 m = p.matcher("moor"); 501 assertTrue(m.matches()); 502 503 // Test character class intersection with && 504 // TODO: figure out what x&&y or any class with a null intersection 505 // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match 506 // nothing" and doesn't mean "match anything" so I'm stumped. 507 p = Pattern.compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+"); 508 m = p.matcher("h--q--x--"); 509 assertTrue(m.matches()); 510 m = p.matcher("hog--q-x-"); 511 assertTrue(m.matches()); 512 m = p.matcher("ape--q-x-"); 513 assertFalse(m.matches()); 514 m = p.matcher("mop--q-x----"); 515 assertTrue(m.matches()); 516 517 // Test error cases with && 518 // This is an RI bug that icu4c doesn't have. 519 if (false) { 520 p = Pattern.compile("[&&[xyz]]"); 521 m = p.matcher("&"); 522 // System.out.println(m.matches()); 523 m = p.matcher("x"); 524 // System.out.println(m.matches()); 525 m = p.matcher("y"); 526 // System.out.println(m.matches()); 527 } 528 p = Pattern.compile("[[xyz]&[axy]]"); 529 m = p.matcher("x"); 530 // System.out.println(m.matches()); 531 m = p.matcher("z"); 532 // System.out.println(m.matches()); 533 m = p.matcher("&"); 534 // System.out.println(m.matches()); 535 p = Pattern.compile("[abc[123]&&[345]def]"); 536 m = p.matcher("a"); 537 // System.out.println(m.matches()); 538 539 // icu4c rightly considers a missing rhs to && a syntax error. 
540 if (false) { 541 p = Pattern.compile("[[xyz]&&]"); 542 } 543 544 p = Pattern.compile("[[abc]&]"); 545 546 try { 547 p = Pattern.compile("[[abc]&&"); 548 fail("PatternSyntaxException expected"); 549 } catch (PatternSyntaxException e) { 550 } 551 552 p = Pattern.compile("[[abc]\\&&[xyz]]"); 553 554 p = Pattern.compile("[[abc]&\\&[xyz]]"); 555 556 // Test 3-way intersection 557 p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]"); 558 m = p.matcher("g"); 559 assertTrue(m.matches()); 560 m = p.matcher("m"); 561 assertFalse(m.matches()); 562 563 // Test nested intersection 564 p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]"); 565 m = p.matcher("g"); 566 assertTrue(m.matches()); 567 m = p.matcher("m"); 568 assertFalse(m.matches()); 569 570 // Test character class subtraction with && and ^ 571 p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]"); 572 m = p.matcher("pop"); 573 assertTrue(m.matches()); 574 m = p.matcher("tag"); 575 assertTrue(m.matches()); 576 m = p.matcher("eat"); 577 assertFalse(m.matches()); 578 m = p.matcher("tax"); 579 assertFalse(m.matches()); 580 m = p.matcher("zip"); 581 assertTrue(m.matches()); 582 583 // Test . 
(DOT), with and without DOTALL 584 // Note: DOT not allowed in character classes 585 p = Pattern.compile(".+/x.z"); 586 m = p.matcher("!$/xyz"); 587 assertTrue(m.matches()); 588 m = p.matcher("%\n\r/x\nz"); 589 assertFalse(m.matches()); 590 p = Pattern.compile(".+/x.z", Pattern.DOTALL); 591 m = p.matcher("%\n\r/x\nz"); 592 assertTrue(m.matches()); 593 594 // Test \d (digit) 595 p = Pattern.compile("\\d+[a-z][\\dx]"); 596 m = p.matcher("42a6"); 597 assertTrue(m.matches()); 598 m = p.matcher("21zx"); 599 assertTrue(m.matches()); 600 m = p.matcher("ab6"); 601 assertFalse(m.matches()); 602 m = p.matcher("56912f9"); 603 assertTrue(m.matches()); 604 605 // Test \D (not a digit) 606 p = Pattern.compile("\\D+[a-z]-[\\D3]"); 607 m = p.matcher("za-p"); 608 assertTrue(m.matches()); 609 m = p.matcher("%!e-3"); 610 assertTrue(m.matches()); 611 m = p.matcher("9a-x"); 612 assertFalse(m.matches()); 613 m = p.matcher("\u1234pp\ny-3"); 614 assertTrue(m.matches()); 615 616 // Test \s (whitespace) 617 p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>"); 618 m = p.matcher("<cat \t1\fx>"); 619 assertTrue(m.matches()); 620 m = p.matcher("<cat \t1\f >"); 621 assertFalse(m.matches()); 622 m = p 623 .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz"); 624 assertTrue(m.find()); 625 assertTrue(m.find()); 626 assertFalse(m.find()); 627 628 // Test \S (not whitespace) 629 p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>"); 630 m = p.matcher("<f $0**\n** 221>"); 631 assertTrue(m.matches()); 632 m = p.matcher("<x 441\t221>"); 633 assertTrue(m.matches()); 634 m = p.matcher("<z \t9\ng 221>"); 635 assertFalse(m.matches()); 636 m = p.matcher("<z 60\ngg\u1234\f221>"); 637 assertTrue(m.matches()); 638 p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>"); 639 m = p.matcher("<f $0**\n** 221x>"); 640 assertTrue(m.matches()); 641 m = p.matcher("<x 441\t221z>"); 642 assertTrue(m.matches()); 643 m = p.matcher("<x 441\t221 >"); 644 assertFalse(m.matches()); 
645 m = p.matcher("<x 441\t221c>"); 646 assertFalse(m.matches()); 647 m = p.matcher("<z \t9\ng 221x>"); 648 assertFalse(m.matches()); 649 m = p.matcher("<z 60\ngg\u1234\f221\u0001>"); 650 assertTrue(m.matches()); 651 652 // Test \w (ascii word) 653 p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;"); 654 m = p.matcher("<f1 99;!foo5/a$7;"); 655 assertTrue(m.matches()); 656 m = p.matcher("<f$ 99;!foo5/a$7;"); 657 assertFalse(m.matches()); 658 m = p 659 .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;"); 660 assertTrue(m.matches()); 661 662 // Test \W (not an ascii word) 663 p = Pattern.compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;"); 664 m = p.matcher("<$foo3\n99;_bar\t0;"); 665 assertTrue(m.matches()); 666 m = p.matcher("<hh 99;_g 0;"); 667 assertFalse(m.matches()); 668 m = p.matcher("<*xx\t00;^zz\f11;"); 669 assertTrue(m.matches()); 670 671 // Test x|y pattern 672 // TODO 673 } 674 675 public void testPOSIXGroups() throws PatternSyntaxException { 676 Pattern p; 677 Matcher m; 678 679 // Test POSIX groups using \p and \P (in the group and not in the group) 680 // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct, 681 // Graph, Print, Blank, Space, Cntrl 682 // Test \p{Lower} 683 /* 684 * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>"); 685 * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m = p.matcher("<p4%:Z\tq>"); 686 * assertTrue(m.matches()); m = p.matcher("<A6#:e e>"); 687 * assertFalse(m.matches()); 688 */ 689 p = Pattern.compile("\\p{Lower}+"); 690 m = p.matcher("abcdefghijklmnopqrstuvwxyz"); 691 assertTrue(m.matches()); 692 693 // Invalid uses of \p{Lower} 694 try { 695 p = Pattern.compile("\\p"); 696 fail("PatternSyntaxException expected"); 697 } catch (PatternSyntaxException e) { 698 } 699 700 try { 701 p = Pattern.compile("\\p;"); 702 fail("PatternSyntaxException expected"); 703 } catch (PatternSyntaxException 
e) { 704 } 705 706 try { 707 p = Pattern.compile("\\p{"); 708 fail("PatternSyntaxException expected"); 709 } catch (PatternSyntaxException e) { 710 } 711 712 try { 713 p = Pattern.compile("\\p{;"); 714 fail("PatternSyntaxException expected"); 715 } catch (PatternSyntaxException e) { 716 } 717 718 try { 719 p = Pattern.compile("\\p{Lower"); 720 fail("PatternSyntaxException expected"); 721 } catch (PatternSyntaxException e) { 722 } 723 724 try { 725 p = Pattern.compile("\\p{Lower;"); 726 fail("PatternSyntaxException expected"); 727 } catch (PatternSyntaxException e) { 728 } 729 730 // Test \p{Upper} 731 /* 732 * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>"); 733 * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m = p.matcher("<P4%:z\tQ>"); 734 * assertTrue(m.matches()); m = p.matcher("<a6#:E E>"); 735 * assertFalse(m.matches()); 736 */ 737 p = Pattern.compile("\\p{Upper}+"); 738 m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 739 assertTrue(m.matches()); 740 741 // Invalid uses of \p{Upper} 742 try { 743 p = Pattern.compile("\\p{Upper"); 744 fail("PatternSyntaxException expected"); 745 } catch (PatternSyntaxException e) { 746 } 747 748 try { 749 p = Pattern.compile("\\p{Upper;"); 750 fail("PatternSyntaxException expected"); 751 } catch (PatternSyntaxException e) { 752 } 753 754 // Test \p{ASCII} 755 /* 756 * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>"); 757 * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m = 758 * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m = 759 * p.matcher("<\u00846#:E E>"); assertFalse(m.matches()) 760 */ 761 int i; 762 p = Pattern.compile("\\p{ASCII}"); 763 for (i = 0; i < 0x80; i++) { 764 m = p.matcher(Character.toString((char) i)); 765 assertTrue(m.matches()); 766 } 767 for (; i < 0xff; i++) { 768 m = p.matcher(Character.toString((char) i)); 769 
assertFalse(m.matches()); 770 } 771 772 // Invalid uses of \p{ASCII} 773 try { 774 p = Pattern.compile("\\p{ASCII"); 775 fail("PatternSyntaxException expected"); 776 } catch (PatternSyntaxException e) { 777 } 778 779 try { 780 p = Pattern.compile("\\p{ASCII;"); 781 fail("PatternSyntaxException expected"); 782 } catch (PatternSyntaxException e) { 783 } 784 785 // Test \p{Alpha} 786 // TODO 787 788 // Test \p{Digit} 789 // TODO 790 791 // Test \p{XDigit} 792 // TODO 793 794 // Test \p{Alnum} 795 // TODO 796 797 // Test \p{Punct} 798 // TODO 799 800 // Test \p{Graph} 801 // TODO 802 803 // Test \p{Print} 804 // TODO 805 806 // Test \p{Blank} 807 // TODO 808 809 // Test \p{Space} 810 // TODO 811 812 // Test \p{Cntrl} 813 // TODO 814 } 815 816 public void testUnicodeBlocks() throws PatternSyntaxException { 817 Pattern p; 818 Matcher m; 819 int i, j; 820 821 // Test Unicode blocks using \p and \P 822 // FIXME: 823 // Note that LatinExtended-B and ArabicPresentations-B are unrecognized 824 // by the reference JDK. 
825 for (i = 0; i < UBlocks.length; i++) { 826 /* 827 * p = Pattern.compile("\\p{"+UBlocks[i].name+"}"); 828 * 829 * if (UBlocks[i].low > 0) { m = 830 * p.matcher(Character.toString((char)(UBlocks[i].low-1))); 831 * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <= 832 * UBlocks[i].high; j++) { m = 833 * p.matcher(Character.toString((char)j)); assertTrue(m.matches()); } 834 * if (UBlocks[i].high < 0xFFFF) { m = 835 * p.matcher(Character.toString((char)(UBlocks[i].high+1))); 836 * assertFalse(m.matches()); } 837 * 838 * p = Pattern.compile("\\P{"+UBlocks[i].name+"}"); 839 * 840 * if (UBlocks[i].low > 0) { m = 841 * p.matcher(Character.toString((char)(UBlocks[i].low-1))); 842 * assertTrue(m.matches()); } for (j=UBlocks[i].low; j < 843 * UBlocks[i].high; j++) { m = 844 * p.matcher(Character.toString((char)j)); assertFalse(m.matches()); } 845 * if (UBlocks[i].high < 0xFFFF) { m = 846 * p.matcher(Character.toString((char)(UBlocks[i].high+1))); 847 * assertTrue(m.matches()); } 848 */ 849 850 p = Pattern.compile("\\p{In" + UBlocks[i].name + "}"); 851 852 if (UBlocks[i].low > 0) { 853 m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); 854 assertFalse(UBlocks[i].name, m.matches()); 855 } 856 for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) { 857 m = p.matcher(Character.toString((char) j)); 858 assertTrue(UBlocks[i].name, m.matches()); 859 } 860 if (UBlocks[i].high < 0xFFFF) { 861 m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); 862 assertFalse(UBlocks[i].name, m.matches()); 863 } 864 865 p = Pattern.compile("\\P{In" + UBlocks[i].name + "}"); 866 867 if (UBlocks[i].low > 0) { 868 m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); 869 assertTrue(UBlocks[i].name, m.matches()); 870 } 871 for (j = UBlocks[i].low; j < UBlocks[i].high; j++) { 872 m = p.matcher(Character.toString((char) j)); 873 assertFalse(UBlocks[i].name, m.matches()); 874 } 875 if (UBlocks[i].high < 0xFFFF) { 876 m = p.matcher(Character.toString((char) 
(UBlocks[i].high + 1))); 877 assertTrue(UBlocks[i].name, m.matches()); 878 } 879 } 880 } 881 882 public void testMisc() throws PatternSyntaxException { 883 Pattern p; 884 Matcher m; 885 886 // Test (?>...) 887 // TODO 888 889 // Test (?onflags-offflags) 890 // Valid flags are i,m,d,s,u,x 891 // TODO 892 893 // Test (?onflags-offflags:...) 894 // TODO 895 896 // Test \Q, \E 897 p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+"); 898 m = p.matcher("abc;[a-z]+;\\Q(foo.*);411"); 899 assertTrue(m.matches()); 900 m = p.matcher("abc;def;foo42;555"); 901 assertFalse(m.matches()); 902 m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123"); 903 assertFalse(m.matches()); 904 905 p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+"); 906 m = p.matcher("abc;foo5-(...);123"); 907 assertTrue(m.matches()); 908 assertEquals("foo5-(...)", m.group(1)); 909 m = p.matcher("abc;foo9-(xxx);789"); 910 assertFalse(m.matches()); 911 912 p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+"); 913 m = p.matcher("abc;bar0-def$-;123"); 914 assertTrue(m.matches()); 915 916 // FIXME: 917 // This should work the same as the pattern above but fails with the 918 // the reference JDK 919 p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+"); 920 m = p.matcher("abc;bar0-def$-;123"); 921 // assertTrue(m.matches()); 922 923 // FIXME: 924 // This should work too .. it looks as if just about anything that 925 // has more 926 // than one character between \Q and \E is broken in the the reference 927 // JDK 928 p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+"); 929 m = p.matcher("abc;bar0-def[99]-]0x[;123"); 930 // assertTrue(m.matches()); 931 932 // This is the same as above but with explicit escapes .. 
and this 933 // does work 934 // on the the reference JDK 935 p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+"); 936 m = p.matcher("abc;bar0-def[99]-]0x[;123"); 937 assertTrue(m.matches()); 938 939 // Test #<comment text> 940 // TODO 941 } 942 943 public void testCompile1() throws PatternSyntaxException { 944 Pattern pattern = Pattern 945 .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*"); 946 String name = "iso-8859-1"; 947 assertTrue(pattern.matcher(name).matches()); 948 } 949 950 public void testCompile2() throws PatternSyntaxException { 951 String findString = "\\Qimport\\E"; 952 953 Pattern pattern = Pattern.compile(findString, 0); 954 Matcher matcher = pattern.matcher(new String( 955 "import a.A;\n\n import b.B;\nclass C {}")); 956 957 assertTrue(matcher.find(0)); 958 } 959 960 public void testCompile3() throws PatternSyntaxException { 961 Pattern p; 962 Matcher m; 963 p = Pattern.compile("a$"); 964 m = p.matcher("a\n"); 965 assertTrue(m.find()); 966 assertEquals("a", m.group()); 967 assertFalse(m.find()); 968 969 p = Pattern.compile("(a$)"); 970 m = p.matcher("a\n"); 971 assertTrue(m.find()); 972 assertEquals("a", m.group()); 973 assertEquals("a", m.group(1)); 974 assertFalse(m.find()); 975 976 p = Pattern.compile("^.*$", Pattern.MULTILINE); 977 978 m = p.matcher("a\n"); 979 assertTrue(m.find()); 980 // System.out.println("["+m.group()+"]"); 981 assertEquals("a", m.group()); 982 assertFalse(m.find()); 983 984 m = p.matcher("a\nb\n"); 985 assertTrue(m.find()); 986 // System.out.println("["+m.group()+"]"); 987 assertEquals("a", m.group()); 988 assertTrue(m.find()); 989 // System.out.println("["+m.group()+"]"); 990 assertEquals("b", m.group()); 991 assertFalse(m.find()); 992 993 m = p.matcher("a\nb"); 994 assertTrue(m.find()); 995 // System.out.println("["+m.group()+"]"); 996 assertEquals("a", m.group()); 997 assertTrue(m.find()); 998 assertEquals("b", m.group()); 999 assertFalse(m.find()); 1000 1001 m = p.matcher("\naa\r\nbb\rcc\n\n"); 
1002 assertTrue(m.find()); 1003 // System.out.println("["+m.group()+"]"); 1004 assertTrue(m.group().equals("")); 1005 assertTrue(m.find()); 1006 // System.out.println("["+m.group()+"]"); 1007 assertEquals("aa", m.group()); 1008 assertTrue(m.find()); 1009 // System.out.println("["+m.group()+"]"); 1010 assertEquals("bb", m.group()); 1011 assertTrue(m.find()); 1012 // System.out.println("["+m.group()+"]"); 1013 assertEquals("cc", m.group()); 1014 assertTrue(m.find()); 1015 // System.out.println("["+m.group()+"]"); 1016 assertTrue(m.group().equals("")); 1017 assertFalse(m.find()); 1018 1019 m = p.matcher("a"); 1020 assertTrue(m.find()); 1021 assertEquals("a", m.group()); 1022 assertFalse(m.find()); 1023 1024 m = p.matcher(""); 1025 // This differs from the RI behaviour but seems more correct. 1026 assertTrue(m.find()); 1027 assertTrue(m.group().equals("")); 1028 assertFalse(m.find()); 1029 1030 p = Pattern.compile("^.*$"); 1031 m = p.matcher(""); 1032 assertTrue(m.find()); 1033 assertTrue(m.group().equals("")); 1034 assertFalse(m.find()); 1035 } 1036 1037 public void testCompile4() throws PatternSyntaxException { 1038 String findString = "\\Qpublic\\E"; 1039 StringBuffer text = new StringBuffer(" public class Class {\n" 1040 + " public class Class {"); 1041 1042 Pattern pattern = Pattern.compile(findString, 0); 1043 Matcher matcher = pattern.matcher(text); 1044 1045 boolean found = matcher.find(); 1046 assertTrue(found); 1047 assertEquals(4, matcher.start()); 1048 if (found) { 1049 // modify text 1050 text.delete(0, text.length()); 1051 text.append("Text have been changed."); 1052 matcher.reset(text); 1053 } 1054 1055 found = matcher.find(); 1056 assertFalse(found); 1057 } 1058 1059 public void testCompile5() throws PatternSyntaxException { 1060 Pattern p = Pattern.compile("^[0-9]"); 1061 String s[] = p.split("12", -1); 1062 assertEquals("", s[0]); 1063 assertEquals("2", s[1]); 1064 assertEquals(2, s.length); 1065 } 1066 1067 // public void testCompile6() { 1068 // 
// String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
    // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
    // try {
    // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
    // assertTrue(true);
    // } catch (PatternSyntaxException e) {
    // System.out.println(e.getMessage());
    // assertTrue(false);
    // }
    // }

    /**
     * One row of a range table: a name plus the inclusive-looking lower and
     * upper code values passed to the constructor. The fields are public and
     * are stored exactly as given, with no validation.
     */
    private static class UBInfo {
        public String name;

        public int low;

        public int high;

        public UBInfo(int low, int high, String name) {
            this.low = low;
            this.high = high;
            this.name = name;
        }
    }

    // A table representing the unicode categories
    // private static UBInfo[] UCategories = {
    // Lu
    // Ll
    // Lt
    // Lm
    // Lo
    // Mn
    // Mc
    // Me
    // Nd
    // Nl
    // No
    // Pc
    // Pd
    // Ps
    // Pe
    // Pi
    // Pf
    // Po
    // Sm
    // Sc
    // Sk
    // So
    // Zs
    // Zl
    // Zp
    // Cc
    // Cf
    // Cs
    // Co
    // Cn
    // };

    // A table representing the unicode character blocks.
    //
    // Each entry is preceded by a comment of the form
    //   first..last  Block name  (Character.UnicodeBlock constant)
    // In four entries (CJK Unified Ideographs Extension A, Hangul Syllables,
    // Arabic Presentation Forms-B, Specials) the "last" value quoted in the
    // comment is lower than the upper bound actually passed to UBInfo; the
    // constructor arguments are what the table contains.
    private static UBInfo[] UBlocks = {
            // 0000..007F  Basic Latin  (Character.UnicodeBlock.BASIC_LATIN)
            new UBInfo(0x0000, 0x007F, "BasicLatin"),
            // 0080..00FF  Latin-1 Supplement  (Character.UnicodeBlock.LATIN_1_SUPPLEMENT)
            new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"),
            // 0100..017F  Latin Extended-A  (Character.UnicodeBlock.LATIN_EXTENDED_A)
            new UBInfo(0x0100, 0x017F, "LatinExtended-A"),
            // 0180..024F  Latin Extended-B  (Character.UnicodeBlock.LATIN_EXTENDED_B)
            // entry disabled:
            // new UBInfo (0x0180,0x024F,"InLatinExtended-B"),
            // 0250..02AF  IPA Extensions  (Character.UnicodeBlock.IPA_EXTENSIONS)
            new UBInfo(0x0250, 0x02AF, "IPAExtensions"),
            // 02B0..02FF  Spacing Modifier Letters  (Character.UnicodeBlock.SPACING_MODIFIER_LETTERS)
            new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"),
            // 0300..036F  Combining Diacritical Marks  (Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS)
            new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"),
            // 0370..03FF  Greek  (Character.UnicodeBlock.GREEK)
            new UBInfo(0x0370, 0x03FF, "Greek"),
            // 0400..04FF  Cyrillic  (Character.UnicodeBlock.CYRILLIC)
            new UBInfo(0x0400, 0x04FF, "Cyrillic"),
            // 0530..058F  Armenian  (Character.UnicodeBlock.ARMENIAN)
            new UBInfo(0x0530, 0x058F, "Armenian"),
            // 0590..05FF  Hebrew  (Character.UnicodeBlock.HEBREW)
            new UBInfo(0x0590, 0x05FF, "Hebrew"),
            // 0600..06FF  Arabic  (Character.UnicodeBlock.ARABIC)
            new UBInfo(0x0600, 0x06FF, "Arabic"),
            // 0700..074F  Syriac  (Character.UnicodeBlock.SYRIAC)
            new UBInfo(0x0700, 0x074F, "Syriac"),
            // 0780..07BF  Thaana  (Character.UnicodeBlock.THAANA)
            new UBInfo(0x0780, 0x07BF, "Thaana"),
            // 0900..097F  Devanagari  (Character.UnicodeBlock.DEVANAGARI)
            new UBInfo(0x0900, 0x097F, "Devanagari"),
            // 0980..09FF  Bengali  (Character.UnicodeBlock.BENGALI)
            new UBInfo(0x0980, 0x09FF, "Bengali"),
            // 0A00..0A7F  Gurmukhi  (Character.UnicodeBlock.GURMUKHI)
            new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"),
            // 0A80..0AFF  Gujarati  (Character.UnicodeBlock.GUJARATI)
            new UBInfo(0x0A80, 0x0AFF, "Gujarati"),
            // 0B00..0B7F  Oriya  (Character.UnicodeBlock.ORIYA)
            new UBInfo(0x0B00, 0x0B7F, "Oriya"),
            // 0B80..0BFF  Tamil  (Character.UnicodeBlock.TAMIL)
            new UBInfo(0x0B80, 0x0BFF, "Tamil"),
            // 0C00..0C7F  Telugu  (Character.UnicodeBlock.TELUGU)
            new UBInfo(0x0C00, 0x0C7F, "Telugu"),
            // 0C80..0CFF  Kannada  (Character.UnicodeBlock.KANNADA)
            new UBInfo(0x0C80, 0x0CFF, "Kannada"),
            // 0D00..0D7F  Malayalam  (Character.UnicodeBlock.MALAYALAM)
            new UBInfo(0x0D00, 0x0D7F, "Malayalam"),
            // 0D80..0DFF  Sinhala  (Character.UnicodeBlock.SINHALA)
            new UBInfo(0x0D80, 0x0DFF, "Sinhala"),
            // 0E00..0E7F  Thai  (Character.UnicodeBlock.THAI)
            new UBInfo(0x0E00, 0x0E7F, "Thai"),
            // 0E80..0EFF  Lao  (Character.UnicodeBlock.LAO)
            new UBInfo(0x0E80, 0x0EFF, "Lao"),
            // 0F00..0FFF  Tibetan  (Character.UnicodeBlock.TIBETAN)
            new UBInfo(0x0F00, 0x0FFF, "Tibetan"),
            // 1000..109F  Myanmar  (Character.UnicodeBlock.MYANMAR)
            new UBInfo(0x1000, 0x109F, "Myanmar"),
            // 10A0..10FF  Georgian  (Character.UnicodeBlock.GEORGIAN)
            new UBInfo(0x10A0, 0x10FF, "Georgian"),
            // 1100..11FF  Hangul Jamo  (Character.UnicodeBlock.HANGUL_JAMO)
            new UBInfo(0x1100, 0x11FF, "HangulJamo"),
            // 1200..137F  Ethiopic  (Character.UnicodeBlock.ETHIOPIC)
            new UBInfo(0x1200, 0x137F, "Ethiopic"),
            // 13A0..13FF  Cherokee  (Character.UnicodeBlock.CHEROKEE)
            new UBInfo(0x13A0, 0x13FF, "Cherokee"),
            // 1400..167F  Unified Canadian Aboriginal Syllabics  (Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS)
            new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"),
            // 1680..169F  Ogham  (Character.UnicodeBlock.OGHAM)
            new UBInfo(0x1680, 0x169F, "Ogham"),
            // 16A0..16FF  Runic  (Character.UnicodeBlock.RUNIC)
            new UBInfo(0x16A0, 0x16FF, "Runic"),
            // 1780..17FF  Khmer  (Character.UnicodeBlock.KHMER)
            new UBInfo(0x1780, 0x17FF, "Khmer"),
            // 1800..18AF  Mongolian  (Character.UnicodeBlock.MONGOLIAN)
            new UBInfo(0x1800, 0x18AF, "Mongolian"),
            // 1E00..1EFF  Latin Extended Additional  (Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
            new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"),
            // 1F00..1FFF  Greek Extended  (Character.UnicodeBlock.GREEK_EXTENDED)
            new UBInfo(0x1F00, 0x1FFF, "GreekExtended"),
            // 2000..206F  General Punctuation  (Character.UnicodeBlock.GENERAL_PUNCTUATION)
            new UBInfo(0x2000, 0x206F, "GeneralPunctuation"),
            // 2070..209F  Superscripts and Subscripts  (Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS)
            new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"),
            // 20A0..20CF  Currency Symbols  (Character.UnicodeBlock.CURRENCY_SYMBOLS)
            new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"),
            // 20D0..20FF  Combining Marks for Symbols  (Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS)
            new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"),
            // 2100..214F  Letterlike Symbols  (Character.UnicodeBlock.LETTERLIKE_SYMBOLS)
            new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"),
            // 2150..218F  Number Forms  (Character.UnicodeBlock.NUMBER_FORMS)
            new UBInfo(0x2150, 0x218F, "NumberForms"),
            // 2190..21FF  Arrows  (Character.UnicodeBlock.ARROWS)
            new UBInfo(0x2190, 0x21FF, "Arrows"),
            // 2200..22FF  Mathematical Operators  (Character.UnicodeBlock.MATHEMATICAL_OPERATORS)
            new UBInfo(0x2200, 0x22FF, "MathematicalOperators"),
            // 2300..23FF  Miscellaneous Technical  (Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL)
            new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"),
            // 2400..243F  Control Pictures  (Character.UnicodeBlock.CONTROL_PICTURES)
            new UBInfo(0x2400, 0x243F, "ControlPictures"),
            // 2440..245F  Optical Character Recognition  (Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION)
            new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"),
            // 2460..24FF  Enclosed Alphanumerics  (Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS)
            new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"),
            // 2500..257F  Box Drawing  (Character.UnicodeBlock.BOX_DRAWING)
            new UBInfo(0x2500, 0x257F, "BoxDrawing"),
            // 2580..259F  Block Elements  (Character.UnicodeBlock.BLOCK_ELEMENTS)
            new UBInfo(0x2580, 0x259F, "BlockElements"),
            // 25A0..25FF  Geometric Shapes  (Character.UnicodeBlock.GEOMETRIC_SHAPES)
            new UBInfo(0x25A0, 0x25FF, "GeometricShapes"),
            // 2600..26FF  Miscellaneous Symbols  (Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS)
            new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"),
            // 2700..27BF  Dingbats  (Character.UnicodeBlock.DINGBATS)
            new UBInfo(0x2700, 0x27BF, "Dingbats"),
            // 2800..28FF  Braille Patterns  (Character.UnicodeBlock.BRAILLE_PATTERNS)
            new UBInfo(0x2800, 0x28FF, "BraillePatterns"),
            // 2E80..2EFF  CJK Radicals Supplement  (Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT)
            new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"),
            // 2F00..2FDF  Kangxi Radicals  (Character.UnicodeBlock.KANGXI_RADICALS)
            new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"),
            // 2FF0..2FFF  Ideographic Description Characters  (Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS)
            new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"),
            // 3000..303F  CJK Symbols and Punctuation  (Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION)
            new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"),
            // 3040..309F  Hiragana  (Character.UnicodeBlock.HIRAGANA)
            new UBInfo(0x3040, 0x309F, "Hiragana"),
            // 30A0..30FF  Katakana  (Character.UnicodeBlock.KATAKANA)
            new UBInfo(0x30A0, 0x30FF, "Katakana"),
            // 3100..312F  Bopomofo  (Character.UnicodeBlock.BOPOMOFO)
            new UBInfo(0x3100, 0x312F, "Bopomofo"),
            // 3130..318F  Hangul Compatibility Jamo  (Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO)
            new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"),
            // 3190..319F  Kanbun  (Character.UnicodeBlock.KANBUN)
            new UBInfo(0x3190, 0x319F, "Kanbun"),
            // 31A0..31BF  Bopomofo Extended  (Character.UnicodeBlock.BOPOMOFO_EXTENDED)
            new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"),
            // 3200..32FF  Enclosed CJK Letters and Months  (Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS)
            new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"),
            // 3300..33FF  CJK Compatibility  (Character.UnicodeBlock.CJK_COMPATIBILITY)
            new UBInfo(0x3300, 0x33FF, "CJKCompatibility"),
            // 3400..4DB5  CJK Unified Ideographs Extension A  (Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A)
            // (table uses 0x4DBF as the upper bound)
            new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"),
            // 4E00..9FFF  CJK Unified Ideographs  (Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS)
            new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"),
            // A000..A48F  Yi Syllables  (Character.UnicodeBlock.YI_SYLLABLES)
            new UBInfo(0xA000, 0xA48F, "YiSyllables"),
            // A490..A4CF  Yi Radicals  (Character.UnicodeBlock.YI_RADICALS)
            new UBInfo(0xA490, 0xA4CF, "YiRadicals"),
            // AC00..D7A3  Hangul Syllables  (Character.UnicodeBlock.HANGUL_SYLLABLES)
            // (table uses 0xD7AF as the upper bound)
            new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"),
            // No entries for the following ranges:
            // D800..DB7F  High Surrogates
            // DB80..DBFF  High Private Use Surrogates
            // DC00..DFFF  Low Surrogates
            // E000..F8FF  Private Use
            // F900..FAFF  CJK Compatibility Ideographs  (Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS)
            new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"),
            // FB00..FB4F  Alphabetic Presentation Forms  (Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS)
            new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"),
            // FB50..FDFF  Arabic Presentation Forms-A  (Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A)
            new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"),
            // FE20..FE2F  Combining Half Marks  (Character.UnicodeBlock.COMBINING_HALF_MARKS)
            new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"),
            // FE30..FE4F  CJK Compatibility Forms  (Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS)
            new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"),
            // FE50..FE6F  Small Form Variants  (Character.UnicodeBlock.SMALL_FORM_VARIANTS)
            new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"),
            // FE70..FEFE  Arabic Presentation Forms-B  (Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B)
            // (table uses 0xFEFF as the upper bound)
            new UBInfo(0xFE70, 0xFEFF, "ArabicPresentationForms-B"),
            // FF00..FFEF  Halfwidth and Fullwidth Forms  (Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
            new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"),
            // FFF0..FFFD  Specials  (Character.UnicodeBlock.SPECIALS)
            // (table uses 0xFFFF as the upper bound)
            new UBInfo(0xFFF0, 0xFFFF, "Specials")
    };
}