Home | History | Annotate | Download | only in regex
      1 /* Licensed to the Apache Software Foundation (ASF) under one or more
      2  * contributor license agreements.  See the NOTICE file distributed with
      3  * this work for additional information regarding copyright ownership.
      4  * The ASF licenses this file to You under the Apache License, Version 2.0
      5  * (the "License"); you may not use this file except in compliance with
      6  * the License.  You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.harmony.tests.java.util.regex;
     18 
     19 import java.util.regex.Matcher;
     20 import java.util.regex.Pattern;
     21 import java.util.regex.PatternSyntaxException;
     22 
     23 import junit.framework.TestCase;
     24 
     25 /**
     26  * Tests simple Pattern compilation and Matcher methods
     27  */
     28 @SuppressWarnings("nls")
     29 public class Pattern2Test extends TestCase {
     30     public void testSimpleMatch() throws PatternSyntaxException {
     31         Pattern p = Pattern.compile("foo.*");
     32 
     33         Matcher m1 = p.matcher("foo123");
     34         assertTrue(m1.matches());
     35         assertTrue(m1.find(0));
     36         assertTrue(m1.lookingAt());
     37 
     38         Matcher m2 = p.matcher("fox");
     39         assertFalse(m2.matches());
     40         assertFalse(m2.find(0));
     41         assertFalse(m2.lookingAt());
     42 
     43         assertTrue(Pattern.matches("foo.*", "foo123"));
     44         assertFalse(Pattern.matches("foo.*", "fox"));
     45 
     46         assertFalse(Pattern.matches("bar", "foobar"));
     47 
     48         assertTrue(Pattern.matches("", ""));
     49     }
     50 
     51     public void testCursors() {
     52         Pattern p;
     53         Matcher m;
     54 
     55         try {
     56             p = Pattern.compile("foo");
     57 
     58             m = p.matcher("foobar");
     59             assertTrue(m.find());
     60             assertEquals(0, m.start());
     61             assertEquals(3, m.end());
     62             assertFalse(m.find());
     63 
     64             // Note: also testing reset here
     65             m.reset();
     66             assertTrue(m.find());
     67             assertEquals(0, m.start());
     68             assertEquals(3, m.end());
     69             assertFalse(m.find());
     70 
     71             m.reset("barfoobar");
     72             assertTrue(m.find());
     73             assertEquals(3, m.start());
     74             assertEquals(6, m.end());
     75             assertFalse(m.find());
     76 
     77             m.reset("barfoo");
     78             assertTrue(m.find());
     79             assertEquals(3, m.start());
     80             assertEquals(6, m.end());
     81             assertFalse(m.find());
     82 
     83             m.reset("foobarfoobarfoo");
     84             assertTrue(m.find());
     85             assertEquals(0, m.start());
     86             assertEquals(3, m.end());
     87             assertTrue(m.find());
     88             assertEquals(6, m.start());
     89             assertEquals(9, m.end());
     90             assertTrue(m.find());
     91             assertEquals(12, m.start());
     92             assertEquals(15, m.end());
     93             assertFalse(m.find());
     94             assertTrue(m.find(0));
     95             assertEquals(0, m.start());
     96             assertEquals(3, m.end());
     97             assertTrue(m.find(4));
     98             assertEquals(6, m.start());
     99             assertEquals(9, m.end());
    100         } catch (PatternSyntaxException e) {
    101             System.out.println(e.getMessage());
    102             fail();
    103         }
    104     }
    105 
    106     public void testGroups() throws PatternSyntaxException {
    107         Pattern p;
    108         Matcher m;
    109 
    110         p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)");
    111 
    112         m = p.matcher("p1#q3p2q42p5p71p63#q888");
    113         assertTrue(m.find());
    114         assertEquals(0, m.start());
    115         assertEquals(5, m.end());
    116         assertEquals(2, m.groupCount());
    117         assertEquals(0, m.start(0));
    118         assertEquals(5, m.end(0));
    119         assertEquals(0, m.start(1));
    120         assertEquals(2, m.end(1));
    121         assertEquals(3, m.start(2));
    122         assertEquals(5, m.end(2));
    123         assertEquals("p1#q3", m.group());
    124         assertEquals("p1#q3", m.group(0));
    125         assertEquals("p1", m.group(1));
    126         assertEquals("q3", m.group(2));
    127 
    128         assertTrue(m.find());
    129         assertEquals(5, m.start());
    130         assertEquals(10, m.end());
    131         assertEquals(2, m.groupCount());
    132         assertEquals(10, m.end(0));
    133         assertEquals(5, m.start(1));
    134         assertEquals(7, m.end(1));
    135         assertEquals(7, m.start(2));
    136         assertEquals(10, m.end(2));
    137         assertEquals("p2q42", m.group());
    138         assertEquals("p2q42", m.group(0));
    139         assertEquals("p2", m.group(1));
    140         assertEquals("q42", m.group(2));
    141 
    142         assertTrue(m.find());
    143         assertEquals(15, m.start());
    144         assertEquals(23, m.end());
    145         assertEquals(2, m.groupCount());
    146         assertEquals(15, m.start(0));
    147         assertEquals(23, m.end(0));
    148         assertEquals(15, m.start(1));
    149         assertEquals(18, m.end(1));
    150         assertEquals(19, m.start(2));
    151         assertEquals(23, m.end(2));
    152         assertEquals("p63#q888", m.group());
    153         assertEquals("p63#q888", m.group(0));
    154         assertEquals("p63", m.group(1));
    155         assertEquals("q888", m.group(2));
    156         assertFalse(m.find());
    157     }
    158 
    159     public void testReplace() throws PatternSyntaxException {
    160         Pattern p;
    161         Matcher m;
    162 
    163         // Note: examples from book,
    164         // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171
    165         p = Pattern.compile("a*b");
    166 
    167         m = p.matcher("aabfooaabfooabfoob");
    168         assertTrue(m.replaceAll("-").equals("-foo-foo-foo-"));
    169         assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob"));
    170 
    171         /*
    172          * p = Pattern.compile ("\\p{Blank}");
    173          *
    174          * m = p.matcher ("fee fie foe fum"); assertTrue
    175          * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue
    176          * (m.replaceAll("-").equals ("fee-fie-foe-fum"));
    177          */
    178 
    179         p = Pattern.compile("([bB])yte");
    180 
    181         m = p.matcher("Byte for byte");
    182         assertTrue(m.replaceFirst("$1ite").equals("Bite for byte"));
    183         assertTrue(m.replaceAll("$1ite").equals("Bite for bite"));
    184 
    185         p = Pattern.compile("\\d\\d\\d\\d([- ])");
    186 
    187         m = p.matcher("card #1234-5678-1234");
    188         assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234"));
    189         assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234"));
    190 
    191         p = Pattern.compile("(up|left)( *)(right|down)");
    192 
    193         m = p.matcher("left right, up down");
    194         assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down"));
    195         assertTrue(m.replaceAll("$3$2$1").equals("right left, down up"));
    196 
    197         p = Pattern.compile("([CcPp][hl]e[ea]se)");
    198 
    199         m = p.matcher("I want cheese. Please.");
    200         assertTrue(m.replaceFirst("<b> $1 </b>").equals(
    201                 "I want <b> cheese </b>. Please."));
    202         assertTrue(m.replaceAll("<b> $1 </b>").equals(
    203                 "I want <b> cheese </b>. <b> Please </b>."));
    204     }
    205 
    206     public void testEscapes() throws PatternSyntaxException {
    207         Pattern p;
    208         Matcher m;
    209 
    210         // Test \\ sequence
    211         p = Pattern.compile("([a-z]+)\\\\([a-z]+);");
    212         m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;");
    213         assertTrue(m.find());
    214         assertEquals("fred", m.group(1));
    215         assertEquals("ginger", m.group(2));
    216         assertTrue(m.find());
    217         assertEquals("abbott", m.group(1));
    218         assertEquals("costello", m.group(2));
    219         assertTrue(m.find());
    220         assertEquals("jekell", m.group(1));
    221         assertEquals("hyde", m.group(2));
    222         assertFalse(m.find());
    223 
    224         // Test \n, \t, \r, \f, \e, \a sequences
    225         p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)");
    226         m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh");
    227         assertTrue(m.find());
    228         assertEquals("aa", m.group(1));
    229         assertEquals("bb", m.group(2));
    230         assertTrue(m.find());
    231         assertEquals("cc", m.group(1));
    232         assertEquals("dd", m.group(2));
    233         assertTrue(m.find());
    234         assertEquals("ee", m.group(1));
    235         assertEquals("ff", m.group(2));
    236         assertTrue(m.find());
    237         assertEquals("gg", m.group(1));
    238         assertEquals("hh", m.group(2));
    239         assertFalse(m.find());
    240 
    241         // Test \\u and \\x sequences
    242 p = Pattern.compile("([0-9]+)[\\u0020:\\x21];");
    243         m = p.matcher("11:;22 ;33-;44!;");
    244         assertTrue(m.find());
    245         assertEquals("11", m.group(1));
    246         assertTrue(m.find());
    247         assertEquals("22", m.group(1));
    248         assertTrue(m.find());
    249         assertEquals("44", m.group(1));
    250         assertFalse(m.find());
    251 
    252         // Test invalid unicode sequences
    253         try {
    254             p = Pattern.compile("\\u");
    255             fail("PatternSyntaxException expected");
    256         } catch (PatternSyntaxException e) {
    257         }
    258 
    259         try {
    260             p = Pattern.compile("\\u;");
    261             fail("PatternSyntaxException expected");
    262         } catch (PatternSyntaxException e) {
    263         }
    264 
    265         try {
    266             p = Pattern.compile("\\u002");
    267             fail("PatternSyntaxException expected");
    268         } catch (PatternSyntaxException e) {
    269         }
    270 
    271         try {
    272             p = Pattern.compile("\\u002;");
    273             fail("PatternSyntaxException expected");
    274         } catch (PatternSyntaxException e) {
    275         }
    276 
    277         // Test invalid hex sequences
    278         try {
    279             p = Pattern.compile("\\x");
    280             fail("PatternSyntaxException expected");
    281         } catch (PatternSyntaxException e) {
    282         }
    283 
    284         try {
    285             p = Pattern.compile("\\x;");
    286             fail("PatternSyntaxException expected");
    287         } catch (PatternSyntaxException e) {
    288         }
    289 
    290         // icu4c allows 1 to 6 hex digits in \x escapes.
    291         p = Pattern.compile("\\xa");
    292         p = Pattern.compile("\\xab");
    293         p = Pattern.compile("\\xabc");
    294         p = Pattern.compile("\\xabcd");
    295         p = Pattern.compile("\\xabcde");
    296         p = Pattern.compile("\\xabcdef");
    297         // (Further digits would just be treated as characters after the escape.)
    298         try {
    299             p = Pattern.compile("\\xg");
    300             fail();
    301         } catch (PatternSyntaxException expected) {
    302         }
    303 
    304         // Test \0 (octal) sequences (1, 2 and 3 digit)
    305         p = Pattern.compile("([0-9]+)[\\07\\040\\0160];");
    306         m = p.matcher("11\u0007;22:;33 ;44p;");
    307         assertTrue(m.find());
    308         assertEquals("11", m.group(1));
    309         assertTrue(m.find());
    310         assertEquals("33", m.group(1));
    311         assertTrue(m.find());
    312         assertEquals("44", m.group(1));
    313         assertFalse(m.find());
    314 
    315         // Test invalid octal sequences
    316         try {
    317             p = Pattern.compile("\\08");
    318             fail("PatternSyntaxException expected");
    319         } catch (PatternSyntaxException e) {
    320         }
    321 
    322         // originally contributed test did not check the result
    323         // TODO: check what RI does here
    324         // try {
    325         // p = Pattern.compile("\\0477");
    326         // fail("PatternSyntaxException expected");
    327         // } catch (PatternSyntaxException e) {
    328         // }
    329 
    330         try {
    331             p = Pattern.compile("\\0");
    332             fail("PatternSyntaxException expected");
    333         } catch (PatternSyntaxException e) {
    334         }
    335 
    336         try {
    337             p = Pattern.compile("\\0;");
    338             fail("PatternSyntaxException expected");
    339         } catch (PatternSyntaxException e) {
    340         }
    341 
    342         // Test \c (control character) sequence
    343         p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];");
    344         m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;");
    345         assertTrue(m.find());
    346         assertEquals("11", m.group(1));
    347         assertTrue(m.find());
    348         assertEquals("33", m.group(1));
    349         assertTrue(m.find());
    350         assertEquals("55", m.group(1));
    351         assertTrue(m.find());
    352         assertEquals("66", m.group(1));
    353         assertFalse(m.find());
    354 
    355         // More thorough control escape test
    356         // Ensure that each escape matches exactly the corresponding
    357         // character
    358         // code and no others (well, from 0-255 at least)
    359         int i, j;
    360         for (i = 0; i < 26; i++) {
    361             p = Pattern.compile("\\c" + Character.toString((char) ('A' + i)));
    362             int match_char = -1;
    363             for (j = 0; j < 255; j++) {
    364                 m = p.matcher(Character.toString((char) j));
    365                 if (m.matches()) {
    366                     assertEquals(-1, match_char);
    367                     match_char = j;
    368                 }
    369             }
    370             assertTrue(match_char == i + 1);
    371         }
    372 
    373         // Test invalid control escapes
    374         // icu4c 50 accepts this pattern, and treats it as a literal.
    375         //try {
    376             p = Pattern.compile("\\c");
    377             assertTrue(p.matcher("x\\cy").find());
    378         //    fail(p.matcher("").toString());
    379         //} catch (PatternSyntaxException e) {
    380         //}
    381 
    382         // But \cH works.
    383         p = Pattern.compile("\\cH");
    384         assertTrue(p.matcher("x\u0008y").find());
    385         assertFalse(p.matcher("x\\cHy").find());
    386 
    387         // originally contributed test did not check the result
    388         // TODO: check what RI does here
    389         // try {
    390         // p = Pattern.compile("\\c;");
    391         // fail("PatternSyntaxException expected");
    392         // } catch (PatternSyntaxException e) {
    393         // }
    394         //
    395         // try {
    396         // p = Pattern.compile("\\ca;");
    397         // fail("PatternSyntaxException expected");
    398         // } catch (PatternSyntaxException e) {
    399         // }
    400         //
    401         // try {
    402         // p = Pattern.compile("\\c4;");
    403         // fail("PatternSyntaxException expected");
    404         // } catch (PatternSyntaxException e) {
    405         // }
    406     }
    407 
    408     public void testCharacterClasses() throws PatternSyntaxException {
    409         Pattern p;
    410         Matcher m;
    411 
    412         // Test one character range
    413         p = Pattern.compile("[p].*[l]");
    414         m = p.matcher("paul");
    415         assertTrue(m.matches());
    416         m = p.matcher("pool");
    417         assertTrue(m.matches());
    418         m = p.matcher("pong");
    419         assertFalse(m.matches());
    420         m = p.matcher("pl");
    421         assertTrue(m.matches());
    422 
    423         // Test two character range
    424         p = Pattern.compile("[pm].*[lp]");
    425         m = p.matcher("prop");
    426         assertTrue(m.matches());
    427         m = p.matcher("mall");
    428         assertTrue(m.matches());
    429         m = p.matcher("pong");
    430         assertFalse(m.matches());
    431         m = p.matcher("pill");
    432         assertTrue(m.matches());
    433 
    434         // Test range including [ and ]
    435         p = Pattern.compile("[<\\[].*[\\]>]");
    436         m = p.matcher("<foo>");
    437         assertTrue(m.matches());
    438         m = p.matcher("[bar]");
    439         assertTrue(m.matches());
    440         m = p.matcher("{foobar]");
    441         assertFalse(m.matches());
    442         m = p.matcher("<pill]");
    443         assertTrue(m.matches());
    444 
    445         // Test range using ^
    446         p = Pattern.compile("[^bc][a-z]+[tr]");
    447         m = p.matcher("pat");
    448         assertTrue(m.matches());
    449         m = p.matcher("liar");
    450         assertTrue(m.matches());
    451         m = p.matcher("car");
    452         assertFalse(m.matches());
    453         m = p.matcher("gnat");
    454         assertTrue(m.matches());
    455 
    456         // Test character range using -
    457         p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]");
    458         m = p.matcher("d__F-8");
    459         assertTrue(m.matches());
    460         m = p.matcher("c_a-q");
    461         assertTrue(m.matches());
    462         m = p.matcher("a__R-a");
    463         assertFalse(m.matches());
    464         m = p.matcher("r_____d-----5");
    465         assertTrue(m.matches());
    466 
    467         // Test range using unicode characters and unicode and hex escapes
    468         p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]");
    469         m = p.matcher("\u2000_q-\u0007");
    470         assertTrue(m.matches());
    471         m = p.matcher("\u1234_z-\u0001");
    472         assertTrue(m.matches());
    473         m = p.matcher("r_p-q");
    474         assertFalse(m.matches());
    475         m = p.matcher("\u2345_____d-----\n");
    476         assertTrue(m.matches());
    477 
    478         // Test ranges including the "-" character
    479         // "---" collides with icu4c's "--" operator, and likely to be user error anyway.
    480         if (false) {
    481             p = Pattern.compile("[\\*-/]_+[---]!+[--AP]");
    482             m = p.matcher("-_-!!A");
    483             assertTrue(m.matches());
    484             m = p.matcher("\u002b_-!!!-");
    485             assertTrue(m.matches());
    486             m = p.matcher("!_-!@");
    487             assertFalse(m.matches());
    488             m = p.matcher(",______-!!!!!!!P");
    489             assertTrue(m.matches());
    490         }
    491 
    492         // Test nested ranges
    493         p = Pattern.compile("[pm[t]][a-z]+[[r]lp]");
    494         m = p.matcher("prop");
    495         assertTrue(m.matches());
    496         m = p.matcher("tsar");
    497         assertTrue(m.matches());
    498         m = p.matcher("pong");
    499         assertFalse(m.matches());
    500         m = p.matcher("moor");
    501         assertTrue(m.matches());
    502 
    503         // Test character class intersection with &&
    504         // TODO: figure out what x&&y or any class with a null intersection
    505         // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match
    506         // nothing" and doesn't mean "match anything" so I'm stumped.
    507         p = Pattern.compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+");
    508         m = p.matcher("h--q--x--");
    509         assertTrue(m.matches());
    510         m = p.matcher("hog--q-x-");
    511         assertTrue(m.matches());
    512         m = p.matcher("ape--q-x-");
    513         assertFalse(m.matches());
    514         m = p.matcher("mop--q-x----");
    515         assertTrue(m.matches());
    516 
    517         // Test error cases with &&
    518         // This is an RI bug that icu4c doesn't have.
    519         if (false) {
    520             p = Pattern.compile("[&&[xyz]]");
    521             m = p.matcher("&");
    522             // System.out.println(m.matches());
    523             m = p.matcher("x");
    524             // System.out.println(m.matches());
    525             m = p.matcher("y");
    526             // System.out.println(m.matches());
    527         }
    528         p = Pattern.compile("[[xyz]&[axy]]");
    529         m = p.matcher("x");
    530         // System.out.println(m.matches());
    531         m = p.matcher("z");
    532         // System.out.println(m.matches());
    533         m = p.matcher("&");
    534         // System.out.println(m.matches());
    535         p = Pattern.compile("[abc[123]&&[345]def]");
    536         m = p.matcher("a");
    537         // System.out.println(m.matches());
    538 
    539         // icu4c rightly considers a missing rhs to && a syntax error.
    540         if (false) {
    541             p = Pattern.compile("[[xyz]&&]");
    542         }
    543 
    544         p = Pattern.compile("[[abc]&]");
    545 
    546         try {
    547             p = Pattern.compile("[[abc]&&");
    548             fail("PatternSyntaxException expected");
    549         } catch (PatternSyntaxException e) {
    550         }
    551 
    552         p = Pattern.compile("[[abc]\\&&[xyz]]");
    553 
    554         p = Pattern.compile("[[abc]&\\&[xyz]]");
    555 
    556         // Test 3-way intersection
    557         p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]");
    558         m = p.matcher("g");
    559         assertTrue(m.matches());
    560         m = p.matcher("m");
    561         assertFalse(m.matches());
    562 
    563         // Test nested intersection
    564         p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]");
    565         m = p.matcher("g");
    566         assertTrue(m.matches());
    567         m = p.matcher("m");
    568         assertFalse(m.matches());
    569 
    570         // Test character class subtraction with && and ^
    571         p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]");
    572         m = p.matcher("pop");
    573         assertTrue(m.matches());
    574         m = p.matcher("tag");
    575         assertTrue(m.matches());
    576         m = p.matcher("eat");
    577         assertFalse(m.matches());
    578         m = p.matcher("tax");
    579         assertFalse(m.matches());
    580         m = p.matcher("zip");
    581         assertTrue(m.matches());
    582 
    583         // Test . (DOT), with and without DOTALL
    584         // Note: DOT not allowed in character classes
    585         p = Pattern.compile(".+/x.z");
    586         m = p.matcher("!$/xyz");
    587         assertTrue(m.matches());
    588         m = p.matcher("%\n\r/x\nz");
    589         assertFalse(m.matches());
    590         p = Pattern.compile(".+/x.z", Pattern.DOTALL);
    591         m = p.matcher("%\n\r/x\nz");
    592         assertTrue(m.matches());
    593 
    594         // Test \d (digit)
    595         p = Pattern.compile("\\d+[a-z][\\dx]");
    596         m = p.matcher("42a6");
    597         assertTrue(m.matches());
    598         m = p.matcher("21zx");
    599         assertTrue(m.matches());
    600         m = p.matcher("ab6");
    601         assertFalse(m.matches());
    602         m = p.matcher("56912f9");
    603         assertTrue(m.matches());
    604 
    605         // Test \D (not a digit)
    606         p = Pattern.compile("\\D+[a-z]-[\\D3]");
    607         m = p.matcher("za-p");
    608         assertTrue(m.matches());
    609         m = p.matcher("%!e-3");
    610         assertTrue(m.matches());
    611         m = p.matcher("9a-x");
    612         assertFalse(m.matches());
    613         m = p.matcher("\u1234pp\ny-3");
    614         assertTrue(m.matches());
    615 
    616         // Test \s (whitespace)
    617         p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>");
    618         m = p.matcher("<cat \t1\fx>");
    619         assertTrue(m.matches());
    620         m = p.matcher("<cat \t1\f >");
    621         assertFalse(m.matches());
    622         m = p
    623                 .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz");
    624         assertTrue(m.find());
    625         assertTrue(m.find());
    626         assertFalse(m.find());
    627 
    628         // Test \S (not whitespace)
    629         p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>");
    630         m = p.matcher("<f $0**\n** 221>");
    631         assertTrue(m.matches());
    632         m = p.matcher("<x 441\t221>");
    633         assertTrue(m.matches());
    634         m = p.matcher("<z \t9\ng 221>");
    635         assertFalse(m.matches());
    636         m = p.matcher("<z 60\ngg\u1234\f221>");
    637         assertTrue(m.matches());
    638         p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>");
    639         m = p.matcher("<f $0**\n** 221x>");
    640         assertTrue(m.matches());
    641         m = p.matcher("<x 441\t221z>");
    642         assertTrue(m.matches());
    643         m = p.matcher("<x 441\t221 >");
    644         assertFalse(m.matches());
    645         m = p.matcher("<x 441\t221c>");
    646         assertFalse(m.matches());
    647         m = p.matcher("<z \t9\ng 221x>");
    648         assertFalse(m.matches());
    649         m = p.matcher("<z 60\ngg\u1234\f221\u0001>");
    650         assertTrue(m.matches());
    651 
    652         // Test \w (ascii word)
    653         p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;");
    654         m = p.matcher("<f1 99;!foo5/a$7;");
    655         assertTrue(m.matches());
    656         m = p.matcher("<f$ 99;!foo5/a$7;");
    657         assertFalse(m.matches());
    658         m = p
    659                 .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;");
    660         assertTrue(m.matches());
    661 
    662         // Test \W (not an ascii word)
    663         p = Pattern.compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;");
    664         m = p.matcher("<$foo3\n99;_bar\t0;");
    665         assertTrue(m.matches());
    666         m = p.matcher("<hh 99;_g 0;");
    667         assertFalse(m.matches());
    668         m = p.matcher("<*xx\t00;^zz\f11;");
    669         assertTrue(m.matches());
    670 
    671         // Test x|y pattern
    672         // TODO
    673     }
    674 
    675     public void testPOSIXGroups() throws PatternSyntaxException {
    676         Pattern p;
    677         Matcher m;
    678 
    679         // Test POSIX groups using \p and \P (in the group and not in the group)
    680         // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct,
    681         // Graph, Print, Blank, Space, Cntrl
    682         // Test \p{Lower}
    683         /*
    684          * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>");
    685          * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m = p.matcher("<p4%:Z\tq>");
    686          * assertTrue(m.matches()); m = p.matcher("<A6#:e e>");
    687          * assertFalse(m.matches());
    688          */
    689         p = Pattern.compile("\\p{Lower}+");
    690         m = p.matcher("abcdefghijklmnopqrstuvwxyz");
    691         assertTrue(m.matches());
    692 
    693         // Invalid uses of \p{Lower}
    694         try {
    695             p = Pattern.compile("\\p");
    696             fail("PatternSyntaxException expected");
    697         } catch (PatternSyntaxException e) {
    698         }
    699 
    700         try {
    701             p = Pattern.compile("\\p;");
    702             fail("PatternSyntaxException expected");
    703         } catch (PatternSyntaxException e) {
    704         }
    705 
    706         try {
    707             p = Pattern.compile("\\p{");
    708             fail("PatternSyntaxException expected");
    709         } catch (PatternSyntaxException e) {
    710         }
    711 
    712         try {
    713             p = Pattern.compile("\\p{;");
    714             fail("PatternSyntaxException expected");
    715         } catch (PatternSyntaxException e) {
    716         }
    717 
    718         try {
    719             p = Pattern.compile("\\p{Lower");
    720             fail("PatternSyntaxException expected");
    721         } catch (PatternSyntaxException e) {
    722         }
    723 
    724         try {
    725             p = Pattern.compile("\\p{Lower;");
    726             fail("PatternSyntaxException expected");
    727         } catch (PatternSyntaxException e) {
    728         }
    729 
    730         // Test \p{Upper}
    731         /*
    732          * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>");
    733          * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m = p.matcher("<P4%:z\tQ>");
    734          * assertTrue(m.matches()); m = p.matcher("<a6#:E E>");
    735          * assertFalse(m.matches());
    736          */
    737         p = Pattern.compile("\\p{Upper}+");
    738         m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    739         assertTrue(m.matches());
    740 
    741         // Invalid uses of \p{Upper}
    742         try {
    743             p = Pattern.compile("\\p{Upper");
    744             fail("PatternSyntaxException expected");
    745         } catch (PatternSyntaxException e) {
    746         }
    747 
    748         try {
    749             p = Pattern.compile("\\p{Upper;");
    750             fail("PatternSyntaxException expected");
    751         } catch (PatternSyntaxException e) {
    752         }
    753 
    754         // Test \p{ASCII}
    755         /*
    756          * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>");
    757          * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m =
    758          * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m =
    759          * p.matcher("<\u00846#:E E>"); assertFalse(m.matches())
    760          */
    761         int i;
    762         p = Pattern.compile("\\p{ASCII}");
    763         for (i = 0; i < 0x80; i++) {
    764             m = p.matcher(Character.toString((char) i));
    765             assertTrue(m.matches());
    766         }
    767         for (; i < 0xff; i++) {
    768             m = p.matcher(Character.toString((char) i));
    769             assertFalse(m.matches());
    770         }
    771 
    772         // Invalid uses of \p{ASCII}
    773         try {
    774             p = Pattern.compile("\\p{ASCII");
    775             fail("PatternSyntaxException expected");
    776         } catch (PatternSyntaxException e) {
    777         }
    778 
    779         try {
    780             p = Pattern.compile("\\p{ASCII;");
    781             fail("PatternSyntaxException expected");
    782         } catch (PatternSyntaxException e) {
    783         }
    784 
    785         // Test \p{Alpha}
    786         // TODO
    787 
    788         // Test \p{Digit}
    789         // TODO
    790 
    791         // Test \p{XDigit}
    792         // TODO
    793 
    794         // Test \p{Alnum}
    795         // TODO
    796 
    797         // Test \p{Punct}
    798         // TODO
    799 
    800         // Test \p{Graph}
    801         // TODO
    802 
    803         // Test \p{Print}
    804         // TODO
    805 
    806         // Test \p{Blank}
    807         // TODO
    808 
    809         // Test \p{Space}
    810         // TODO
    811 
    812         // Test \p{Cntrl}
    813         // TODO
    814     }
    815 
    816     public void testUnicodeBlocks() throws PatternSyntaxException {
    817         Pattern p;
    818         Matcher m;
    819         int i, j;
    820 
    821         // Test Unicode blocks using \p and \P
    822         // FIXME:
    823         // Note that LatinExtended-B and ArabicPresentations-B are unrecognized
    824         // by the reference JDK.
    825         for (i = 0; i < UBlocks.length; i++) {
    826             /*
    827              * p = Pattern.compile("\\p{"+UBlocks[i].name+"}");
    828              *
    829              * if (UBlocks[i].low > 0) { m =
    830              * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
    831              * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <=
    832              * UBlocks[i].high; j++) { m =
    833              * p.matcher(Character.toString((char)j)); assertTrue(m.matches()); }
    834              * if (UBlocks[i].high < 0xFFFF) { m =
    835              * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
    836              * assertFalse(m.matches()); }
    837              *
    838              * p = Pattern.compile("\\P{"+UBlocks[i].name+"}");
    839              *
    840              * if (UBlocks[i].low > 0) { m =
    841              * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
    842              * assertTrue(m.matches()); } for (j=UBlocks[i].low; j <
    843              * UBlocks[i].high; j++) { m =
    844              * p.matcher(Character.toString((char)j)); assertFalse(m.matches()); }
    845              * if (UBlocks[i].high < 0xFFFF) { m =
    846              * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
    847              * assertTrue(m.matches()); }
    848              */
    849 
    850             p = Pattern.compile("\\p{In" + UBlocks[i].name + "}");
    851 
    852             if (UBlocks[i].low > 0) {
    853                 m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
    854                 assertFalse(UBlocks[i].name, m.matches());
    855             }
    856             for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) {
    857                 m = p.matcher(Character.toString((char) j));
    858                 assertTrue(UBlocks[i].name, m.matches());
    859             }
    860             if (UBlocks[i].high < 0xFFFF) {
    861                 m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
    862                 assertFalse(UBlocks[i].name, m.matches());
    863             }
    864 
    865             p = Pattern.compile("\\P{In" + UBlocks[i].name + "}");
    866 
    867             if (UBlocks[i].low > 0) {
    868                 m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
    869                 assertTrue(UBlocks[i].name, m.matches());
    870             }
    871             for (j = UBlocks[i].low; j < UBlocks[i].high; j++) {
    872                 m = p.matcher(Character.toString((char) j));
    873                 assertFalse(UBlocks[i].name, m.matches());
    874             }
    875             if (UBlocks[i].high < 0xFFFF) {
    876                 m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
    877                 assertTrue(UBlocks[i].name, m.matches());
    878             }
    879         }
    880     }
    881 
    882     public void testMisc() throws PatternSyntaxException {
    883         Pattern p;
    884         Matcher m;
    885 
    886         // Test (?>...)
    887         // TODO
    888 
    889         // Test (?onflags-offflags)
    890         // Valid flags are i,m,d,s,u,x
    891         // TODO
    892 
    893         // Test (?onflags-offflags:...)
    894         // TODO
    895 
    896         // Test \Q, \E
    897         p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+");
    898         m = p.matcher("abc;[a-z]+;\\Q(foo.*);411");
    899         assertTrue(m.matches());
    900         m = p.matcher("abc;def;foo42;555");
    901         assertFalse(m.matches());
    902         m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123");
    903         assertFalse(m.matches());
    904 
    905         p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+");
    906         m = p.matcher("abc;foo5-(...);123");
    907         assertTrue(m.matches());
    908         assertEquals("foo5-(...)", m.group(1));
    909         m = p.matcher("abc;foo9-(xxx);789");
    910         assertFalse(m.matches());
    911 
    912         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+");
    913         m = p.matcher("abc;bar0-def$-;123");
    914         assertTrue(m.matches());
    915 
    916         // FIXME:
    917         // This should work the same as the pattern above but fails with the
    918         // the reference JDK
    919         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+");
    920         m = p.matcher("abc;bar0-def$-;123");
    921         // assertTrue(m.matches());
    922 
    923         // FIXME:
    924         // This should work too .. it looks as if just about anything that
    925         // has more
    926         // than one character between \Q and \E is broken in the the reference
    927         // JDK
    928         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+");
    929         m = p.matcher("abc;bar0-def[99]-]0x[;123");
    930         // assertTrue(m.matches());
    931 
    932         // This is the same as above but with explicit escapes .. and this
    933         // does work
    934         // on the the reference JDK
    935         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+");
    936         m = p.matcher("abc;bar0-def[99]-]0x[;123");
    937         assertTrue(m.matches());
    938 
    939         // Test #<comment text>
    940         // TODO
    941     }
    942 
    943     public void testCompile1() throws PatternSyntaxException {
    944         Pattern pattern = Pattern
    945                 .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*");
    946         String name = "iso-8859-1";
    947         assertTrue(pattern.matcher(name).matches());
    948     }
    949 
    950     public void testCompile2() throws PatternSyntaxException {
    951         String findString = "\\Qimport\\E";
    952 
    953         Pattern pattern = Pattern.compile(findString, 0);
    954         Matcher matcher = pattern.matcher(new String(
    955                 "import a.A;\n\n import b.B;\nclass C {}"));
    956 
    957         assertTrue(matcher.find(0));
    958     }
    959 
    960     public void testCompile3() throws PatternSyntaxException {
    961         Pattern p;
    962         Matcher m;
    963         p = Pattern.compile("a$");
    964         m = p.matcher("a\n");
    965         assertTrue(m.find());
    966         assertEquals("a", m.group());
    967         assertFalse(m.find());
    968 
    969         p = Pattern.compile("(a$)");
    970         m = p.matcher("a\n");
    971         assertTrue(m.find());
    972         assertEquals("a", m.group());
    973         assertEquals("a", m.group(1));
    974         assertFalse(m.find());
    975 
    976         p = Pattern.compile("^.*$", Pattern.MULTILINE);
    977 
    978         m = p.matcher("a\n");
    979         assertTrue(m.find());
    980         // System.out.println("["+m.group()+"]");
    981         assertEquals("a", m.group());
    982         assertFalse(m.find());
    983 
    984         m = p.matcher("a\nb\n");
    985         assertTrue(m.find());
    986         // System.out.println("["+m.group()+"]");
    987         assertEquals("a", m.group());
    988         assertTrue(m.find());
    989         // System.out.println("["+m.group()+"]");
    990         assertEquals("b", m.group());
    991         assertFalse(m.find());
    992 
    993         m = p.matcher("a\nb");
    994         assertTrue(m.find());
    995         // System.out.println("["+m.group()+"]");
    996         assertEquals("a", m.group());
    997         assertTrue(m.find());
    998         assertEquals("b", m.group());
    999         assertFalse(m.find());
   1000 
   1001         m = p.matcher("\naa\r\nbb\rcc\n\n");
   1002         assertTrue(m.find());
   1003         // System.out.println("["+m.group()+"]");
   1004         assertTrue(m.group().equals(""));
   1005         assertTrue(m.find());
   1006         // System.out.println("["+m.group()+"]");
   1007         assertEquals("aa", m.group());
   1008         assertTrue(m.find());
   1009         // System.out.println("["+m.group()+"]");
   1010         assertEquals("bb", m.group());
   1011         assertTrue(m.find());
   1012         // System.out.println("["+m.group()+"]");
   1013         assertEquals("cc", m.group());
   1014         assertTrue(m.find());
   1015         // System.out.println("["+m.group()+"]");
   1016         assertTrue(m.group().equals(""));
   1017         assertFalse(m.find());
   1018 
   1019         m = p.matcher("a");
   1020         assertTrue(m.find());
   1021         assertEquals("a", m.group());
   1022         assertFalse(m.find());
   1023 
   1024         m = p.matcher("");
   1025         // This differs from the RI behaviour but seems more correct.
   1026         assertTrue(m.find());
   1027         assertTrue(m.group().equals(""));
   1028         assertFalse(m.find());
   1029 
   1030         p = Pattern.compile("^.*$");
   1031         m = p.matcher("");
   1032         assertTrue(m.find());
   1033         assertTrue(m.group().equals(""));
   1034         assertFalse(m.find());
   1035     }
   1036 
   1037     public void testCompile4() throws PatternSyntaxException {
   1038         String findString = "\\Qpublic\\E";
   1039         StringBuffer text = new StringBuffer("    public class Class {\n"
   1040                 + "    public class Class {");
   1041 
   1042         Pattern pattern = Pattern.compile(findString, 0);
   1043         Matcher matcher = pattern.matcher(text);
   1044 
   1045         boolean found = matcher.find();
   1046         assertTrue(found);
   1047         assertEquals(4, matcher.start());
   1048         if (found) {
   1049             // modify text
   1050             text.delete(0, text.length());
   1051             text.append("Text have been changed.");
   1052             matcher.reset(text);
   1053         }
   1054 
   1055         found = matcher.find();
   1056         assertFalse(found);
   1057     }
   1058 
   1059     public void testCompile5() throws PatternSyntaxException {
   1060         Pattern p = Pattern.compile("^[0-9]");
   1061         String s[] = p.split("12", -1);
   1062         assertEquals("", s[0]);
   1063         assertEquals("2", s[1]);
   1064         assertEquals(2, s.length);
   1065     }
   1066 
   1067     // public void testCompile6() {
   1068     // String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
   1069     // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
   1070     // try {
   1071     // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
   1072     // assertTrue(true);
   1073     // } catch (PatternSyntaxException e) {
   1074     // System.out.println(e.getMessage());
   1075     // assertTrue(false);
   1076     // }
   1077     // }
   1078 
   1079     private static class UBInfo {
   1080         public UBInfo(int low, int high, String name) {
   1081             this.name = name;
   1082             this.low = low;
   1083             this.high = high;
   1084         }
   1085 
   1086         public String name;
   1087 
   1088         public int low, high;
   1089     }
   1090 
   1091     // A table representing the unicode categories
   1092     // private static UBInfo[] UCategories = {
   1093     // Lu
   1094     // Ll
   1095     // Lt
   1096     // Lm
   1097     // Lo
   1098     // Mn
   1099     // Mc
   1100     // Me
   1101     // Nd
   1102     // Nl
   1103     // No
   1104     // Pc
   1105     // Pd
   1106     // Ps
   1107     // Pe
   1108     // Pi
   1109     // Pf
   1110     // Po
   1111     // Sm
   1112     // Sc
   1113     // Sk
   1114     // So
   1115     // Zs
   1116     // Zl
   1117     // Zp
   1118     // Cc
   1119     // Cf
   1120     // Cs
   1121     // Co
   1122     // Cn
   1123     // };
   1124 
    // A table representing the unicode character blocks.
    // Each entry holds the first and last code unit of a block (both are
    // treated as inclusive by testUnicodeBlocks) and the block name that the
    // test appends to "In" to build \p{In...} and \P{In...} patterns.
    // The comment above each entry gives the range and the spaced block name;
    // the trailing comment names the corresponding Character.UnicodeBlock
    // constant.
    private static UBInfo[] UBlocks = {
    /* 0000; 007F; Basic Latin */
    new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN
            /* 0080; 00FF; Latin-1 Supplement */
            new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT
            /* 0100; 017F; Latin Extended-A */
            new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A
            /* 0180; 024F; Latin Extended-B */
            // Disabled entry: the FIXME in testUnicodeBlocks notes that
            // LatinExtended-B is unrecognized by the reference JDK.
            // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), //
            // Character.UnicodeBlock.LATIN_EXTENDED_B
            /* 0250; 02AF; IPA Extensions */
            new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS
            /* 02B0; 02FF; Spacing Modifier Letters */
            new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS
            /* 0300; 036F; Combining Diacritical Marks */
            new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
            /* 0370; 03FF; Greek */
            new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK
            /* 0400; 04FF; Cyrillic */
            new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC
            /* 0530; 058F; Armenian */
            new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN
            /* 0590; 05FF; Hebrew */
            new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW
            /* 0600; 06FF; Arabic */
            new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC
            /* 0700; 074F; Syriac */
            new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC
            /* 0780; 07BF; Thaana */
            new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA
            /* 0900; 097F; Devanagari */
            new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI
            /* 0980; 09FF; Bengali */
            new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI
            /* 0A00; 0A7F; Gurmukhi */
            new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI
            /* 0A80; 0AFF; Gujarati */
            new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI
            /* 0B00; 0B7F; Oriya */
            new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA
            /* 0B80; 0BFF; Tamil */
            new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL
            /* 0C00; 0C7F; Telugu */
            new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU
            /* 0C80; 0CFF; Kannada */
            new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA
            /* 0D00; 0D7F; Malayalam */
            new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM
            /* 0D80; 0DFF; Sinhala */
            new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA
            /* 0E00; 0E7F; Thai */
            new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI
            /* 0E80; 0EFF; Lao */
            new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO
            /* 0F00; 0FFF; Tibetan */
            new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN
            /* 1000; 109F; Myanmar */
            new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR
            /* 10A0; 10FF; Georgian */
            new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN
            /* 1100; 11FF; Hangul Jamo */
            new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO
            /* 1200; 137F; Ethiopic */
            new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC
            /* 13A0; 13FF; Cherokee */
            new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE
            /* 1400; 167F; Unified Canadian Aboriginal Syllabics */
            new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
            /* 1680; 169F; Ogham */
            new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM
            /* 16A0; 16FF; Runic */
            new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC
            /* 1780; 17FF; Khmer */
            new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER
            /* 1800; 18AF; Mongolian */
            new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN
            /* 1E00; 1EFF; Latin Extended Additional */
            new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
            /* 1F00; 1FFF; Greek Extended */
            new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED
            /* 2000; 206F; General Punctuation */
            new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION
            /* 2070; 209F; Superscripts and Subscripts */
            new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
            /* 20A0; 20CF; Currency Symbols */
            new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS
            /* 20D0; 20FF; Combining Marks for Symbols */
            new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
            /* 2100; 214F; Letterlike Symbols */
            new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS
            /* 2150; 218F; Number Forms */
            new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS
            /* 2190; 21FF; Arrows */
            new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS
            /* 2200; 22FF; Mathematical Operators */
            new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS
            /* 2300; 23FF; Miscellaneous Technical */
            new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
            /* 2400; 243F; Control Pictures */
            new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES
            /* 2440; 245F; Optical Character Recognition */
            new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
            /* 2460; 24FF; Enclosed Alphanumerics */
            new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
            /* 2500; 257F; Box Drawing */
            new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING
            /* 2580; 259F; Block Elements */
            new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS
            /* 25A0; 25FF; Geometric Shapes */
            new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES
            /* 2600; 26FF; Miscellaneous Symbols */
            new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
            /* 2700; 27BF; Dingbats */
            new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS
            /* 2800; 28FF; Braille Patterns */
            new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS
            /* 2E80; 2EFF; CJK Radicals Supplement */
            new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
            /* 2F00; 2FDF; Kangxi Radicals */
            new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS
            /* 2FF0; 2FFF; Ideographic Description Characters */
            new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS
            /* 3000; 303F; CJK Symbols and Punctuation */
            new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            /* 3040; 309F; Hiragana */
            new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA
            /* 30A0; 30FF; Katakana */
            new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA
            /* 3100; 312F; Bopomofo */
            new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO
            /* 3130; 318F; Hangul Compatibility Jamo */
            new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
            /* 3190; 319F; Kanbun */
            new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN
            /* 31A0; 31BF; Bopomofo Extended */
            new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED
            /* 3200; 32FF; Enclosed CJK Letters and Months */
            new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
            /* 3300; 33FF; CJK Compatibility */
            new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY
            /* 3400; 4DBF; CJK Unified Ideographs Extension A */
            new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            /* 4E00; 9FFF; CJK Unified Ideographs */
            new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            /* A000; A48F; Yi Syllables */
            new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES
            /* A490; A4CF; Yi Radicals */
            new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS
            /* AC00; D7AF; Hangul Syllables */
            new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES
            // The surrogate and Private Use blocks below are listed for
            // reference only; the table has no entries for them.
            /* D800; DB7F; High Surrogates */
            /* DB80; DBFF; High Private Use Surrogates */
            /* DC00; DFFF; Low Surrogates */
            /* E000; F8FF; Private Use */
            /* F900; FAFF; CJK Compatibility Ideographs */
            new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            /* FB00; FB4F; Alphabetic Presentation Forms */
            new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS
            /* FB50; FDFF; Arabic Presentation Forms-A */
            new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
            /* FE20; FE2F; Combining Half Marks */
            new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS
            /* FE30; FE4F; CJK Compatibility Forms */
            new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
            /* FE50; FE6F; Small Form Variants */
            new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS
            /* FE70; FEFF; Arabic Presentation Forms-B */
            new UBInfo(0xFE70, 0xFEFF, "ArabicPresentationForms-B"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
            /* FF00; FFEF; Halfwidth and Fullwidth Forms */
            new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
            /* FFF0; FFFF; Specials */
            new UBInfo(0xFFF0, 0xFFFF, "Specials") // Character.UnicodeBlock.SPECIALS
    };
}
   1299 }
   1300