Home | History | Annotate | Download | only in regex
      1 /* Licensed to the Apache Software Foundation (ASF) under one or more
      2  * contributor license agreements.  See the NOTICE file distributed with
      3  * this work for additional information regarding copyright ownership.
      4  * The ASF licenses this file to You under the Apache License, Version 2.0
      5  * (the "License"); you may not use this file except in compliance with
      6  * the License.  You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.harmony.regex.tests.java.util.regex;
     18 
     19 import java.util.regex.Matcher;
     20 import java.util.regex.Pattern;
     21 import java.util.regex.PatternSyntaxException;
     22 
     23 import junit.framework.TestCase;
     24 
     25 /**
     26  * Tests simple Pattern compilation and Matcher methods
     27  *
     28  */
     29 public class Pattern2Test extends TestCase {
     30 
     31     public void testSimpleMatch() throws PatternSyntaxException {
     32         Pattern p = Pattern.compile("foo.*");
     33 
     34         Matcher m1 = p.matcher("foo123");
     35         assertTrue(m1.matches());
     36         assertTrue(m1.find(0));
     37         assertTrue(m1.lookingAt());
     38 
     39         Matcher m2 = p.matcher("fox");
     40         assertFalse(m2.matches());
     41         assertFalse(m2.find(0));
     42         assertFalse(m2.lookingAt());
     43 
     44         assertTrue(Pattern.matches("foo.*", "foo123"));
     45         assertFalse(Pattern.matches("foo.*", "fox"));
     46 
     47         assertFalse(Pattern.matches("bar", "foobar"));
     48 
     49         assertTrue(Pattern.matches("", ""));
     50     }
     51     public void testCursors() {
     52         Pattern p;
     53         Matcher m;
     54 
     55         try {
     56             p = Pattern.compile("foo");
     57 
     58             m = p.matcher("foobar");
     59             assertTrue(m.find());
     60             assertEquals(0, m.start());
     61             assertEquals(3, m.end());
     62             assertFalse(m.find());
     63 
     64             // Note: also testing reset here
     65             m.reset();
     66             assertTrue(m.find());
     67             assertEquals(0, m.start());
     68             assertEquals(3, m.end());
     69             assertFalse(m.find());
     70 
     71             m.reset("barfoobar");
     72             assertTrue(m.find());
     73             assertEquals(3, m.start());
     74             assertEquals(6, m.end());
     75             assertFalse(m.find());
     76 
     77             m.reset("barfoo");
     78             assertTrue(m.find());
     79             assertEquals(3, m.start());
     80             assertEquals(6, m.end());
     81             assertFalse(m.find());
     82 
     83             m.reset("foobarfoobarfoo");
     84             assertTrue(m.find());
     85             assertEquals(0, m.start());
     86             assertEquals(3, m.end());
     87             assertTrue(m.find());
     88             assertEquals(6, m.start());
     89             assertEquals(9, m.end());
     90             assertTrue(m.find());
     91             assertEquals(12, m.start());
     92             assertEquals(15, m.end());
     93             assertFalse(m.find());
     94             assertTrue(m.find(0));
     95             assertEquals(0, m.start());
     96             assertEquals(3, m.end());
     97             assertTrue(m.find(4));
     98             assertEquals(6, m.start());
     99             assertEquals(9, m.end());
    100         } catch (PatternSyntaxException e) {
    101             System.out.println(e.getMessage());
    102             fail();
    103         }
    104     }
    105     public void testGroups() throws PatternSyntaxException {
    106         Pattern p;
    107         Matcher m;
    108 
    109         p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)");
    110 
    111         m = p.matcher("p1#q3p2q42p5p71p63#q888");
    112         assertTrue(m.find());
    113         assertEquals(0, m.start());
    114         assertEquals(5, m.end());
    115         assertEquals(2, m.groupCount());
    116         assertEquals(0, m.start(0));
    117         assertEquals(5, m.end(0));
    118         assertEquals(0, m.start(1));
    119         assertEquals(2, m.end(1));
    120         assertEquals(3, m.start(2));
    121         assertEquals(5, m.end(2));
    122         assertEquals("p1#q3", m.group());
    123         assertEquals("p1#q3", m.group(0));
    124         assertEquals("p1", m.group(1));
    125         assertEquals("q3", m.group(2));
    126 
    127         assertTrue(m.find());
    128         assertEquals(5, m.start());
    129         assertEquals(10, m.end());
    130         assertEquals(2, m.groupCount());
    131         assertEquals(10, m.end(0));
    132         assertEquals(5, m.start(1));
    133         assertEquals(7, m.end(1));
    134         assertEquals(7, m.start(2));
    135         assertEquals(10, m.end(2));
    136         assertEquals("p2q42", m.group());
    137         assertEquals("p2q42", m.group(0));
    138         assertEquals("p2", m.group(1));
    139         assertEquals("q42", m.group(2));
    140 
    141         assertTrue(m.find());
    142         assertEquals(15, m.start());
    143         assertEquals(23, m.end());
    144         assertEquals(2, m.groupCount());
    145         assertEquals(15, m.start(0));
    146         assertEquals(23, m.end(0));
    147         assertEquals(15, m.start(1));
    148         assertEquals(18, m.end(1));
    149         assertEquals(19, m.start(2));
    150         assertEquals(23, m.end(2));
    151         assertEquals("p63#q888", m.group());
    152         assertEquals("p63#q888", m.group(0));
    153         assertEquals("p63", m.group(1));
    154         assertEquals("q888", m.group(2));
    155         assertFalse(m.find());
    156     }
    157 
    158     public void testReplace() throws PatternSyntaxException {
    159         Pattern p;
    160         Matcher m;
    161 
    162         // Note: examples from book,
    163         // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171
    164         p = Pattern.compile("a*b");
    165 
    166         m = p.matcher("aabfooaabfooabfoob");
    167         assertTrue(m.replaceAll("-").equals("-foo-foo-foo-"));
    168         assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob"));
    169 
    170         /*
    171          * p = Pattern.compile ("\\p{Blank}");
    172          *
    173          * m = p.matcher ("fee fie foe fum"); assertTrue
    174          * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue
    175          * (m.replaceAll("-").equals ("fee-fie-foe-fum"));
    176          */
    177 
    178         p = Pattern.compile("([bB])yte");
    179 
    180         m = p.matcher("Byte for byte");
    181         assertTrue(m.replaceFirst("$1ite").equals("Bite for byte"));
    182         assertTrue(m.replaceAll("$1ite").equals("Bite for bite"));
    183 
    184         p = Pattern.compile("\\d\\d\\d\\d([- ])");
    185 
    186         m = p.matcher("card #1234-5678-1234");
    187         assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234"));
    188         assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234"));
    189 
    190         p = Pattern.compile("(up|left)( *)(right|down)");
    191 
    192         m = p.matcher("left right, up down");
    193         assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down"));
    194         assertTrue(m.replaceAll("$3$2$1").equals("right left, down up"));
    195 
    196         p = Pattern.compile("([CcPp][hl]e[ea]se)");
    197 
    198         m = p.matcher("I want cheese. Please.");
    199         assertTrue(m.replaceFirst("<b> $1 </b>").equals(
    200                 "I want <b> cheese </b>. Please."));
    201         assertTrue(m.replaceAll("<b> $1 </b>").equals(
    202                 "I want <b> cheese </b>. <b> Please </b>."));
    203     }
    204 
    205     public void testEscapes() throws PatternSyntaxException {
    206         Pattern p;
    207         Matcher m;
    208 
    209         // Test \\ sequence
    210         p = Pattern.compile("([a-z]+)\\\\([a-z]+);");
    211         m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;");
    212         assertTrue(m.find());
    213         assertEquals("fred", m.group(1));
    214         assertEquals("ginger", m.group(2));
    215         assertTrue(m.find());
    216         assertEquals("abbott", m.group(1));
    217         assertEquals("costello", m.group(2));
    218         assertTrue(m.find());
    219         assertEquals("jekell", m.group(1));
    220         assertEquals("hyde", m.group(2));
    221         assertFalse(m.find());
    222 
    223         // Test \n, \t, \r, \f, \e, \a sequences
    224         p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)");
    225         m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh");
    226         assertTrue(m.find());
    227         assertEquals("aa", m.group(1));
    228         assertEquals("bb", m.group(2));
    229         assertTrue(m.find());
    230         assertEquals("cc", m.group(1));
    231         assertEquals("dd", m.group(2));
    232         assertTrue(m.find());
    233         assertEquals("ee", m.group(1));
    234         assertEquals("ff", m.group(2));
    235         assertTrue(m.find());
    236         assertEquals("gg", m.group(1));
    237         assertEquals("hh", m.group(2));
    238         assertFalse(m.find());
    239 
    240         // Test \\u and \\x sequences
    241 /*        p = Pattern.compile("([0-9]+)[\\u0020:\\x21];");
    242         m = p.matcher("11:;22 ;33-;44!;");
    243         assertTrue(m.find());
    244         assertEquals("11", m.group(1));
    245         assertTrue(m.find());
    246         assertEquals("22", m.group(1));
    247         assertTrue(m.find());
    248         assertEquals("44", m.group(1));
    249         assertFalse(m.find());
    250 */
    251         // Test invalid unicode sequences
    252 /*        try {
    253             p = Pattern.compile("\\u");
    254             fail("PatternSyntaxException expected");
    255         } catch (PatternSyntaxException e) {
    256         }
    257 
    258         try {
    259             p = Pattern.compile("\\u;");
    260             fail("PatternSyntaxException expected");
    261         } catch (PatternSyntaxException e) {
    262         }
    263 
    264         try {
    265             p = Pattern.compile("\\u002");
    266             fail("PatternSyntaxException expected");
    267         } catch (PatternSyntaxException e) {
    268         }
    269 
    270         try {
    271             p = Pattern.compile("\\u002;");
    272             fail("PatternSyntaxException expected");
    273         } catch (PatternSyntaxException e) {
    274         }
    275 
    276         // Test invalid hex sequences
    277         try {
    278             p = Pattern.compile("\\x");
    279             fail("PatternSyntaxException expected");
    280         } catch (PatternSyntaxException e) {
    281         }
    282 
    283         try {
    284             p = Pattern.compile("\\x;");
    285             fail("PatternSyntaxException expected");
    286         } catch (PatternSyntaxException e) {
    287         }
    288 
    289         try {
    290             p = Pattern.compile("\\xa");
    291             fail("PatternSyntaxException expected");
    292         } catch (PatternSyntaxException e) {
    293         }
    294 
    295         try {
    296             p = Pattern.compile("\\xa;");
    297             fail("PatternSyntaxException expected");
    298         } catch (PatternSyntaxException e) {
    299         }
    300 */
    301         // Test \0 (octal) sequences (1, 2 and 3 digit)
    302         p = Pattern.compile("([0-9]+)[\\07\\040\\0160];");
    303         m = p.matcher("11\u0007;22:;33 ;44p;");
    304         assertTrue(m.find());
    305         assertEquals("11", m.group(1));
    306         assertTrue(m.find());
    307         assertEquals("33", m.group(1));
    308         assertTrue(m.find());
    309         assertEquals("44", m.group(1));
    310         assertFalse(m.find());
    311 
    312         // Test invalid octal sequences
    313         try {
    314             p = Pattern.compile("\\08");
    315             fail("PatternSyntaxException expected");
    316         } catch (PatternSyntaxException e) {
    317         }
    318 
    319         //originally contributed test did not check the result
    320         //TODO: check what RI does here
    321 //        try {
    322 //            p = Pattern.compile("\\0477");
    323 //            fail("PatternSyntaxException expected");
    324 //        } catch (PatternSyntaxException e) {
    325 //        }
    326 
    327         try {
    328             p = Pattern.compile("\\0");
    329             fail("PatternSyntaxException expected");
    330         } catch (PatternSyntaxException e) {
    331         }
    332 
    333         try {
    334             p = Pattern.compile("\\0;");
    335             fail("PatternSyntaxException expected");
    336         } catch (PatternSyntaxException e) {
    337         }
    338 
    339 
    340         // Test \c (control character) sequence
    341         p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];");
    342         m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;");
    343         assertTrue(m.find());
    344         assertEquals("11", m.group(1));
    345         assertTrue(m.find());
    346         assertEquals("33", m.group(1));
    347         assertTrue(m.find());
    348         assertEquals("55", m.group(1));
    349         assertTrue(m.find());
    350         assertEquals("66", m.group(1));
    351         assertFalse(m.find());
    352 
    353         // More thorough control escape test
    354         // Ensure that each escape matches exactly the corresponding
    355         // character
    356         // code and no others (well, from 0-255 at least)
    357         int i, j;
    358         for (i = 0; i < 26; i++) {
    359             p = Pattern.compile("\\c" + Character.toString((char) ('A' + i)));
    360             int match_char = -1;
    361             for (j = 0; j < 255; j++) {
    362                 m = p.matcher(Character.toString((char) j));
    363                 if (m.matches()) {
    364                     assertEquals(-1, match_char);
    365                     match_char = j;
    366                 }
    367             }
    368             assertTrue(match_char == i + 1);
    369         }
    370 
    371         // Test invalid control escapes
    372 // BEGIN android-removed
    373 // ICU doesn't complain about illegal control sequences
    374 //        try {
    375 //            p = Pattern.compile("\\c");
    376 //            fail("PatternSyntaxException expected");
    377 //        } catch (PatternSyntaxException e) {
    378 //        }
    379 // END android-removed
    380 
    381         //originally contributed test did not check the result
    382         //TODO: check what RI does here
    383 //        try {
    384 //            p = Pattern.compile("\\c;");
    385 //            fail("PatternSyntaxException expected");
    386 //        } catch (PatternSyntaxException e) {
    387 //        }
    388 //
    389 //        try {
    390 //            p = Pattern.compile("\\ca;");
    391 //            fail("PatternSyntaxException expected");
    392 //        } catch (PatternSyntaxException e) {
    393 //        }
    394 //
    395 //        try {
    396 //            p = Pattern.compile("\\c4;");
    397 //            fail("PatternSyntaxException expected");
    398 //        } catch (PatternSyntaxException e) {
    399 //        }
    400     }
    401     public void testCharacterClasses() throws PatternSyntaxException {
    402         Pattern p;
    403         Matcher m;
    404 
    405         // Test one character range
    406         p = Pattern.compile("[p].*[l]");
    407         m = p.matcher("paul");
    408         assertTrue(m.matches());
    409         m = p.matcher("pool");
    410         assertTrue(m.matches());
    411         m = p.matcher("pong");
    412         assertFalse(m.matches());
    413         m = p.matcher("pl");
    414         assertTrue(m.matches());
    415 
    416         // Test two character range
    417         p = Pattern.compile("[pm].*[lp]");
    418         m = p.matcher("prop");
    419         assertTrue(m.matches());
    420         m = p.matcher("mall");
    421         assertTrue(m.matches());
    422         m = p.matcher("pong");
    423         assertFalse(m.matches());
    424         m = p.matcher("pill");
    425         assertTrue(m.matches());
    426 
    427         // Test range including [ and ]
    428         p = Pattern.compile("[<\\[].*[\\]>]");
    429         m = p.matcher("<foo>");
    430         assertTrue(m.matches());
    431         m = p.matcher("[bar]");
    432         assertTrue(m.matches());
    433         m = p.matcher("{foobar]");
    434         assertFalse(m.matches());
    435         m = p.matcher("<pill]");
    436         assertTrue(m.matches());
    437 
    438         // Test range using ^
    439         p = Pattern.compile("[^bc][a-z]+[tr]");
    440         m = p.matcher("pat");
    441         assertTrue(m.matches());
    442         m = p.matcher("liar");
    443         assertTrue(m.matches());
    444         m = p.matcher("car");
    445         assertFalse(m.matches());
    446         m = p.matcher("gnat");
    447         assertTrue(m.matches());
    448 
    449         // Test character range using -
    450         p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]");
    451         m = p.matcher("d__F-8");
    452         assertTrue(m.matches());
    453         m = p.matcher("c_a-q");
    454         assertTrue(m.matches());
    455         m = p.matcher("a__R-a");
    456         assertFalse(m.matches());
    457         m = p.matcher("r_____d-----5");
    458         assertTrue(m.matches());
    459 
    460         // Test range using unicode characters and unicode and hex escapes
    461         p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]");
    462         m = p.matcher("\u2000_q-\u0007");
    463         assertTrue(m.matches());
    464         m = p.matcher("\u1234_z-\u0001");
    465         assertTrue(m.matches());
    466         m = p.matcher("r_p-q");
    467         assertFalse(m.matches());
    468         m = p.matcher("\u2345_____d-----\n");
    469         assertTrue(m.matches());
    470 
    471 // BEGIN android-removed
    472 // The "---" collides with ICU's "--" operator and is likely to be a user error
    473 // anyway, so we simply comment this one out.
    474 //        // Test ranges including the "-" character
    475 //        p = Pattern.compile("[\\*-/]_+[---]!+[--AP]");
    476 //        m = p.matcher("-_-!!A");
    477 //        assertTrue(m.matches());
    478 //        m = p.matcher("\u002b_-!!!-");
    479 //        assertTrue(m.matches());
    480 //        m = p.matcher("!_-!@");
    481 //        assertFalse(m.matches());
    482 //        m = p.matcher(",______-!!!!!!!P");
    483 //        assertTrue(m.matches());
    484 // END android-removed
    485 
    486         // Test nested ranges
    487         p = Pattern.compile("[pm[t]][a-z]+[[r]lp]");
    488         m = p.matcher("prop");
    489         assertTrue(m.matches());
    490         m = p.matcher("tsar");
    491         assertTrue(m.matches());
    492         m = p.matcher("pong");
    493         assertFalse(m.matches());
    494         m = p.matcher("moor");
    495         assertTrue(m.matches());
    496 
    497         // Test character class intersection with &&
    498         // TODO: figure out what x&&y or any class with a null intersection
    499         // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match
    500         // nothing" and doesn't mean "match anything" so I'm stumped.
    501         p = Pattern.compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+");
    502         m = p.matcher("h--q--x--");
    503         assertTrue(m.matches());
    504         m = p.matcher("hog--q-x-");
    505         assertTrue(m.matches());
    506         m = p.matcher("ape--q-x-");
    507         assertFalse(m.matches());
    508         m = p.matcher("mop--q-x----");
    509         assertTrue(m.matches());
    510 
    511         // Test error cases with &&
    512 // BEGIN android-removed
    513 // This is more of a bug, and ICU doesn't have this behavior.
    514 //            p = Pattern.compile("[&&[xyz]]");
    515 //            m = p.matcher("&");
    516 //            // System.out.println(m.matches());
    517 //            m = p.matcher("x");
    518 //            // System.out.println(m.matches());
    519 //            m = p.matcher("y");
    520 //            // System.out.println(m.matches());
    521 // END android-removed
    522             p = Pattern.compile("[[xyz]&[axy]]");
    523             m = p.matcher("x");
    524             // System.out.println(m.matches());
    525             m = p.matcher("z");
    526             // System.out.println(m.matches());
    527             m = p.matcher("&");
    528             // System.out.println(m.matches());
    529             p = Pattern.compile("[abc[123]&&[345]def]");
    530             m = p.matcher("a");
    531             // System.out.println(m.matches());
    532 
    533 // BEGIN android-removed
    534 // This is more of a bug, and ICU doesn't have this behavior.
    535 //            p = Pattern.compile("[[xyz]&&]");
    536 // END android-removed
    537             p = Pattern.compile("[[abc]&]");
    538 
    539         try {
    540             p = Pattern.compile("[[abc]&&");
    541             fail("PatternSyntaxException expected");
    542         } catch (PatternSyntaxException e) {
    543         }
    544 
    545         p = Pattern.compile("[[abc]\\&&[xyz]]");
    546 
    547         p = Pattern.compile("[[abc]&\\&[xyz]]");
    548 
    549         // Test 3-way intersection
    550         p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]");
    551         m = p.matcher("g");
    552         assertTrue(m.matches());
    553         m = p.matcher("m");
    554         assertFalse(m.matches());
    555 
    556         // Test nested intersection
    557         p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]");
    558         m = p.matcher("g");
    559         assertTrue(m.matches());
    560         m = p.matcher("m");
    561         assertFalse(m.matches());
    562 
    563         // Test character class subtraction with && and ^
    564         p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]");
    565         m = p.matcher("pop");
    566         assertTrue(m.matches());
    567         m = p.matcher("tag");
    568         assertTrue(m.matches());
    569         m = p.matcher("eat");
    570         assertFalse(m.matches());
    571         m = p.matcher("tax");
    572         assertFalse(m.matches());
    573         m = p.matcher("zip");
    574         assertTrue(m.matches());
    575 
    576         // Test . (DOT), with and without DOTALL
    577         // Note: DOT not allowed in character classes
    578         p = Pattern.compile(".+/x.z");
    579         m = p.matcher("!$/xyz");
    580         assertTrue(m.matches());
    581         m = p.matcher("%\n\r/x\nz");
    582         assertFalse(m.matches());
    583         p = Pattern.compile(".+/x.z", Pattern.DOTALL);
    584         m = p.matcher("%\n\r/x\nz");
    585         assertTrue(m.matches());
    586 
    587         // Test \d (digit)
    588         p = Pattern.compile("\\d+[a-z][\\dx]");
    589         m = p.matcher("42a6");
    590         assertTrue(m.matches());
    591         m = p.matcher("21zx");
    592         assertTrue(m.matches());
    593         m = p.matcher("ab6");
    594         assertFalse(m.matches());
    595         m = p.matcher("56912f9");
    596         assertTrue(m.matches());
    597 
    598         // Test \D (not a digit)
    599         p = Pattern.compile("\\D+[a-z]-[\\D3]");
    600         m = p.matcher("za-p");
    601         assertTrue(m.matches());
    602         m = p.matcher("%!e-3");
    603         assertTrue(m.matches());
    604         m = p.matcher("9a-x");
    605         assertFalse(m.matches());
    606         m = p.matcher("\u1234pp\ny-3");
    607         assertTrue(m.matches());
    608 
    609         // Test \s (whitespace)
    610         p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>");
    611         m = p.matcher("<cat \t1\fx>");
    612         assertTrue(m.matches());
    613         m = p.matcher("<cat \t1\f >");
    614         assertFalse(m.matches());
    615         m = p
    616                 .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz");
    617         assertTrue(m.find());
    618         assertTrue(m.find());
    619         assertFalse(m.find());
    620 
    621         // Test \S (not whitespace)
    622         p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>");
    623         m = p.matcher("<f $0**\n** 221>");
    624         assertTrue(m.matches());
    625         m = p.matcher("<x 441\t221>");
    626         assertTrue(m.matches());
    627         m = p.matcher("<z \t9\ng 221>");
    628         assertFalse(m.matches());
    629         m = p.matcher("<z 60\ngg\u1234\f221>");
    630         assertTrue(m.matches());
    631         p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>");
    632         m = p.matcher("<f $0**\n** 221x>");
    633         assertTrue(m.matches());
    634         m = p.matcher("<x 441\t221z>");
    635         assertTrue(m.matches());
    636         m = p.matcher("<x 441\t221 >");
    637         assertFalse(m.matches());
    638         m = p.matcher("<x 441\t221c>");
    639         assertFalse(m.matches());
    640         m = p.matcher("<z \t9\ng 221x>");
    641         assertFalse(m.matches());
    642         m = p.matcher("<z 60\ngg\u1234\f221\u0001>");
    643         assertTrue(m.matches());
    644 
    645         // Test \w (ascii word)
    646         p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;");
    647         m = p.matcher("<f1 99;!foo5/a$7;");
    648         assertTrue(m.matches());
    649         m = p.matcher("<f$ 99;!foo5/a$7;");
    650         assertFalse(m.matches());
    651         m = p
    652                 .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;");
    653         assertTrue(m.matches());
    654 
    655         // Test \W (not an ascii word)
    656         p = Pattern.compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;");
    657         m = p.matcher("<$foo3\n99;_bar\t0;");
    658         assertTrue(m.matches());
    659         m = p.matcher("<hh 99;_g 0;");
    660         assertFalse(m.matches());
    661         m = p.matcher("<*xx\t00;^zz\f11;");
    662         assertTrue(m.matches());
    663 
    664         // Test x|y pattern
    665         // TODO
    666     }
    667     public void testPOSIXGroups() throws PatternSyntaxException {
    668         Pattern p;
    669         Matcher m;
    670 
    671         // Test POSIX groups using \p and \P (in the group and not in the group)
    672         // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct,
    673         // Graph, Print, Blank, Space, Cntrl
    674         // Test \p{Lower}
    675         /*
    676          * FIXME: Requires complex range processing
    677          * p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>");
    678          * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m =
    679          * p.matcher("<p4%:Z\tq>"); assertTrue(m.matches()); m =
    680          * p.matcher("<A6#:e e>"); assertFalse(m.matches());
    681          */
    682         p = Pattern.compile("\\p{Lower}+");
    683         m = p.matcher("abcdefghijklmnopqrstuvwxyz");
    684         assertTrue(m.matches());
    685 
    686         // Invalid uses of \p{Lower}
    687         try {
    688             p = Pattern.compile("\\p");
    689             fail("PatternSyntaxException expected");
    690         } catch (PatternSyntaxException e) {
    691         }
    692 
    693         try {
    694             p = Pattern.compile("\\p;");
    695             fail("PatternSyntaxException expected");
    696         } catch (PatternSyntaxException e) {
    697         }
    698 
    699         try {
    700             p = Pattern.compile("\\p{");
    701             fail("PatternSyntaxException expected");
    702         } catch (PatternSyntaxException e) {
    703         }
    704 
    705         try {
    706             p = Pattern.compile("\\p{;");
    707             fail("PatternSyntaxException expected");
    708         } catch (PatternSyntaxException e) {
    709         }
    710 
    711         try {
    712             p = Pattern.compile("\\p{Lower");
    713             fail("PatternSyntaxException expected");
    714         } catch (PatternSyntaxException e) {
    715         }
    716 
    717         try {
    718             p = Pattern.compile("\\p{Lower;");
    719             fail("PatternSyntaxException expected");
    720         } catch (PatternSyntaxException e) {
    721         }
    722 
    723         // Test \p{Upper}
    724         /*
    725          * FIXME: Requires complex range processing
    726          * p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>");
    727          * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m =
    728          * p.matcher("<P4%:z\tQ>"); assertTrue(m.matches()); m =
    729          * p.matcher("<a6#:E E>"); assertFalse(m.matches());
    730          */
    731         p = Pattern.compile("\\p{Upper}+");
    732         m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    733         assertTrue(m.matches());
    734 
    735         // Invalid uses of \p{Upper}
    736         try {
    737             p = Pattern.compile("\\p{Upper");
    738             fail("PatternSyntaxException expected");
    739         } catch (PatternSyntaxException e) {
    740         }
    741 
    742         try {
    743             p = Pattern.compile("\\p{Upper;");
    744             fail("PatternSyntaxException expected");
    745         } catch (PatternSyntaxException e) {
    746         }
    747 
    748         // Test \p{ASCII}
    749         /*
    750          * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>");
    751          * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m =
    752          * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m =
    753          * p.matcher("<\u00846#:E E>"); assertFalse(m.matches())
    754          */
    755         int i;
    756         p = Pattern.compile("\\p{ASCII}");
    757         for (i = 0; i < 0x80; i++) {
    758             m = p.matcher(Character.toString((char) i));
    759             assertTrue(m.matches());
    760         }
    761         for (; i < 0xff; i++) {
    762             m = p.matcher(Character.toString((char) i));
    763             assertFalse(m.matches());
    764         }
    765 
    766         // Invalid uses of \p{ASCII}
    767         try {
    768             p = Pattern.compile("\\p{ASCII");
    769             fail("PatternSyntaxException expected");
    770         } catch (PatternSyntaxException e) {
    771         }
    772 
    773         try {
    774             p = Pattern.compile("\\p{ASCII;");
    775             fail("PatternSyntaxException expected");
    776         } catch (PatternSyntaxException e) {
    777         }
    778 
    779         // Test \p{Alpha}
    780         // TODO
    781 
    782         // Test \p{Digit}
    783         // TODO
    784 
    785         // Test \p{XDigit}
    786         // TODO
    787 
    788         // Test \p{Alnum}
    789         // TODO
    790 
    791         // Test \p{Punct}
    792         // TODO
    793 
    794         // Test \p{Graph}
    795         // TODO
    796 
    797         // Test \p{Print}
    798         // TODO
    799 
    800         // Test \p{Blank}
    801         // TODO
    802 
    803         // Test \p{Space}
    804         // TODO
    805 
    806         // Test \p{Cntrl}
    807         // TODO
    808     }
    809     public void testUnicodeCategories() throws PatternSyntaxException {
    810         // Test Unicode categories using \p and \P
    811         // One letter codes: L, M, N, P, S, Z, C
    812         // Two letter codes: Lu, Nd, Sc, Sm, ...
    813         // See java.lang.Character and Unicode standard for complete list
    814         // TODO
    815         // Test \p{L}
    816         // TODO
    817 
    818         // Test \p{N}
    819         // TODO
    820 
    821         // Test two letter codes:
    822         // From unicode.org:
    823         // Lu
    824         // Ll
    825         // Lt
    826         // Lm
    827         // Lo
    828         // Mn
    829         // Mc
    830         // Me
    831         // Nd
    832         // Nl
    833         // No
    834         // Pc
    835         // Pd
    836         // Ps
    837         // Pe
    838         // Pi
    839         // Pf
    840         // Po
    841         // Sm
    842         // Sc
    843         // Sk
    844         // So
    845         // Zs
    846         // Zl
    847         // Zp
    848         // Cc
    849         // Cf
    850         // Cs
    851         // Co
    852         // Cn
    853 
    854         // TODO add more tests per category
    855         //{"Cc", "\u0000", "-\u0041"},
    856         testCategory("Cf", "\u202B");
    857         testCategory("Co", "\uE000");
    858         testCategory("Cs", "\uD800");
    859         testCategory("Ll", "a", "b", "x", "y", "z", "-A", "-Z");
    860         testCategory("Lm", "\u02B9");
    861         testCategory("Lu", "B", "C", "-c");
    862         testCategory("Lo", "\u05E2");
    863         testCategory("Lt", "\u01C5");
    864         testCategory("Mc", "\u0903");
    865         testCategory("Me", "\u0488");
    866         testCategory("Mn", "\u0300");
    867         testCategory("Nd", "\u0030");
    868         testCategory("Nl", "\u2164");
    869         testCategory("No", "\u0BF0");
    870         // testCategory("Pc", "\u30FB");
    871         testCategory("Pd", "\u2015");
    872         testCategory("Pe", "\u207E");
    873         testCategory("Po", "\u00B7");
    874         testCategory("Ps", "\u0F3C");
    875         testCategory("Sc", "\u20A0");
    876         testCategory("Sk", "\u00B8");
    877         testCategory("Sm", "\u002B");
    878         testCategory("So", "\u0B70");
    879         testCategory("Zl", "\u2028");
    880         // testCategory("Pi", "\u200C");
    881         testCategory("Zp", "\u2029");
    882     }
    883 
    884     private void testCategory(String cat, String... matches) {
    885         String pa = "{"+cat+"}";
    886         String pat = "\\p"+pa;
    887         String npat = "\\P"+pa;
    888         Pattern p = Pattern.compile(pat);
    889         Pattern pn = Pattern.compile(npat);
    890         for (int j = 0; j < matches.length; j++) {
    891             String t = matches[j];
    892             boolean invert = t.startsWith("-");
    893             if (invert) {
    894                 // test negative case, expected to fail
    895                 t = t.substring(1);
    896                 assertFalse("expected '"+t+"' to not be matched " +
    897                         "by pattern '"+pat, p.matcher(t).matches());
    898                 assertTrue("expected '"+t+"' to  " +
    899                         "be matched by pattern '"+npat, pn.matcher(t).matches());
    900             } else {
    901                 assertTrue("expected '"+t+"' to be matched " +
    902                         "by pattern '"+pat, p.matcher(t).matches());
    903                 assertFalse("expected '"+t+"' to  " +
    904                         "not be matched by pattern '"+npat, pn.matcher(t).matches());
    905             }
    906         }
    907     }
    908 
    909     public void testUnicodeBlocks() throws PatternSyntaxException {
    910         Pattern p;
    911         Matcher m;
    912         int i, j;
    913 
    914         // Test Unicode blocks using \p and \P
    915         // FIXME:
    916         // Note that LatinExtended-B and ArabicPresentations-B are unrecognized
    917         // by the reference JDK.
    918         for (i = 0; i < UBlocks.length; i++) {
    919             /*
    920              * p = Pattern.compile("\\p{"+UBlocks[i].name+"}");
    921              *
    922              * if (UBlocks[i].low > 0) { m =
    923              * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
    924              * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <=
    925              * UBlocks[i].high; j++) { m =
    926              * p.matcher(Character.toString((char)j));
    927              * assertTrue(m.matches()); } if (UBlocks[i].high < 0xFFFF) { m =
    928              * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
    929              * assertFalse(m.matches()); }
    930              *
    931              * p = Pattern.compile("\\P{"+UBlocks[i].name+"}");
    932              *
    933              * if (UBlocks[i].low > 0) { m =
    934              * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
    935              * assertTrue(m.matches()); } for (j=UBlocks[i].low; j <
    936              * UBlocks[i].high; j++) { m =
    937              * p.matcher(Character.toString((char)j));
    938              * assertFalse(m.matches()); } if (UBlocks[i].high < 0xFFFF) { m =
    939              * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
    940              * assertTrue(m.matches()); }
    941              */
    942 
    943             p = Pattern.compile("\\p{In" + UBlocks[i].name + "}");
    944 // BEGIN android-changed
    945 // Added the name of the block under test to the assertion to get more output.
    946 
    947             if (UBlocks[i].low > 0) {
    948                 m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
    949                 assertFalse(UBlocks[i].name, m.matches());
    950             }
    951             for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) {
    952                 m = p.matcher(Character.toString((char) j));
    953                 assertTrue(UBlocks[i].name, m.matches());
    954             }
    955             if (UBlocks[i].high < 0xFFFF) {
    956                 m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
    957                 assertFalse(UBlocks[i].name, m.matches());
    958             }
    959 
    960             p = Pattern.compile("\\P{In" + UBlocks[i].name + "}");
    961 
    962             if (UBlocks[i].low > 0) {
    963                 m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
    964                 assertTrue(UBlocks[i].name, m.matches());
    965             }
    966             for (j = UBlocks[i].low; j < UBlocks[i].high; j++) {
    967                 m = p.matcher(Character.toString((char) j));
    968                 assertFalse(UBlocks[i].name, m.matches());
    969             }
    970             if (UBlocks[i].high < 0xFFFF) {
    971                 m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
    972                 assertTrue(UBlocks[i].name, m.matches());
    973             }
    974 
    975 // END android-changed
    976         }
    977     }
    978     public void testCapturingGroups() throws PatternSyntaxException {
    979         Pattern p;
    980         Matcher m;
    981 
    982         // Test simple capturing groups
    983         p = Pattern.compile("(a+)b");
    984         m = p.matcher("aaaaaaaab");
    985         assertTrue(m.matches());
    986         assertEquals(1, m.groupCount());
    987         assertEquals("aaaaaaaa", m.group(1));
    988 
    989         p = Pattern.compile("((an)+)((as)+)");
    990         m = p.matcher("ananas");
    991         assertTrue(m.matches());
    992         assertEquals(4, m.groupCount());
    993         assertEquals("ananas", m.group(0));
    994         assertEquals("anan", m.group(1));
    995         assertEquals("an", m.group(2));
    996         assertEquals("as", m.group(3));
    997         assertEquals("as", m.group(4));
    998 
    999         // Test grouping without capture (?:...)
   1000         p = Pattern.compile("(?:(?:an)+)(as)");
   1001         m = p.matcher("ananas");
   1002         assertTrue(m.matches());
   1003         assertEquals(1, m.groupCount());
   1004         assertEquals("as", m.group(1));
   1005         try {
   1006             m.group(2);
   1007             fail("expected IndexOutOfBoundsException");
   1008         } catch (IndexOutOfBoundsException ioobe) {
   1009             // expected
   1010         }
   1011 
   1012         // Test combination of grouping and capture
   1013         // TODO
   1014 
   1015         // Test \<num> sequence with capturing and non-capturing groups
   1016         // TODO
   1017 
   1018         // Test \<num> with <num> out of range
   1019         p = Pattern.compile("((an)+)as\\1");
   1020         m = p.matcher("ananasanan");
   1021         assertTrue(m.matches());
   1022 
   1023         try {
   1024             p = Pattern.compile("((an)+)as\\4");
   1025             fail("expected PatternSyntaxException");
   1026         } catch (PatternSyntaxException pse) {
   1027             // expected
   1028         }
   1029 
   1030     }
   1031     public void testRepeats() {
   1032         Pattern p;
   1033         Matcher m;
   1034 
   1035         // Test ?
   1036         p = Pattern.compile("(abc)?c");
   1037         m = p.matcher("abcc");
   1038         assertTrue(m.matches());
   1039         m = p.matcher("c");
   1040         assertTrue(m.matches());
   1041         m = p.matcher("cc");
   1042         assertFalse(m.matches());
   1043         m = p.matcher("abcabcc");
   1044         assertFalse(m.matches());
   1045 
   1046         // Test *
   1047         p = Pattern.compile("(abc)*c");
   1048         m = p.matcher("abcc");
   1049         assertTrue(m.matches());
   1050         m = p.matcher("c");
   1051         assertTrue(m.matches());
   1052         m = p.matcher("cc");
   1053         assertFalse(m.matches());
   1054         m = p.matcher("abcabcc");
   1055         assertTrue(m.matches());
   1056 
   1057         // Test +
   1058         p = Pattern.compile("(abc)+c");
   1059         m = p.matcher("abcc");
   1060         assertTrue(m.matches());
   1061         m = p.matcher("c");
   1062         assertFalse(m.matches());
   1063         m = p.matcher("cc");
   1064         assertFalse(m.matches());
   1065         m = p.matcher("abcabcc");
   1066         assertTrue(m.matches());
   1067 
   1068         // Test {<num>}, including 0, 1 and more
   1069         p = Pattern.compile("(abc){0}c");
   1070         m = p.matcher("abcc");
   1071         assertFalse(m.matches());
   1072         m = p.matcher("c");
   1073         assertTrue(m.matches());
   1074 
   1075         p = Pattern.compile("(abc){1}c");
   1076         m = p.matcher("abcc");
   1077         assertTrue(m.matches());
   1078         m = p.matcher("c");
   1079         assertFalse(m.matches());
   1080         m = p.matcher("abcabcc");
   1081         assertFalse(m.matches());
   1082 
   1083         p = Pattern.compile("(abc){2}c");
   1084         m = p.matcher("abcc");
   1085         assertFalse(m.matches());
   1086         m = p.matcher("c");
   1087         assertFalse(m.matches());
   1088         m = p.matcher("cc");
   1089         assertFalse(m.matches());
   1090         m = p.matcher("abcabcc");
   1091         assertTrue(m.matches());
   1092 
   1093         // Test {<num>,}, including 0, 1 and more
   1094         // TODO
   1095 
   1096         // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?)
   1097         // TODO
   1098     }
   1099     public void testAnchors() throws PatternSyntaxException {
   1100         Pattern p;
   1101         Matcher m;
   1102 
   1103         // Test ^, default and MULTILINE
   1104         p = Pattern.compile("^abc\\n^abc", Pattern.MULTILINE);
   1105         m = p.matcher("abc\nabc");
   1106         assertTrue(m.matches());
   1107 
   1108         p = Pattern.compile("^abc\\n^abc");
   1109         m = p.matcher("abc\nabc");
   1110         assertFalse(m.matches());
   1111 
   1112         // Test $, default and MULTILINE
   1113         // TODO
   1114 
   1115         // Test \b (word boundary)
   1116         // TODO
   1117 
   1118         // Test \B (not a word boundary)
   1119         // TODO
   1120 
   1121         // Test \A (beginning of string)
   1122         // TODO
   1123 
   1124         // Test \Z (end of string)
   1125         // TODO
   1126 
   1127         // Test \z (end of string)
   1128         // TODO
   1129 
   1130         // Test \G
   1131         // TODO
   1132 
   1133         // Test positive lookahead using (?=...)
   1134         // TODO
   1135 
   1136         // Test negative lookahead using (?!...)
   1137         // TODO
   1138 
   1139         // Test positive lookbehind using (?<=...)
   1140         // TODO
   1141 
   1142         // Test negative lookbehind using (?<!...)
   1143         // TODO
   1144     }
   1145     public void testMisc() throws PatternSyntaxException {
   1146         Pattern p;
   1147         Matcher m;
   1148 
   1149         // Test (?>...)
   1150         // TODO
   1151 
   1152         // Test (?onflags-offflags)
   1153         // Valid flags are i,m,d,s,u,x
   1154         // TODO
   1155 
   1156         // Test (?onflags-offflags:...)
   1157         // TODO
   1158 
   1159         // Test \Q, \E
   1160         p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+");
   1161         m = p.matcher("abc;[a-z]+;\\Q(foo.*);411");
   1162         assertTrue(m.matches());
   1163         m = p.matcher("abc;def;foo42;555");
   1164         assertFalse(m.matches());
   1165         m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123");
   1166         assertFalse(m.matches());
   1167 
   1168         p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+");
   1169         m = p.matcher("abc;foo5-(...);123");
   1170         assertTrue(m.matches());
   1171         assertEquals("foo5-(...)", m.group(1));
   1172         m = p.matcher("abc;foo9-(xxx);789");
   1173         assertFalse(m.matches());
   1174 
   1175         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+");
   1176         m = p.matcher("abc;bar0-def$-;123");
   1177         assertTrue(m.matches());
   1178 
   1179         // FIXME:
   1180         // This should work the same as the pattern above but fails with the
   1181         // the reference JDK
   1182         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+");
   1183         m = p.matcher("abc;bar0-def$-;123");
   1184         // assertTrue(m.matches());
   1185 
   1186         // FIXME:
   1187         // This should work too .. it looks as if just about anything that
   1188         // has more
   1189         // than one character between \Q and \E is broken in the the reference JDK
   1190         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+");
   1191         m = p.matcher("abc;bar0-def[99]-]0x[;123");
   1192         // assertTrue(m.matches());
   1193 
   1194         // This is the same as above but with explicit escapes .. and this
   1195         // does work
   1196         // on the the reference JDK
   1197         p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+");
   1198         m = p.matcher("abc;bar0-def[99]-]0x[;123");
   1199         assertTrue(m.matches());
   1200 
   1201         // Test #<comment text>
   1202         // TODO
   1203     }
   1204     public void testCompile1() throws PatternSyntaxException {
   1205         Pattern pattern = Pattern
   1206                 .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*");
   1207         String name = "iso-8859-1";
   1208         assertTrue(pattern.matcher(name).matches());
   1209     }
   1210     public void testCompile2() throws PatternSyntaxException {
   1211         String findString = "\\Qimport\\E";
   1212 
   1213         Pattern pattern = Pattern.compile(findString, 0);
   1214         Matcher matcher = pattern.matcher(new String(
   1215                 "import a.A;\n\n import b.B;\nclass C {}"));
   1216 
   1217         assertTrue(matcher.find(0));
   1218     }
   1219     public void testCompile3() throws PatternSyntaxException {
   1220         Pattern p;
   1221         Matcher m;
   1222         p = Pattern.compile("a$");
   1223         m = p.matcher("a\n");
   1224         assertTrue(m.find());
   1225         assertEquals("a", m.group());
   1226         assertFalse(m.find());
   1227 
   1228         p = Pattern.compile("(a$)");
   1229         m = p.matcher("a\n");
   1230         assertTrue(m.find());
   1231         assertEquals("a", m.group());
   1232         assertEquals("a", m.group(1));
   1233         assertFalse(m.find());
   1234 
   1235         p = Pattern.compile("^.*$", Pattern.MULTILINE);
   1236 
   1237         m = p.matcher("a\n");
   1238         assertTrue(m.find());
   1239         // System.out.println("["+m.group()+"]");
   1240         assertEquals("a", m.group());
   1241         assertFalse(m.find());
   1242 
   1243         m = p.matcher("a\nb\n");
   1244         assertTrue(m.find());
   1245         // System.out.println("["+m.group()+"]");
   1246         assertEquals("a", m.group());
   1247         assertTrue(m.find());
   1248         // System.out.println("["+m.group()+"]");
   1249         assertEquals("b", m.group());
   1250         assertFalse(m.find());
   1251 
   1252         m = p.matcher("a\nb");
   1253         assertTrue(m.find());
   1254         // System.out.println("["+m.group()+"]");
   1255         assertEquals("a", m.group());
   1256         assertTrue(m.find());
   1257         assertEquals("b", m.group());
   1258         assertFalse(m.find());
   1259 
   1260         m = p.matcher("\naa\r\nbb\rcc\n\n");
   1261         assertTrue(m.find());
   1262         // System.out.println("["+m.group()+"]");
   1263         assertTrue(m.group().equals(""));
   1264         assertTrue(m.find());
   1265         // System.out.println("["+m.group()+"]");
   1266         assertEquals("aa", m.group());
   1267         assertTrue(m.find());
   1268         // System.out.println("["+m.group()+"]");
   1269         assertEquals("bb", m.group());
   1270         assertTrue(m.find());
   1271         // System.out.println("["+m.group()+"]");
   1272         assertEquals("cc", m.group());
   1273         assertTrue(m.find());
   1274         // System.out.println("["+m.group()+"]");
   1275         assertTrue(m.group().equals(""));
   1276         assertFalse(m.find());
   1277 
   1278         m = p.matcher("a");
   1279         assertTrue(m.find());
   1280         assertEquals("a", m.group());
   1281         assertFalse(m.find());
   1282 
   1283 // BEGIN android-removed
   1284 // Makes no sense to duplicate this weird behavior
   1285 //        m = p.matcher("");
   1286 //        // FIXME: This matches the reference behaviour but is
   1287 //        // inconsistent with matching "a" - ie. the end of the
   1288 //        // target string should match against $ always but this
   1289 //        // appears to work with the null string only when not in
   1290 //        // multiline mode (see below)
   1291 //        assertFalse(m.find());
   1292 // END android-removed
   1293 
   1294         p = Pattern.compile("^.*$");
   1295         m = p.matcher("");
   1296         assertTrue(m.find());
   1297         assertTrue(m.group().equals(""));
   1298         assertFalse(m.find());
   1299     }
   1300     public void testCompile4() throws PatternSyntaxException {
   1301         String findString = "\\Qpublic\\E";
   1302         StringBuffer text = new StringBuffer("    public class Class {\n"
   1303                 + "    public class Class {");
   1304 
   1305         Pattern pattern = Pattern.compile(findString, 0);
   1306         Matcher matcher = pattern.matcher(text);
   1307 
   1308         boolean found = matcher.find();
   1309         assertTrue(found);
   1310         assertEquals(4, matcher.start());
   1311         if (found) {
   1312             // modify text
   1313             text.delete(0, text.length());
   1314             text.append("Text have been changed.");
   1315             matcher.reset(text);
   1316         }
   1317 
   1318         found = matcher.find();
   1319         assertFalse(found);
   1320     }
   1321     public void testCompile5() throws PatternSyntaxException {
   1322         Pattern p = Pattern.compile("^[0-9]");
   1323         String s[] = p.split("12", -1);
   1324         assertEquals("", s[0]);
   1325         assertEquals("2", s[1]);
   1326         assertEquals(2, s.length);
   1327     }
   1328 
   1329     //      public void testCompile6() {
   1330     //        String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
   1331     //        String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
   1332     //        try {
   1333     //            Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
   1334     //            assertTrue(true);
   1335     //        } catch (PatternSyntaxException e) {
   1336     //            System.out.println(e.getMessage());
   1337     //            assertTrue(false);
   1338     //        }
   1339     //    }
   1340 
   1341     private static class UBInfo {
   1342         public UBInfo(int low, int high, String name) {
   1343             this.name = name;
   1344             this.low = low;
   1345             this.high = high;
   1346         }
   1347 
   1348         public String name;
   1349 
   1350         public int low, high;
   1351     }
   1352 
   1353     // A table representing the unicode categories
   1354     //private static UBInfo[] UCategories = {
   1355     // Lu
   1356     // Ll
   1357     // Lt
   1358     // Lm
   1359     // Lo
   1360     // Mn
   1361     // Mc
   1362     // Me
   1363     // Nd
   1364     // Nl
   1365     // No
   1366     // Pc
   1367     // Pd
   1368     // Ps
   1369     // Pe
   1370     // Pi
   1371     // Pf
   1372     // Po
   1373     // Sm
   1374     // Sc
   1375     // Sk
   1376     // So
   1377     // Zs
   1378     // Zl
   1379     // Zp
   1380     // Cc
   1381     // Cf
   1382     // Cs
   1383     // Co
   1384     // Cn
   1385     //};
   1386 
   1387     // A table representing the unicode character blocks
   1388     private static UBInfo[] UBlocks = {
   1389     /* 0000; 007F; Basic Latin */
   1390     new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN
   1391             /* 0080; 00FF; Latin-1 Supplement */
   1392             new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT
   1393             /* 0100; 017F; Latin Extended-A */
   1394             new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A
   1395             /* 0180; 024F; Latin Extended-B */
   1396             // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), //
   1397             // Character.UnicodeBlock.LATIN_EXTENDED_B
   1398             /* 0250; 02AF; IPA Extensions */
   1399             new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS
   1400             /* 02B0; 02FF; Spacing Modifier Letters */
   1401             new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS
   1402             /* 0300; 036F; Combining Diacritical Marks */
   1403             new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
   1404             /* 0370; 03FF; Greek */
   1405             new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK
   1406             /* 0400; 04FF; Cyrillic */
   1407             new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC
   1408             /* 0530; 058F; Armenian */
   1409             new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN
   1410             /* 0590; 05FF; Hebrew */
   1411             new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW
   1412             /* 0600; 06FF; Arabic */
   1413             new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC
   1414             /* 0700; 074F; Syriac */
   1415             new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC
   1416             /* 0780; 07BF; Thaana */
   1417             new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA
   1418             /* 0900; 097F; Devanagari */
   1419             new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI
   1420             /* 0980; 09FF; Bengali */
   1421             new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI
   1422             /* 0A00; 0A7F; Gurmukhi */
   1423             new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI
   1424             /* 0A80; 0AFF; Gujarati */
   1425             new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI
   1426             /* 0B00; 0B7F; Oriya */
   1427             new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA
   1428             /* 0B80; 0BFF; Tamil */
   1429             new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL
   1430             /* 0C00; 0C7F; Telugu */
   1431             new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU
   1432             /* 0C80; 0CFF; Kannada */
   1433             new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA
   1434             /* 0D00; 0D7F; Malayalam */
   1435             new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM
   1436             /* 0D80; 0DFF; Sinhala */
   1437             new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA
   1438             /* 0E00; 0E7F; Thai */
   1439             new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI
   1440             /* 0E80; 0EFF; Lao */
   1441             new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO
   1442             /* 0F00; 0FFF; Tibetan */
   1443             new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN
   1444             /* 1000; 109F; Myanmar */
   1445             new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR
   1446             /* 10A0; 10FF; Georgian */
   1447             new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN
   1448             /* 1100; 11FF; Hangul Jamo */
   1449             new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO
   1450             /* 1200; 137F; Ethiopic */
   1451             new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC
   1452             /* 13A0; 13FF; Cherokee */
   1453             new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE
   1454             /* 1400; 167F; Unified Canadian Aboriginal Syllabics */
   1455             new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
   1456             /* 1680; 169F; Ogham */
   1457             new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM
   1458             /* 16A0; 16FF; Runic */
   1459             new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC
   1460             /* 1780; 17FF; Khmer */
   1461             new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER
   1462             /* 1800; 18AF; Mongolian */
   1463             new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN
   1464             /* 1E00; 1EFF; Latin Extended Additional */
   1465             new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
   1466             /* 1F00; 1FFF; Greek Extended */
   1467             new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED
   1468             /* 2000; 206F; General Punctuation */
   1469             new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION
   1470             /* 2070; 209F; Superscripts and Subscripts */
   1471             new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
   1472             /* 20A0; 20CF; Currency Symbols */
   1473             new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS
   1474             /* 20D0; 20FF; Combining Marks for Symbols */
   1475             new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
   1476             /* 2100; 214F; Letterlike Symbols */
   1477             new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS
   1478             /* 2150; 218F; Number Forms */
   1479             new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS
   1480             /* 2190; 21FF; Arrows */
   1481             new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS
   1482             /* 2200; 22FF; Mathematical Operators */
   1483             new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS
   1484             /* 2300; 23FF; Miscellaneous Technical */
   1485             new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
   1486             /* 2400; 243F; Control Pictures */
   1487             new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES
   1488             /* 2440; 245F; Optical Character Recognition */
   1489             new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
   1490             /* 2460; 24FF; Enclosed Alphanumerics */
   1491             new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
   1492             /* 2500; 257F; Box Drawing */
   1493             new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING
   1494             /* 2580; 259F; Block Elements */
   1495             new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS
   1496             /* 25A0; 25FF; Geometric Shapes */
   1497             new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES
   1498             /* 2600; 26FF; Miscellaneous Symbols */
   1499             new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
   1500             /* 2700; 27BF; Dingbats */
   1501             new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS
   1502             /* 2800; 28FF; Braille Patterns */
   1503             new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS
   1504             /* 2E80; 2EFF; CJK Radicals Supplement */
   1505             new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
   1506             /* 2F00; 2FDF; Kangxi Radicals */
   1507             new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS
   1508             /* 2FF0; 2FFF; Ideographic Description Characters */
   1509             new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS
   1510             /* 3000; 303F; CJK Symbols and Punctuation */
   1511             new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
   1512             /* 3040; 309F; Hiragana */
   1513             new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA
   1514             /* 30A0; 30FF; Katakana */
   1515             new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA
   1516             /* 3100; 312F; Bopomofo */
   1517             new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO
   1518             /* 3130; 318F; Hangul Compatibility Jamo */
   1519             new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
   1520             /* 3190; 319F; Kanbun */
   1521             new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN
   1522             /* 31A0; 31BF; Bopomofo Extended */
   1523             new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED
   1524             /* 3200; 32FF; Enclosed CJK Letters and Months */
   1525             new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
   1526             /* 3300; 33FF; CJK Compatibility */
   1527             new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY
   1528             /* 3400; 4DB5; CJK Unified Ideographs Extension A */
   1529 // BEGIN android-changed
   1530 // Block end widened from 4DB5 (upstream value) to 4DBF to match the current Unicode block range 3400..4DBF
   1531             new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
   1532 // END android-changed
   1533             /* 4E00; 9FFF; CJK Unified Ideographs */
   1534             new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
   1535             /* A000; A48F; Yi Syllables */
   1536             new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES
   1537             /* A490; A4CF; Yi Radicals */
   1538             new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS
   1539             /* AC00; D7A3; Hangul Syllables */
   1540 // BEGIN android-changed
   1541 // Block end widened from D7A3 (upstream value) to D7AF to match the current Unicode block range AC00..D7AF
   1542             new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES
   1543 // END android-changed
   1544             /* D800; DB7F; High Surrogates */
   1545             /* DB80; DBFF; High Private Use Surrogates */
   1546             /* DC00; DFFF; Low Surrogates */
   1547             /* E000; F8FF; Private Use */
   1548             /* F900; FAFF; CJK Compatibility Ideographs */
   1549             new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
   1550             /* FB00; FB4F; Alphabetic Presentation Forms */
   1551             new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS
   1552             /* FB50; FDFF; Arabic Presentation Forms-A */
   1553             new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
   1554             /* FE20; FE2F; Combining Half Marks */
   1555             new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS
   1556             /* FE30; FE4F; CJK Compatibility Forms */
   1557             new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
   1558             /* FE50; FE6F; Small Form Variants */
   1559             new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS
   1560             /* FE70; FEFE; Arabic Presentation Forms-B */
   1561             // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), //
   1562             // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
   1563             /* FEFF; FEFF; Specials */
   1564 // BEGIN android-changed
   1565 // Entry disabled to reflect current Unicode tables: Arabic Presentation Forms-B now spans FE70..FEFF,
   1566 // so FEFF belongs to that block and is no longer a one-character "Specials" block
   1567 //            new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS
   1568 // END android-changed
   1569             /* FF00; FFEF; Halfwidth and Fullwidth Forms */
   1570             new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
   1571             /* FFF0; FFFD; Specials */
   1572             // BEGIN android-changed
   1573 // Block end widened from FFFD (upstream value) to FFFF to match the current Unicode block range FFF0..FFFF
   1574             new UBInfo(0xFFF0, 0xFFFF, "Specials") // Character.UnicodeBlock.SPECIALS
   1575 // END android-changed
   1576     };
   1577 }
   1578