Home | History | Annotate | Download | only in regex
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  */
     17 
     18 package org.apache.harmony.regex.tests.java.util.regex;
     19 
     20 import java.io.Serializable;
     21 import java.util.regex.Matcher;
     22 import java.util.regex.Pattern;
     23 import java.util.regex.PatternSyntaxException;
     24 
     25 import junit.framework.TestCase;
     26 
     27 import org.apache.harmony.testframework.serialization.SerializationTest;
     28 import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert;
     29 
     30 public class PatternTest extends TestCase {
     31     String[] testPatterns = {
     32             "(a|b)*abb",
     33             "(1*2*3*4*)*567",
     34             "(a|b|c|d)*aab",
     35             "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
     36             "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
     37             "(a|b)*(a|b)*A(a|b)*lice.*",
     38             "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|"
     39                     + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do",
     40 // BEGIN Android-changed
     41 // We don't have canonical equivalence.
     42 //            "x(?c)y", "x(?cc)y"
     43 //            "x(?:c)y"
     44 // END Android-changed
     45 
     46     };
     47 
     48     String[] testPatternsAlt = {
     49             /*
     50              * According to JavaDoc 2 and 3 oct digit sequences like \\o70\\o347
     51              * should be OK, but test is failed for them
     52              */
     53             "[ab]\\b\\\\o5\\xF9\\u1E7B\\t\\n\\f\\r\\a\\e[yz]",
     54             "^\\p{Lower}*\\p{Upper}*\\p{ASCII}?\\p{Alpha}?\\p{Digit}*\\p{Alnum}\\p{Punct}\\p{Graph}\\p{Print}\\p{Blank}\\p{Cntrl}\\p{XDigit}\\p{Space}",
     55             "$\\p{javaLowerCase}\\p{javaUpperCase}\\p{javaWhitespace}\\p{javaMirrored}",
     56             "\\p{InGreek}\\p{Lu}\\p{Sc}\\P{InGreek}[\\p{L}&&[^\\p{Lu}]]" };
     57 
     58     String[] wrongTestPatterns = { "\\o9A", "\\p{Lawer}", "\\xG0" };
     59 
     60     final static int[] flagsSet = { Pattern.CASE_INSENSITIVE,
     61             Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE
     62             /* , Pattern.CANON_EQ */ };
     63 
     64     /*
     65      * Based on RI implenetation documents. Need to check this set regarding
     66      * actual implementation.
     67      */
     68     final static int[] wrongFlagsSet = { 256, 512, 1024 };
     69 
     70     final static int DEFAULT_FLAGS = 0;
     71 
     72     public void testMatcher() {
     73         // some very simple test
     74         Pattern p = Pattern.compile("a");
     75         assertNotNull(p.matcher("bcde"));
     76         assertNotSame(p.matcher("a"), p.matcher("a"));
     77     }
     78 
     79     public void testSplitCharSequenceInt() {
     80         // splitting CharSequence which ends with pattern
     81         // bug6193
     82         assertEquals(",,".split(",", 3).length, 3);
     83         assertEquals(",,".split(",", 4).length, 3);
     84         // bug6193
     85         // bug5391
     86         assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5);
     87         assertEquals(Pattern.compile("b").split("ab", -1).length, 2);
     88         // bug5391
     89         String s[];
     90         Pattern pat = Pattern.compile("x");
     91         s = pat.split("zxx:zzz:zxx", 10);
     92         assertEquals(s.length, 5);
     93         s = pat.split("zxx:zzz:zxx", 3);
     94         assertEquals(s.length, 3);
     95         s = pat.split("zxx:zzz:zxx", -1);
     96         assertEquals(s.length, 5);
     97         s = pat.split("zxx:zzz:zxx", 0);
     98         assertEquals(s.length, 3);
     99         // other splitting
    100         // negative limit
    101         pat = Pattern.compile("b");
    102         s = pat.split("abccbadfebb", -1);
    103         assertEquals(s.length, 5);
    104         s = pat.split("", -1);
    105         assertEquals(s.length, 1);
    106         pat = Pattern.compile("");
    107         s = pat.split("", -1);
    108         assertEquals(s.length, 1);
    109         s = pat.split("abccbadfe", -1);
    110         assertEquals(s.length, 11);
    111         // zero limit
    112         pat = Pattern.compile("b");
    113         s = pat.split("abccbadfebb", 0);
    114         assertEquals(s.length, 3);
    115         s = pat.split("", 0);
    116         assertEquals(s.length, 1);
    117         pat = Pattern.compile("");
    118         s = pat.split("", 0);
    119         assertEquals(s.length, 1);
    120         s = pat.split("abccbadfe", 0);
    121         assertEquals(s.length, 10);
    122         // positive limit
    123         pat = Pattern.compile("b");
    124         s = pat.split("abccbadfebb", 12);
    125         assertEquals(s.length, 5);
    126         s = pat.split("", 6);
    127         assertEquals(s.length, 1);
    128         pat = Pattern.compile("");
    129         s = pat.split("", 11);
    130         assertEquals(s.length, 1);
    131         s = pat.split("abccbadfe", 15);
    132         assertEquals(s.length, 11);
    133 
    134         pat = Pattern.compile("b");
    135         s = pat.split("abccbadfebb", 5);
    136         assertEquals(s.length, 5);
    137         s = pat.split("", 1);
    138         assertEquals(s.length, 1);
    139         pat = Pattern.compile("");
    140         s = pat.split("", 1);
    141         assertEquals(s.length, 1);
    142         s = pat.split("abccbadfe", 11);
    143         assertEquals(s.length, 11);
    144 
    145         pat = Pattern.compile("b");
    146         s = pat.split("abccbadfebb", 3);
    147         assertEquals(s.length, 3);
    148         pat = Pattern.compile("");
    149         s = pat.split("abccbadfe", 5);
    150         assertEquals(s.length, 5);
    151     }
    152 
    153     public void testSplitCharSequence() {
    154         String s[];
    155         Pattern pat = Pattern.compile("b");
    156         s = pat.split("abccbadfebb");
    157         assertEquals(s.length, 3);
    158         s = pat.split("");
    159         assertEquals(s.length, 1);
    160         pat = Pattern.compile("");
    161         s = pat.split("");
    162         assertEquals(s.length, 1);
    163         s = pat.split("abccbadfe");
    164         assertEquals(s.length, 10);
    165         // bug6544
    166         String s1 = "";
    167         String[] arr = s1.split(":");
    168         assertEquals(arr.length, 1);
    169         // bug6544
    170     }
    171 
    172     public void testPattern() {
    173         /* Positive assertion test. */
    174         for (String aPattern : testPatterns) {
    175             Pattern p = Pattern.compile(aPattern);
    176             try {
    177                 assertTrue(p.pattern().equals(aPattern));
    178             } catch (Exception e) {
    179                 fail("Unexpected exception: " + e);
    180             }
    181         }
    182     }
    183 
    184     public void testCompile() {
    185         /* Positive assertion test. */
    186         for (String aPattern : testPatterns) {
    187             try {
    188                 Pattern p = Pattern.compile(aPattern);
    189             } catch (Exception e) {
    190                 fail("Unexpected exception: " + e);
    191             }
    192         }
    193 
    194         /* Positive assertion test with alternative templates. */
    195         for (String aPattern : testPatternsAlt) {
    196             try {
    197                 Pattern p = Pattern.compile(aPattern);
    198             } catch (Exception e) {
    199                 fail("Unexpected exception: " + e);
    200             }
    201         }
    202 
    203         /* Negative assertion test. */
    204         for (String aPattern : wrongTestPatterns) {
    205             try {
    206                 Pattern p = Pattern.compile(aPattern);
    207                 fail("PatternSyntaxException is expected");
    208             } catch (PatternSyntaxException pse) {
    209                 /* OKAY */
    210             } catch (Exception e) {
    211                 fail("Unexpected exception: " + e);
    212             }
    213         }
    214     }
    215 
    216     public void testFlags() {
    217         String baseString;
    218         String testString;
    219         Pattern pat;
    220         Matcher mat;
    221 
    222         baseString = "((?i)|b)a";
    223         testString = "A";
    224         pat = Pattern.compile(baseString);
    225         mat = pat.matcher(testString);
    226         assertFalse(mat.matches());
    227 
    228         baseString = "(?i)a|b";
    229         testString = "A";
    230         pat = Pattern.compile(baseString);
    231         mat = pat.matcher(testString);
    232         assertTrue(mat.matches());
    233 
    234         baseString = "(?i)a|b";
    235         testString = "B";
    236         pat = Pattern.compile(baseString);
    237         mat = pat.matcher(testString);
    238         assertTrue(mat.matches());
    239 
    240         baseString = "c|(?i)a|b";
    241         testString = "B";
    242         pat = Pattern.compile(baseString);
    243         mat = pat.matcher(testString);
    244         assertTrue(mat.matches());
    245 
    246         baseString = "(?i)a|(?s)b";
    247         testString = "B";
    248         pat = Pattern.compile(baseString);
    249         mat = pat.matcher(testString);
    250         assertTrue(mat.matches());
    251 
    252         baseString = "(?i)a|(?-i)b";
    253         testString = "B";
    254         pat = Pattern.compile(baseString);
    255         mat = pat.matcher(testString);
    256         assertFalse(mat.matches());
    257 
    258         baseString = "(?i)a|(?-i)c|b";
    259         testString = "B";
    260         pat = Pattern.compile(baseString);
    261         mat = pat.matcher(testString);
    262         assertFalse(mat.matches());
    263 
    264         baseString = "(?i)a|(?-i)c|(?i)b";
    265         testString = "B";
    266         pat = Pattern.compile(baseString);
    267         mat = pat.matcher(testString);
    268         assertTrue(mat.matches());
    269 
    270         baseString = "(?i)a|(?-i)b";
    271         testString = "A";
    272         pat = Pattern.compile(baseString);
    273         mat = pat.matcher(testString);
    274         assertTrue(mat.matches());
    275 
    276         baseString = "((?i))a";
    277         testString = "A";
    278         pat = Pattern.compile(baseString);
    279         mat = pat.matcher(testString);
    280         assertFalse(mat.matches());
    281 
    282         baseString = "|(?i)|a";
    283         testString = "A";
    284         pat = Pattern.compile(baseString);
    285         mat = pat.matcher(testString);
    286         assertTrue(mat.matches());
    287 
    288         baseString = "(?i)((?s)a.)";
    289         testString = "A\n";
    290         pat = Pattern.compile(baseString);
    291         mat = pat.matcher(testString);
    292         assertTrue(mat.matches());
    293 
    294         baseString = "(?i)((?-i)a)";
    295         testString = "A";
    296         pat = Pattern.compile(baseString);
    297         mat = pat.matcher(testString);
    298         assertFalse(mat.matches());
    299 
    300         baseString = "(?i)(?s:a.)";
    301         testString = "A\n";
    302         pat = Pattern.compile(baseString);
    303         mat = pat.matcher(testString);
    304         assertTrue(mat.matches());
    305 
    306         baseString = "(?i)fgh(?s:aa)";
    307         testString = "fghAA";
    308         pat = Pattern.compile(baseString);
    309         mat = pat.matcher(testString);
    310         assertTrue(mat.matches());
    311 
    312         baseString = "(?i)((?-i))a";
    313         testString = "A";
    314         pat = Pattern.compile(baseString);
    315         mat = pat.matcher(testString);
    316         assertTrue(mat.matches());
    317 
    318         baseString = "abc(?i)d";
    319         testString = "ABCD";
    320         pat = Pattern.compile(baseString);
    321         mat = pat.matcher(testString);
    322         assertFalse(mat.matches());
    323 
    324         testString = "abcD";
    325         mat = pat.matcher(testString);
    326         assertTrue(mat.matches());
    327 
    328         baseString = "a(?i)a(?-i)a(?i)a(?-i)a";
    329         testString = "aAaAa";
    330         pat = Pattern.compile(baseString);
    331         mat = pat.matcher(testString);
    332         assertTrue(mat.matches());
    333 
    334         testString = "aAAAa";
    335         mat = pat.matcher(testString);
    336         assertFalse(mat.matches());
    337     }
    338 
    339 // BEGIN Android-removed
    340 // The flags() method should only return those flags that were explicitly
    341 // passed during the compilation. The JDK also accepts the ones implicitly
    342 // contained in the pattern, but ICU doesn't do this.
    343 //
    344 //    public void testFlagsMethod() {
    345 //        String baseString;
    346 //        Pattern pat;
    347 //
    348 //        /*
    349 //         * These tests are for compatibility with RI only. Logically we have to
    350 //         * return only flags specified during the compilation. For example
    351 //         * pat.flags() == 0 when we compile Pattern pat =
    352 //         * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
    353 //         * in a case insensitive manner. So there is little sense to do calls to
    354 //         * flags() now.
    355 //         */
    356 //        baseString = "(?-i)";
    357 //        pat = Pattern.compile(baseString);
    358 //
    359 //        baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
    360 //        pat = Pattern.compile(baseString);
    361 //        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
    362 //
    363 //        baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
    364 //        pat = Pattern.compile(baseString);
    365 //        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
    366 //
    367 //        baseString = "(?is)a((?x)b.)";
    368 //        pat = Pattern.compile(baseString);
    369 //        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
    370 //
    371 //        baseString = "(?i)a((?-i))";
    372 //        pat = Pattern.compile(baseString);
    373 //        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE);
    374 //
    375 //        baseString = "((?i)a)";
    376 //        pat = Pattern.compile(baseString);
    377 //        assertEquals(pat.flags(), 0);
    378 //
    379 //        pat = Pattern.compile("(?is)abc");
    380 //        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    381 //    }
    382 //END Android-removed
    383 
    384     /*
    385      * Check default flags when they are not specified in pattern. Based on RI
    386      * since could not find that info
    387      */
    388     public void testFlagsCompileDefault() {
    389         for (String pat : testPatternsAlt) {
    390             try {
    391                 Pattern p = Pattern.compile(pat);
    392                 assertEquals(p.flags(), DEFAULT_FLAGS);
    393             } catch (Exception e) {
    394                 fail("Unexpected exception: " + e);
    395             }
    396         }
    397     }
    398 
    399     /*
    400      * Check that flags specified during compile are set properly This is a
    401      * simple implementation that does not use flags combinations. Need to
    402      * improve.
    403      */
    404     public void testFlagsCompileValid() {
    405         for (String pat : testPatternsAlt) {
    406             for (int flags : flagsSet) {
    407                 try {
    408                     Pattern p = Pattern.compile(pat, flags);
    409                     assertEquals(p.flags(), flags);
    410                 } catch (Exception e) {
    411                     fail("Unexpected exception: " + e);
    412                 }
    413             }
    414         }
    415     }
    416 
    417     public void testCompileStringInt() {
    418         /*
    419          * these tests are needed to verify that appropriate exceptions are
    420          * thrown
    421          */
    422         String pattern = "b)a";
    423         try {
    424             Pattern.compile(pattern);
    425             fail("Expected a PatternSyntaxException when compiling pattern: "
    426                     + pattern);
    427         } catch (PatternSyntaxException e) {
    428             // pass
    429         }
    430         pattern = "bcde)a";
    431         try {
    432             Pattern.compile(pattern);
    433             fail("Expected a PatternSyntaxException when compiling pattern: "
    434                     + pattern);
    435         } catch (PatternSyntaxException e) {
    436             // pass
    437         }
    438         pattern = "bbg())a";
    439         try {
    440             Pattern pat = Pattern.compile(pattern);
    441             fail("Expected a PatternSyntaxException when compiling pattern: "
    442                     + pattern);
    443         } catch (PatternSyntaxException e) {
    444             // pass
    445         }
    446 
    447         pattern = "cdb(?i))a";
    448         try {
    449             Pattern pat = Pattern.compile(pattern);
    450             fail("Expected a PatternSyntaxException when compiling pattern: "
    451                     + pattern);
    452         } catch (PatternSyntaxException e) {
    453             // pass
    454         }
    455 
    456         /*
    457          * This pattern should compile - HARMONY-2127
    458          * icu4c doesn't support canonical equivalence.
    459          */
    460 //        pattern = "x(?c)y";
    461 //        Pattern.compile(pattern);
    462 
    463         /*
    464          * this pattern doesn't match any string, but should be compiled anyway
    465          */
    466         pattern = "(b\\1)a";
    467         Pattern.compile(pattern);
    468     }
    469 
    470     /*
    471      * Class under test for Pattern compile(String)
    472      */
    473     public void testQuantCompileNeg() {
    474         String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh",
    475                 "{5,3shdfkjh}" };
    476         for (String element : patterns) {
    477             try {
    478                 Pattern.compile(element);
    479                 fail("PatternSyntaxException was expected, but compilation succeeds");
    480             } catch (PatternSyntaxException pse) {
    481                 continue;
    482             }
    483         }
    484         // Regression for HARMONY-1365
    485 // BEGIN Android-changed
    486 // Original regex contained some illegal stuff. Changed it slightly,
    487 // while maintaining the wicked character of this "mother of all
    488 // regexes".
    489 //        String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()";
    490         String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\.*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\.*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]{1,5})|(?x-xd:^{5}+)()";
    491 // END Android-changed
    492         assertNotNull(Pattern.compile(pattern));
    493     }
    494 
    495     public void testQuantCompilePos() {
    496         String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" };
    497         for (String element : patterns) {
    498             Pattern.compile(element);
    499         }
    500     }
    501 
    502     public void testQuantComposition() {
    503         String pattern = "(a{1,3})aab";
    504         java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern);
    505         java.util.regex.Matcher mat = pat.matcher("aaab");
    506         mat.matches();
    507         mat.start(1);
    508         mat.group(1);
    509     }
    510 
    511     public void testMatches() {
    512         String[][] posSeq = {
    513                 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
    514                 { "213567", "12324567", "1234567", "213213567",
    515                         "21312312312567", "444444567" },
    516                 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
    517                 { "213234567", "3458", "0987654", "7689546432", "0398576",
    518                         "98432", "5" },
    519                 {
    520                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
    521                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    522                                 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
    523                 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
    524                         "abbbAbbbliceaaa", "Alice" },
    525                 { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
    526                 { "xy" }, { "xy" }, { "xcy" }
    527 
    528         };
    529 
    530         for (int i = 0; i < testPatterns.length; i++) {
    531             for (int j = 0; j < posSeq[i].length; j++) {
    532                 assertTrue("Incorrect match: " + testPatterns[i] + " vs "
    533                         + posSeq[i][j], Pattern.matches(testPatterns[i],
    534                         posSeq[i][j]));
    535             }
    536         }
    537     }
    538 
    539     public void testMatchesException() {
    540         /* Negative assertion test. */
    541         for (String aPattern : wrongTestPatterns) {
    542             try {
    543                 Pattern.matches(aPattern, "Foo");
    544                 fail("PatternSyntaxException is expected");
    545             } catch (PatternSyntaxException pse) {
    546                 /* OKAY */
    547             } catch (Exception e) {
    548                 fail("Unexpected exception: " + e);
    549             }
    550         }
    551     }
    552 
    553     public void testTimeZoneIssue() {
    554         Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?");
    555         Matcher m = p.matcher("GMT-9:45");
    556         assertTrue(m.matches());
    557         assertEquals("-", m.group(1));
    558         assertEquals("9", m.group(2));
    559         assertEquals(":45", m.group(3));
    560         assertEquals("45", m.group(4));
    561     }
    562 
    563 // BEGIN Android-changed
    564 // Removed one pattern that is buggy on the JDK. We don't want to duplicate that.
    565     public void testCompileRanges() {
    566         String[] correctTestPatterns = { "[^]*abb]*", /* "[^a-d[^m-p]]*abb", */
    567                 "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb",
    568                 "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb",
    569                 "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" };
    570 
    571         String[] inputSecuence = { "kkkk", /* "admpabb", */ "abcabcd124654abb",
    572                 "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb",
    573                 "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb",
    574                 "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" };
    575 
    576         Pattern pat;
    577 
    578         for (int i = 0; i < correctTestPatterns.length; i++) {
    579             assertTrue("pattern: " + correctTestPatterns[i] + " input: "
    580                     + inputSecuence[i], Pattern.matches(correctTestPatterns[i],
    581                     inputSecuence[i]));
    582 
    583         }
    584 
    585         String[] wrongInputSecuence = { "]", /* "admpkk", */  "abcabcd124k654abb",
    586                 "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb",
    587                 "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb",
    588                 "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" };
    589 
    590         for (int i = 0; i < correctTestPatterns.length; i++) {
    591             assertFalse("pattern: " + correctTestPatterns[i] + " input: "
    592                     + wrongInputSecuence[i], Pattern.matches(
    593                     correctTestPatterns[i], wrongInputSecuence[i]));
    594 
    595         }
    596     }
    597 
    598     public void testRangesSpecialCases() {
    599         String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" };
    600 
    601         for (String element : neg_patterns) {
    602             try {
    603                 Pattern.compile(element);
    604                 fail("PatternSyntaxException was expected: " + element);
    605             } catch (PatternSyntaxException pse) {
    606             }
    607         }
    608 
    609         String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--",
    610                 "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" };
    611 
    612         for (int i = 0; i < pos_patterns.length; i++) {
    613             String pat = pos_patterns[i++];
    614             String inp = pos_patterns[i];
    615             assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches(
    616                     pat, inp));
    617         }
    618     }
    619  // END Android-changed
    620 
    621     public void testZeroSymbols() {
    622         assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb"));
    623     }
    624 
    625     public void testEscapes() {
    626         Pattern pat = Pattern.compile("\\Q{]()*?");
    627         Matcher mat = pat.matcher("{]()*?");
    628 
    629         assertTrue(mat.matches());
    630     }
    631 
    632     public void test_bug_181() {
    633         Pattern.compile("[\\t-\\r]");
    634     }
    635 
    636     // https://code.google.com/p/android/issues/detail?id=40103
    637     public void test_bug_40103() {
    638         Pattern.compile("(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl");
    639 
    640         // Looks like harmony had a similar "Bug187"...
    641         Pattern.compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))");
    642     }
    643 
    644     public void test_bug_4472() {
    645         // HARMONY-4472
    646         Pattern.compile("a*.+");
    647     }
    648 
    649     public void test_bug_5858() {
    650         // HARMONY-5858
    651         Pattern.compile("\\u6211", Pattern.LITERAL);
    652     }
    653 
    654     public void testOrphanQuantifiers() {
    655         try {
    656             Pattern.compile("+++++");
    657             fail("PatternSyntaxException expected");
    658         } catch (PatternSyntaxException pse) {
    659         }
    660     }
    661 
    662     public void testOrphanQuantifiers2() {
    663         try {
    664             Pattern pat = Pattern.compile("\\d+*");
    665             fail("PatternSyntaxException expected");
    666         } catch (PatternSyntaxException pse) {
    667         }
    668     }
    669 
    670     public void testBug197() {
    671         Object[] vals = { ":", new Integer(2),
    672                 new String[] { "boo", "and:foo" }, ":", new Integer(5),
    673                 new String[] { "boo", "and", "foo" }, ":", new Integer(-2),
    674                 new String[] { "boo", "and", "foo" }, ":", new Integer(3),
    675                 new String[] { "boo", "and", "foo" }, ":", new Integer(1),
    676                 new String[] { "boo:and:foo" }, "o", new Integer(5),
    677                 new String[] { "b", "", ":and:f", "", "" }, "o",
    678                 new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o",
    679                 new Integer(-2), new String[] { "b", "", ":and:f", "", "" },
    680                 "o", new Integer(0), new String[] { "b", "", ":and:f" } };
    681 
    682         for (int i = 0; i < vals.length / 3;) {
    683             String[] res = Pattern.compile(vals[i++].toString()).split(
    684                     "boo:and:foo", ((Integer) vals[i++]).intValue());
    685             String[] expectedRes = (String[]) vals[i++];
    686 
    687             assertEquals(expectedRes.length, res.length);
    688 
    689             for (int j = 0; j < expectedRes.length; j++) {
    690                 assertEquals(expectedRes[j], res[j]);
    691             }
    692         }
    693     }
    694 
    695     public void testURIPatterns() {
    696         String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
    697         String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$";
    698         String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
    699         String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$";
    700         String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$";
    701         String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$";
    702         String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*";
    703 
    704         Pattern.compile(URI_REGEXP_STR);
    705         Pattern.compile(REL_URI_REGEXP_STR);
    706         Pattern.compile(SCHEME_REGEXP_STR);
    707         Pattern.compile(IPV4_REGEXP_STR);
    708         Pattern.compile(IPV6_REGEXP_STR);
    709         Pattern.compile(IPV6_REGEXP_STR2);
    710         Pattern.compile(HOSTNAME_REGEXP_STR);
    711     }
    712 
    713     public void testFindBoundaryCases1() {
    714         Pattern pat = Pattern.compile(".*\n");
    715         Matcher mat = pat.matcher("a\n");
    716 
    717         mat.find();
    718         assertEquals("a\n", mat.group());
    719     }
    720 
    721     public void testFindBoundaryCases2() {
    722         Pattern pat = Pattern.compile(".*A");
    723         Matcher mat = pat.matcher("aAa");
    724 
    725         mat.find();
    726         assertEquals("aA", mat.group());
    727     }
    728 
    729     public void testFindBoundaryCases3() {
    730         Pattern pat = Pattern.compile(".*A");
    731         Matcher mat = pat.matcher("a\naA\n");
    732 
    733         mat.find();
    734         assertEquals("aA", mat.group());
    735     }
    736 
    737     public void testFindBoundaryCases4() {
    738         Pattern pat = Pattern.compile("A.*");
    739         Matcher mat = pat.matcher("A\n");
    740 
    741         mat.find();
    742         assertEquals("A", mat.group());
    743     }
    744 
    745     public void testFindBoundaryCases5() {
    746         Pattern pat = Pattern.compile(".*A.*");
    747         Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n");
    748         // Matcher mat = pat.matcher("\nA\n");
    749         String[] res = { "A", "A", "aaAaa", "aaaA" };
    750         int k = 0;
    751         for (; mat.find(); k++) {
    752             assertEquals(res[k], mat.group());
    753         }
    754     }
    755 
    756     public void testFindBoundaryCases6() {
    757         String[] res = { "", "a", "", "" };
    758         Pattern pat = Pattern.compile(".*");
    759         Matcher mat = pat.matcher("\na\n");
    760         int k = 0;
    761         for (; mat.find(); k++) {
    762             assertEquals(res[k], mat.group());
    763         }
    764         assertEquals(4, k);
    765     }
    766 
    767     public void testBackReferences() {
    768         Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))");
    769         Matcher mat = pat.matcher("(start1: word :start1)(start2: word :start2)");
    770         int k = 1;
    771         for (; mat.find(); k++) {
    772             assertEquals("start" + k, mat.group(2));
    773             assertEquals(" word ", mat.group(3));
    774             assertEquals("start" + k, mat.group(4));
    775         }
    776 
    777         assertEquals(3, k);
    778         pat = Pattern.compile(".*(.)\\1");
    779         mat = pat.matcher("saa");
    780         assertTrue(mat.matches());
    781     }
    782 
    783     public void testNewLine() {
    784         Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE);
    785         Matcher mat = pat.matcher("\r\n\n");
    786         int counter = 0;
    787         while (mat.find()) {
    788             counter++;
    789         }
    790         assertEquals(2, counter);
    791     }
    792 
    793     public void testFindGreedy() {
    794         Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL);
    795         Matcher mat = pat.matcher("aaaa\naaa\naaaaaa");
    796         mat.matches();
    797         assertEquals(15, mat.end());
    798     }
    799 
    800     public void testSerialization() throws Exception {
    801         Pattern pat = Pattern.compile("a*bc");
    802         SerializableAssert comparator = new SerializableAssert() {
    803             public void assertDeserialized(Serializable initial,
    804                     Serializable deserialized) {
    805                 assertEquals(((Pattern) initial).toString(),
    806                         ((Pattern) deserialized).toString());
    807             }
    808         };
    809         SerializationTest.verifyGolden(this, pat, comparator);
    810         SerializationTest.verifySelf(pat, comparator);
    811     }
    812 
    813     public void testSOLQuant() {
    814         Pattern pat = Pattern.compile("$*", Pattern.MULTILINE);
    815         Matcher mat = pat.matcher("\n\n");
    816         int counter = 0;
    817         while (mat.find()) {
    818             counter++;
    819         }
    820 
    821         assertEquals(3, counter);
    822     }
    823 
    824     public void testIllegalEscape() {
    825         try {
    826             Pattern.compile("\\y");
    827             fail("PatternSyntaxException expected");
    828         } catch (PatternSyntaxException pse) {
    829         }
    830     }
    831 
    832     public void testEmptyFamily() {
    833         Pattern.compile("\\p{Lower}");
    834     }
    835 
    836     public void testNonCaptConstr() {
    837         // Flags
    838         Pattern pat = Pattern.compile("(?i)b*(?-i)a*");
    839         assertTrue(pat.matcher("bBbBaaaa").matches());
    840         assertFalse(pat.matcher("bBbBAaAa").matches());
    841 
    842         // Non-capturing groups
    843         pat = Pattern.compile("(?i:b*)a*");
    844         assertTrue(pat.matcher("bBbBaaaa").matches());
    845         assertFalse(pat.matcher("bBbBAaAa").matches());
    846 
    847         pat = Pattern
    848         // 1 2 3 4 5 6 7 8 9 10 11
    849                 .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?");
    850         Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81");
    851         assertTrue(mat.matches());
    852         assertEquals("-1234", mat.group(1));
    853         assertEquals("21", mat.group(2));
    854         assertEquals("31", mat.group(3));
    855         assertEquals("T", mat.group(4));
    856         assertEquals("41", mat.group(5));
    857         assertEquals("51", mat.group(6));
    858         assertEquals("61", mat.group(7));
    859         assertEquals(".789", mat.group(8));
    860         assertEquals("+71", mat.group(9));
    861         assertEquals("81", mat.group(10));
    862 
    863         // positive lookahead
    864         pat = Pattern.compile(".*\\.(?=log$).*$");
    865         assertTrue(pat.matcher("a.b.c.log").matches());
    866         assertFalse(pat.matcher("a.b.c.log.").matches());
    867 
    868         // negative lookahead
    869         pat = Pattern.compile(".*\\.(?!log$).*$");
    870         assertFalse(pat.matcher("abc.log").matches());
    871         assertTrue(pat.matcher("abc.logg").matches());
    872 
    873         // positive lookbehind
    874         pat = Pattern.compile(".*(?<=abc)\\.log$");
    875         assertFalse(pat.matcher("cde.log").matches());
    876         assertTrue(pat.matcher("abc.log").matches());
    877 
    878         // negative lookbehind
    879         pat = Pattern.compile(".*(?<!abc)\\.log$");
    880         assertTrue(pat.matcher("cde.log").matches());
    881         assertFalse(pat.matcher("abc.log").matches());
    882 
    883         // atomic group
    884         pat = Pattern.compile("(?>a*)abb");
    885         assertFalse(pat.matcher("aaabb").matches());
    886         pat = Pattern.compile("(?>a*)bb");
    887         assertTrue(pat.matcher("aaabb").matches());
    888 
    889         pat = Pattern.compile("(?>a|aa)aabb");
    890         assertTrue(pat.matcher("aaabb").matches());
    891         pat = Pattern.compile("(?>aa|a)aabb");
    892         assertFalse(pat.matcher("aaabb").matches());
    893 
    894 // BEGIN Android-removed
    895 // Questionable constructs that ICU doesn't support.
    896 //        // quantifiers over look ahead
    897 //        pat = Pattern.compile(".*(?<=abc)*\\.log$");
    898 //        assertTrue(pat.matcher("cde.log").matches());
    899 //        pat = Pattern.compile(".*(?<=abc)+\\.log$");
    900 //        assertFalse(pat.matcher("cde.log").matches());
    901 // END Android-removed
    902 
    903     }
    904 
    905     public void testCorrectReplacementBackreferencedJointSet() {
    906         Pattern.compile("ab(a)*\\1");
    907         Pattern.compile("abc(cd)fg");
    908         Pattern.compile("aba*cd");
    909         Pattern.compile("ab(a)*+cd");
    910         Pattern.compile("ab(a)*?cd");
    911         Pattern.compile("ab(a)+cd");
    912         Pattern.compile(".*(.)\\1");
    913         Pattern.compile("ab((a)|c|d)e");
    914         Pattern.compile("abc((a(b))cd)");
    915         Pattern.compile("ab(a)++cd");
    916         Pattern.compile("ab(a)?(c)d");
    917         Pattern.compile("ab(a)?+cd");
    918         Pattern.compile("ab(a)??cd");
    919         Pattern.compile("ab(a)??cd");
    920         Pattern.compile("ab(a){1,3}?(c)d");
    921     }
    922 
    923     public void testCompilePatternWithTerminatorMark() {
    924         Pattern pat = Pattern.compile("a\u0000\u0000cd");
    925         Matcher mat = pat.matcher("a\u0000\u0000cd");
    926         assertTrue(mat.matches());
    927     }
    928 
    929     public void testAlternations() {
    930         String baseString = "|a|bc";
    931         Pattern pat = Pattern.compile(baseString);
    932         Matcher mat = pat.matcher("");
    933 
    934         assertTrue(mat.matches());
    935 
    936         baseString = "a||bc";
    937         pat = Pattern.compile(baseString);
    938         mat = pat.matcher("");
    939         assertTrue(mat.matches());
    940 
    941         baseString = "a|bc|";
    942         pat = Pattern.compile(baseString);
    943         mat = pat.matcher("");
    944         assertTrue(mat.matches());
    945 
    946         baseString = "a|b|";
    947         pat = Pattern.compile(baseString);
    948         mat = pat.matcher("");
    949         assertTrue(mat.matches());
    950 
    951         baseString = "a(|b|cd)e";
    952         pat = Pattern.compile(baseString);
    953         mat = pat.matcher("ae");
    954         assertTrue(mat.matches());
    955 
    956         baseString = "a(b||cd)e";
    957         pat = Pattern.compile(baseString);
    958         mat = pat.matcher("ae");
    959         assertTrue(mat.matches());
    960 
    961         baseString = "a(b|cd|)e";
    962         pat = Pattern.compile(baseString);
    963         mat = pat.matcher("ae");
    964         assertTrue(mat.matches());
    965 
    966         baseString = "a(b|c|)e";
    967         pat = Pattern.compile(baseString);
    968         mat = pat.matcher("ae");
    969         assertTrue(mat.matches());
    970 
    971         baseString = "a(|)e";
    972         pat = Pattern.compile(baseString);
    973         mat = pat.matcher("ae");
    974         assertTrue(mat.matches());
    975 
    976         baseString = "|";
    977         pat = Pattern.compile(baseString);
    978         mat = pat.matcher("");
    979         assertTrue(mat.matches());
    980 
    981         baseString = "a(?:|)e";
    982         pat = Pattern.compile(baseString);
    983         mat = pat.matcher("ae");
    984         assertTrue(mat.matches());
    985 
    986         baseString = "a||||bc";
    987         pat = Pattern.compile(baseString);
    988         mat = pat.matcher("");
    989         assertTrue(mat.matches());
    990 
    991         baseString = "(?i-is)|a";
    992         pat = Pattern.compile(baseString);
    993         mat = pat.matcher("a");
    994         assertTrue(mat.matches());
    995     }
    996 
    997     public void testMatchWithGroups() {
    998         String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr";
    999         String pattern = ".*(..).*\\1.*";
   1000         assertTrue(Pattern.compile(pattern).matcher(baseString).matches());
   1001 
   1002         baseString = "saa";
   1003         pattern = ".*(.)\\1";
   1004         assertTrue(Pattern.compile(pattern).matcher(baseString).matches());
   1005         assertTrue(Pattern.compile(pattern).matcher(baseString).find());
   1006     }
   1007 
   1008     public void testSplitEmptyCharSequence() {
   1009         String s1 = "";
   1010         String[] arr = s1.split(":");
   1011         assertEquals(arr.length, 1);
   1012     }
   1013 
   1014     public void testSplitEndsWithPattern() {
   1015         assertEquals(",,".split(",", 3).length, 3);
   1016         assertEquals(",,".split(",", 4).length, 3);
   1017 
   1018         assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5);
   1019         assertEquals(Pattern.compile("b").split("ab", -1).length, 2);
   1020     }
   1021 
   1022     public void testCaseInsensitiveFlag() {
   1023         assertTrue(Pattern.matches("(?i-:AbC)", "ABC"));
   1024     }
   1025 
   1026     public void testEmptyGroups() {
   1027         Pattern pat = Pattern.compile("ab(?>)cda");
   1028         Matcher mat = pat.matcher("abcda");
   1029         assertTrue(mat.matches());
   1030 
   1031         pat = Pattern.compile("ab()");
   1032         mat = pat.matcher("ab");
   1033         assertTrue(mat.matches());
   1034 
   1035         pat = Pattern.compile("abc(?:)(..)");
   1036         mat = pat.matcher("abcgf");
   1037         assertTrue(mat.matches());
   1038     }
   1039 
   1040     public void testEmbeddedFlags() {
   1041         String baseString = "(?i)((?s)a)";
   1042         String testString = "A";
   1043         Pattern pat = Pattern.compile(baseString);
   1044         Matcher mat = pat.matcher(testString);
   1045         assertTrue(mat.matches());
   1046 
   1047         baseString = "(?x)(?i)(?s)(?d)a";
   1048         testString = "A";
   1049         pat = Pattern.compile(baseString);
   1050         mat = pat.matcher(testString);
   1051         assertTrue(mat.matches());
   1052 
   1053         baseString = "(?x)(?i)(?s)(?d)a.";
   1054         testString = "a\n";
   1055         pat = Pattern.compile(baseString);
   1056         mat = pat.matcher(testString);
   1057         assertTrue(mat.matches());
   1058 
   1059         baseString = "abc(?x:(?i)(?s)(?d)a.)";
   1060         testString = "abcA\n";
   1061         pat = Pattern.compile(baseString);
   1062         mat = pat.matcher(testString);
   1063         assertTrue(mat.matches());
   1064 
   1065         baseString = "abc((?x)d)(?i)(?s)a";
   1066         testString = "abcdA";
   1067         pat = Pattern.compile(baseString);
   1068         mat = pat.matcher(testString);
   1069         assertTrue(mat.matches());
   1070     }
   1071 
   1072     public void testAltWithFlags() {
   1073         Pattern.compile("|(?i-xi)|()");
   1074     }
   1075 
   1076     public void testRestoreFlagsAfterGroup() {
   1077         String baseString = "abc((?x)d)   a";
   1078         String testString = "abcd   a";
   1079         Pattern pat = Pattern.compile(baseString);
   1080         Matcher mat = pat.matcher(testString);
   1081 
   1082         assertTrue(mat.matches());
   1083     }
   1084 
   1085     /*
   1086      * Verify if the Pattern support the following character classes:
   1087      * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored}
   1088      */
   1089     public void testCompileCharacterClass() {
   1090         // Regression for HARMONY-606, 696
   1091         Pattern pattern = Pattern.compile("\\p{javaLowerCase}");
   1092         assertNotNull(pattern);
   1093 
   1094         pattern = Pattern.compile("\\p{javaUpperCase}");
   1095         assertNotNull(pattern);
   1096 
   1097         pattern = Pattern.compile("\\p{javaWhitespace}");
   1098         assertNotNull(pattern);
   1099 
   1100         pattern = Pattern.compile("\\p{javaMirrored}");
   1101         assertNotNull(pattern);
   1102 
   1103         pattern = Pattern.compile("\\p{javaDefined}");
   1104         assertNotNull(pattern);
   1105 
   1106         pattern = Pattern.compile("\\p{javaDigit}");
   1107         assertNotNull(pattern);
   1108 
   1109         pattern = Pattern.compile("\\p{javaIdentifierIgnorable}");
   1110         assertNotNull(pattern);
   1111 
   1112         pattern = Pattern.compile("\\p{javaISOControl}");
   1113         assertNotNull(pattern);
   1114 
   1115         pattern = Pattern.compile("\\p{javaJavaIdentifierPart}");
   1116         assertNotNull(pattern);
   1117 
   1118         pattern = Pattern.compile("\\p{javaJavaIdentifierStart}");
   1119         assertNotNull(pattern);
   1120 
   1121         pattern = Pattern.compile("\\p{javaLetter}");
   1122         assertNotNull(pattern);
   1123 
   1124         pattern = Pattern.compile("\\p{javaLetterOrDigit}");
   1125         assertNotNull(pattern);
   1126 
   1127         pattern = Pattern.compile("\\p{javaSpaceChar}");
   1128         assertNotNull(pattern);
   1129 
   1130         pattern = Pattern.compile("\\p{javaTitleCase}");
   1131         assertNotNull(pattern);
   1132 
   1133         pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}");
   1134         assertNotNull(pattern);
   1135 
   1136         pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}");
   1137         assertNotNull(pattern);
   1138     }
   1139 
   1140     public void testRangesWithSurrogatesSupplementary() {
   1141         String patString = "[abc\uD8D2]";
   1142         String testString = "\uD8D2";
   1143         Pattern pat = Pattern.compile(patString);
   1144         Matcher mat = pat.matcher(testString);
   1145         assertTrue(mat.matches());
   1146 
   1147         testString = "a";
   1148         mat = pat.matcher(testString);
   1149         assertTrue(mat.matches());
   1150 
   1151         testString = "ef\uD8D2\uDD71gh";
   1152         mat = pat.matcher(testString);
   1153         assertFalse(mat.find());
   1154 
   1155         testString = "ef\uD8D2gh";
   1156         mat = pat.matcher(testString);
   1157         assertTrue(mat.find());
   1158 
   1159         patString = "[abc\uD8D3&&[c\uD8D3]]";
   1160         testString = "c";
   1161         pat = Pattern.compile(patString);
   1162         mat = pat.matcher(testString);
   1163         assertTrue(mat.matches());
   1164 
   1165         testString = "a";
   1166         mat = pat.matcher(testString);
   1167         assertFalse(mat.matches());
   1168 
   1169         testString = "ef\uD8D3\uDD71gh";
   1170         mat = pat.matcher(testString);
   1171         assertFalse(mat.find());
   1172 
   1173         testString = "ef\uD8D3gh";
   1174         mat = pat.matcher(testString);
   1175         assertTrue(mat.find());
   1176 
   1177         patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]";
   1178         testString = "c";
   1179         pat = Pattern.compile(patString);
   1180         mat = pat.matcher(testString);
   1181         assertTrue(mat.matches());
   1182 
   1183         testString = "\uDBEE\uDF0C";
   1184         mat = pat.matcher(testString);
   1185         assertTrue(mat.matches());
   1186 
   1187         testString = "ef\uD8D3\uDD71gh";
   1188         mat = pat.matcher(testString);
   1189         assertFalse(mat.find());
   1190 
   1191         testString = "ef\uD8D3gh";
   1192         mat = pat.matcher(testString);
   1193         assertTrue(mat.find());
   1194 
   1195         patString = "[abc\uDBFC]\uDDC2cd";
   1196         testString = "\uDBFC\uDDC2cd";
   1197         pat = Pattern.compile(patString);
   1198         mat = pat.matcher(testString);
   1199         assertFalse(mat.matches());
   1200 
   1201         testString = "a\uDDC2cd";
   1202         mat = pat.matcher(testString);
   1203         assertTrue(mat.matches());
   1204     }
   1205 
   1206     public void testSequencesWithSurrogatesSupplementary() {
   1207         String patString = "abcd\uD8D3";
   1208         String testString = "abcd\uD8D3\uDFFC";
   1209         Pattern pat = Pattern.compile(patString);
   1210         Matcher mat = pat.matcher(testString);
   1211 // BEGIN Android-changed
   1212 // This one really doesn't make sense, as the above is a corrupt surrogate.
   1213 // Even if it's matched by the JDK, it's more of a bug than of a behavior one
   1214 // might want to duplicate.
   1215 //        assertFalse(mat.find());
   1216 // END Android-changed
   1217 
   1218         testString = "abcd\uD8D3abc";
   1219         mat = pat.matcher(testString);
   1220         assertTrue(mat.find());
   1221 
   1222         patString = "ab\uDBEFcd";
   1223         testString = "ab\uDBEFcd";
   1224         pat = Pattern.compile(patString);
   1225         mat = pat.matcher(testString);
   1226         assertTrue(mat.matches());
   1227 
   1228         patString = "\uDFFCabcd";
   1229         testString = "\uD8D3\uDFFCabcd";
   1230         pat = Pattern.compile(patString);
   1231         mat = pat.matcher(testString);
   1232         assertFalse(mat.find());
   1233 
   1234         testString = "abc\uDFFCabcdecd";
   1235         mat = pat.matcher(testString);
   1236         assertTrue(mat.find());
   1237 
   1238         patString = "\uD8D3\uDFFCabcd";
   1239         testString = "abc\uD8D3\uD8D3\uDFFCabcd";
   1240         pat = Pattern.compile(patString);
   1241         mat = pat.matcher(testString);
   1242         assertTrue(mat.find());
   1243     }
   1244 
   1245     public void testPredefinedClassesWithSurrogatesSupplementary() {
   1246         String patString = "[123\\D]";
   1247         String testString = "a";
   1248         Pattern pat = Pattern.compile(patString);
   1249         Matcher mat = pat.matcher(testString);
   1250         assertTrue(mat.find());
   1251 
   1252         testString = "5";
   1253         mat = pat.matcher(testString);
   1254         assertFalse(mat.find());
   1255 
   1256         testString = "3";
   1257         mat = pat.matcher(testString);
   1258         assertTrue(mat.find());
   1259 
   1260         // low surrogate
   1261         testString = "\uDFC4";
   1262         mat = pat.matcher(testString);
   1263         assertTrue(mat.find());
   1264 
   1265         // high surrogate
   1266         testString = "\uDADA";
   1267         mat = pat.matcher(testString);
   1268         assertTrue(mat.find());
   1269 
   1270         testString = "\uDADA\uDFC4";
   1271         mat = pat.matcher(testString);
   1272         assertTrue(mat.find());
   1273 
   1274         patString = "[123[^\\p{javaDigit}]]";
   1275         testString = "a";
   1276         pat = Pattern.compile(patString);
   1277         mat = pat.matcher(testString);
   1278         assertTrue(mat.find());
   1279 
   1280         testString = "5";
   1281         mat = pat.matcher(testString);
   1282         assertFalse(mat.find());
   1283 
   1284         testString = "3";
   1285         mat = pat.matcher(testString);
   1286         assertTrue(mat.find());
   1287 
   1288         // low surrogate
   1289         testString = "\uDFC4";
   1290         mat = pat.matcher(testString);
   1291         assertTrue(mat.find());
   1292 
   1293         // high surrogate
   1294         testString = "\uDADA";
   1295         mat = pat.matcher(testString);
   1296         assertTrue(mat.find());
   1297 
   1298         testString = "\uDADA\uDFC4";
   1299         mat = pat.matcher(testString);
   1300         assertTrue(mat.find());
   1301 
   1302         // surrogate characters
   1303         patString = "\\p{Cs}";
   1304         testString = "\uD916\uDE27";
   1305         pat = Pattern.compile(patString);
   1306         mat = pat.matcher(testString);
   1307 
   1308         /*
   1309          * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we
   1310          * have to treat text as code points not code units. \\p{Cs} matches any
   1311          * surrogate character but here testString is a one code point
   1312          * consisting of two code units (two surrogate characters) so we find
   1313          * nothing
   1314          */
   1315         // assertFalse(mat.find());
   1316         // swap low and high surrogates
   1317         testString = "\uDE27\uD916";
   1318         mat = pat.matcher(testString);
   1319         assertTrue(mat.find());
   1320 
   1321         patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]";
   1322         testString = "1";
   1323         pat = Pattern.compile(patString);
   1324         mat = pat.matcher(testString);
   1325         assertTrue(mat.find());
   1326 
   1327         testString = "\uD916";
   1328         pat = Pattern.compile(patString);
   1329         mat = pat.matcher(testString);
   1330         assertFalse(mat.find());
   1331 
   1332         testString = "\uD916\uDE27";
   1333         pat = Pattern.compile(patString);
   1334         mat = pat.matcher(testString);
   1335         assertTrue(mat.find());
   1336 
   1337         // \uD9A0\uDE8E=\u7828E
   1338         // \u78281=\uD9A0\uDE81
   1339         patString = "[a-\uD9A0\uDE8E]";
   1340         testString = "\uD9A0\uDE81";
   1341         pat = Pattern.compile(patString);
   1342         mat = pat.matcher(testString);
   1343         assertTrue(mat.matches());
   1344     }
   1345 
   1346     public void testDotConstructionWithSurrogatesSupplementary() {
   1347         String patString = ".";
   1348         String testString = "\uD9A0\uDE81";
   1349         Pattern pat = Pattern.compile(patString);
   1350         Matcher mat = pat.matcher(testString);
   1351         assertTrue(mat.matches());
   1352 
   1353         testString = "\uDE81";
   1354         mat = pat.matcher(testString);
   1355         assertTrue(mat.matches());
   1356 
   1357         testString = "\uD9A0";
   1358         mat = pat.matcher(testString);
   1359         assertTrue(mat.matches());
   1360 
   1361         testString = "\n";
   1362         mat = pat.matcher(testString);
   1363         assertFalse(mat.matches());
   1364 
   1365         patString = ".*\uDE81";
   1366         testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81";
   1367         pat = Pattern.compile(patString);
   1368         mat = pat.matcher(testString);
   1369         assertFalse(mat.matches());
   1370 
   1371         testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81";
   1372         mat = pat.matcher(testString);
   1373         assertTrue(mat.matches());
   1374 
   1375         patString = ".*";
   1376         testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81";
   1377         pat = Pattern.compile(patString, Pattern.DOTALL);
   1378         mat = pat.matcher(testString);
   1379         assertTrue(mat.matches());
   1380     }
   1381 
   1382     public void test_quoteLjava_lang_String() {
   1383         for (String aPattern : testPatterns) {
   1384             Pattern p = Pattern.compile(aPattern);
   1385             try {
   1386                 assertEquals("quote was wrong for plain text", "\\Qtest\\E", p
   1387                         .quote("test"));
   1388                 assertEquals("quote was wrong for text with quote sign",
   1389                         "\\Q\\Qtest\\E", p.quote("\\Qtest"));
   1390                 assertEquals("quote was wrong for quotted text",
   1391                         "\\Q\\Qtest\\E\\\\E\\Q\\E", p.quote("\\Qtest\\E"));
   1392             } catch (Exception e) {
   1393                 fail("Unexpected exception: " + e);
   1394             }
   1395         }
   1396     }
   1397 
   1398     public void test_matcherLjava_lang_StringLjava_lang_CharSequence() {
   1399         String[][] posSeq = {
   1400                 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
   1401                 { "213567", "12324567", "1234567", "213213567",
   1402                         "21312312312567", "444444567" },
   1403                 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
   1404                 { "213234567", "3458", "0987654", "7689546432", "0398576",
   1405                         "98432", "5" },
   1406                 {
   1407                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
   1408                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
   1409                                 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
   1410                 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
   1411                         "abbbAbbbliceaaa", "Alice" },
   1412                 { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
   1413                 { "xy" }, { "xy" }, { "xcy" }
   1414 
   1415         };
   1416 
   1417         for (int i = 0; i < testPatterns.length; i++) {
   1418             for (int j = 0; j < posSeq[i].length; j++) {
   1419                 assertTrue("Incorrect match: " + testPatterns[i] + " vs "
   1420                         + posSeq[i][j], Pattern.compile(testPatterns[i])
   1421                         .matcher(posSeq[i][j]).matches());
   1422             }
   1423         }
   1424     }
   1425 
   1426     public void testQuantifiersWithSurrogatesSupplementary() {
   1427         String patString = "\uD9A0\uDE81*abc";
   1428         String testString = "\uD9A0\uDE81\uD9A0\uDE81abc";
   1429         Pattern pat = Pattern.compile(patString);
   1430         Matcher mat = pat.matcher(testString);
   1431         assertTrue(mat.matches());
   1432 
   1433         testString = "abc";
   1434         mat = pat.matcher(testString);
   1435         assertTrue(mat.matches());
   1436     }
   1437 
   1438     public void testAlternationsWithSurrogatesSupplementary() {
   1439         String patString = "\uDE81|\uD9A0\uDE81|\uD9A0";
   1440         String testString = "\uD9A0";
   1441         Pattern pat = Pattern.compile(patString);
   1442         Matcher mat = pat.matcher(testString);
   1443         assertTrue(mat.matches());
   1444 
   1445         testString = "\uDE81";
   1446         mat = pat.matcher(testString);
   1447         assertTrue(mat.matches());
   1448 
   1449         testString = "\uD9A0\uDE81";
   1450         mat = pat.matcher(testString);
   1451         assertTrue(mat.matches());
   1452 
   1453         testString = "\uDE81\uD9A0";
   1454         mat = pat.matcher(testString);
   1455         assertFalse(mat.matches());
   1456     }
   1457 
   1458     public void testGroupsWithSurrogatesSupplementary() {
   1459 
   1460         //this pattern matches nothing
   1461         String patString = "(\uD9A0)\uDE81";
   1462         String testString = "\uD9A0\uDE81";
   1463         Pattern pat = Pattern.compile(patString);
   1464         Matcher mat = pat.matcher(testString);
   1465         assertFalse(mat.matches());
   1466 
   1467         patString = "(\uD9A0)";
   1468         testString = "\uD9A0\uDE81";
   1469         pat = Pattern.compile(patString, Pattern.DOTALL);
   1470         mat = pat.matcher(testString);
   1471         assertFalse(mat.find());
   1472     }
   1473 
   1474     /*
   1475      * Regression test for HARMONY-688
   1476      */
   1477     public void testUnicodeCategoryWithSurrogatesSupplementary() {
   1478         Pattern p = Pattern.compile("\\p{javaLowerCase}");
   1479         Matcher matcher = p.matcher("\uD801\uDC28");
   1480         assertTrue(matcher.find());
   1481     }
   1482 
   1483     public void testSplitEmpty() {
   1484 
   1485         Pattern pat = Pattern.compile("");
   1486         String[] s = pat.split("", -1);
   1487 
   1488         assertEquals(1, s.length);
   1489         assertEquals("", s[0]);
   1490     }
   1491 
   1492     public void testToString() {
   1493         for (int i = 0; i < testPatterns.length; i++) {
   1494             Pattern p = Pattern.compile(testPatterns[i]);
   1495             assertEquals(testPatterns[i], p.toString());
   1496         }
   1497     }
   1498 
   1499     // http://code.google.com/p/android/issues/detail?id=19308
   1500     public void test_hitEnd() {
   1501         Pattern p = Pattern.compile("^2(2[4-9]|3\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
   1502         Matcher m = p.matcher("224..");
   1503         boolean isPartialMatch = !m.matches() && m.hitEnd();
   1504         assertFalse(isPartialMatch);
   1505     }
   1506 
   1507     public void testCommentsInPattern() {
   1508         Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS);
   1509         assertTrue(p.matcher("abcd").matches());
   1510     }
   1511 
   1512     public void testCompileNonCaptGroup() {
   1513         // icu4c doesn't support CANON_EQ.
   1514         Pattern.compile("(?:)"/*, Pattern.CANON_EQ*/);
   1515         Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.DOTALL);
   1516         Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.CASE_INSENSITIVE);
   1517         Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.COMMENTS | Pattern.UNIX_LINES);
   1518     }
   1519 
   1520     public void testFlagsMethod() {
   1521         // icu4c doesn't count inline flags that span the entire regex as being global flags.
   1522         // Android just returns those flags actually passed to Pattern.compile.
   1523         if (true) {
   1524             return;
   1525         }
   1526 
   1527         String baseString;
   1528         Pattern pat;
   1529 
   1530         // These tests are for compatibility with RI only. Logically we have to
   1531         // return only flags specified during the compilation. For example
   1532         // pat.flags() == 0 when we compile Pattern pat =
   1533         // Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
   1534         // in a case insensitive manner. So there is little sense to do calls to
   1535         // flags() now.
   1536         baseString = "(?-i)";
   1537         pat = Pattern.compile(baseString);
   1538 
   1539         baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
   1540         pat = Pattern.compile(baseString);
   1541         assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
   1542 
   1543         baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
   1544         pat = Pattern.compile(baseString);
   1545         assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
   1546 
   1547         baseString = "(?is)a((?x)b.)";
   1548         pat = Pattern.compile(baseString);
   1549         assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
   1550 
   1551         baseString = "(?i)a((?-i))";
   1552         pat = Pattern.compile(baseString);
   1553         assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE);
   1554 
   1555         baseString = "((?i)a)";
   1556         pat = Pattern.compile(baseString);
   1557         assertEquals(pat.flags(), 0);
   1558 
   1559         pat = Pattern.compile("(?is)abc");
   1560         assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
   1561     }
   1562 
   1563     public void testCanonEqFlag() {
   1564         // icu4c doesn't support CANON_EQ.
   1565         if (true) {
   1566             return;
   1567         }
   1568 
   1569         // for decompositions see
   1570         // http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt
   1571         // http://www.unicode.org/reports/tr15/#Decomposition
   1572         String baseString;
   1573         String testString;
   1574         Pattern pat;
   1575         Matcher mat;
   1576 
   1577         baseString = "ab(a*)\\1";
   1578         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1579 
   1580         baseString = "a(abcdf)d";
   1581         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1582 
   1583         baseString = "aabcdfd";
   1584         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1585 
   1586         // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304
   1587         // \u00CC -> \u0049\u0300
   1588 
   1589         baseString = "\u01E0\u00CCcdb(ac)";
   1590         testString = "\u0226\u0304\u0049\u0300cdbac";
   1591         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1592         mat = pat.matcher(testString);
   1593         assertTrue(mat.matches());
   1594 
   1595         baseString = "\u01E0cdb(a\u00CCc)";
   1596         testString = "\u0041\u0307\u0304cdba\u0049\u0300c";
   1597         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1598         mat = pat.matcher(testString);
   1599         assertTrue(mat.matches());
   1600 
   1601         baseString = "a\u00CC";
   1602         testString = "a\u0049\u0300";
   1603         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1604         mat = pat.matcher(testString);
   1605         assertTrue(mat.matches());
   1606 
   1607         baseString = "\u0226\u0304cdb(ac\u0049\u0300)";
   1608         testString = "\u01E0cdbac\u00CC";
   1609         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1610         mat = pat.matcher(testString);
   1611         assertTrue(mat.matches());
   1612 
   1613         baseString = "cdb(?:\u0041\u0307\u0304\u00CC)";
   1614         testString = "cdb\u0226\u0304\u0049\u0300";
   1615         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1616         mat = pat.matcher(testString);
   1617         assertTrue(mat.matches());
   1618 
   1619         baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)";
   1620         testString = "\u01E0b\u00CCcdbac";
   1621         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1622         mat = pat.matcher(testString);
   1623         assertTrue(mat.matches());
   1624 
   1625         baseString = "\u01E0|\u00CCcdb(ac)";
   1626         testString = "\u0041\u0307\u0304";
   1627         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1628         mat = pat.matcher(testString);
   1629         assertTrue(mat.matches());
   1630 
   1631         baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]";
   1632         testString = "cdb\u0041\u0307\u0304b";
   1633         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1634         mat = pat.matcher(testString);
   1635         assertTrue(mat.matches());
   1636 
   1637         baseString = "a\u0300";
   1638         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1639         mat = pat.matcher("a\u00E0a");
   1640         assertTrue(mat.find());
   1641 
   1642         baseString = "\u7B20\uF9F8abc";
   1643         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1644         mat = pat.matcher("\uF9F8\uF9F8abc");
   1645         assertTrue(mat.matches());
   1646 
   1647         // \u01F9 -> \u006E\u0300
   1648         // \u00C3 -> \u0041\u0303
   1649 
   1650         baseString = "cdb(?:\u00C3\u006E\u0300)";
   1651         testString = "cdb\u0041\u0303\u01F9";
   1652         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1653         mat = pat.matcher(testString);
   1654         assertTrue(mat.matches());
   1655 
   1656         // \u014C -> \u004F\u0304
   1657         // \u0163 -> \u0074\u0327
   1658 
   1659         baseString = "cdb(?:\u0163\u004F\u0304)";
   1660         testString = "cdb\u0074\u0327\u014C";
   1661         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1662         mat = pat.matcher(testString);
   1663         assertTrue(mat.matches());
   1664 
   1665         // \u00E1->a\u0301
   1666         // canonical ordering takes place \u0301\u0327 -> \u0327\u0301
   1667 
   1668         baseString = "c\u0327\u0301";
   1669         testString = "c\u0301\u0327";
   1670         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1671         mat = pat.matcher(testString);
   1672         assertTrue(mat.matches());
   1673 
   1674         /*
   1675         Hangul decompositions
   1676         */
   1677         // \uD4DB->\u1111\u1171\u11B6
   1678         // \uD21E->\u1110\u116D\u11B5
   1679         // \uD264->\u1110\u1170
   1680         // not Hangul:\u0453->\u0433\u0301
   1681         baseString = "a\uD4DB\u1111\u1171\u11B6\uD264";
   1682         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1683 
   1684         baseString = "\u0453c\uD4DB";
   1685         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1686 
   1687         baseString = "a\u1110\u116D\u11B5b\uD21Ebc";
   1688         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1689 
   1690         baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)";
   1691         testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
   1692         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1693         mat = pat.matcher(testString);
   1694         assertTrue(mat.matches());
   1695 
   1696         baseString = "\uD4DB\uD264cdb(a\uD21Ec)";
   1697         testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c";
   1698         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1699         mat = pat.matcher(testString);
   1700         assertTrue(mat.matches());
   1701 
   1702         baseString = "a\uD4DB";
   1703         testString = "a\u1111\u1171\u11B6";
   1704         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1705         mat = pat.matcher(testString);
   1706         assertTrue(mat.matches());
   1707 
   1708         baseString = "a\uD21E";
   1709         testString = "a\u1110\u116D\u11B5";
   1710         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1711         mat = pat.matcher(testString);
   1712         assertTrue(mat.matches());
   1713 
   1714         baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)";
   1715         testString = "\uD4DBcdbac\uD21E";
   1716         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1717         mat = pat.matcher(testString);
   1718         assertTrue(mat.matches());
   1719 
   1720         baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)";
   1721         testString = "cdb\uD4DB\u1110\u116D\u11B5";
   1722         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1723         mat = pat.matcher(testString);
   1724         assertTrue(mat.matches());
   1725 
   1726         baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)";
   1727         testString = "\uD4DBb\uD21Ecdbac";
   1728         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1729         mat = pat.matcher(testString);
   1730         assertTrue(mat.matches());
   1731 
   1732         baseString = "\uD4DB|\u00CCcdb(ac)";
   1733         testString = "\u1111\u1171\u11B6";
   1734         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1735         mat = pat.matcher(testString);
   1736         assertTrue(mat.matches());
   1737 
   1738         baseString = "\uD4DB|\u00CCcdb(ac)";
   1739         testString = "\u1111\u1171";
   1740         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1741         mat = pat.matcher(testString);
   1742         assertFalse(mat.matches());
   1743 
   1744         baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]";
   1745         testString = "cdb\u1111\u1171\u11B6b";
   1746         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1747         mat = pat.matcher(testString);
   1748         assertTrue(mat.matches());
   1749 
   1750         baseString = "\uD4DB";
   1751         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1752         mat = pat.matcher("a\u1111\u1171\u11B6a");
   1753         assertTrue(mat.find());
   1754 
   1755         baseString = "\u1111";
   1756         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1757         mat = pat.matcher("bcda\uD4DBr");
   1758         assertFalse(mat.find());
   1759     }
   1760 
   1761     public void testIndexesCanonicalEq() {
   1762         // icu4c doesn't support CANON_EQ.
   1763         if (true) {
   1764             return;
   1765         }
   1766 
   1767         String baseString;
   1768         String testString;
   1769         Pattern pat;
   1770         Matcher mat;
   1771 
   1772         baseString = "\uD4DB";
   1773         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1774         mat = pat.matcher("bcda\u1111\u1171\u11B6awr");
   1775         assertTrue(mat.find());
   1776         assertEquals(mat.start(), 4);
   1777         assertEquals(mat.end(), 7);
   1778 
   1779         baseString = "\uD4DB\u1111\u1171\u11B6";
   1780         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1781         mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr");
   1782         assertTrue(mat.find());
   1783         assertEquals(mat.start(), 4);
   1784         assertEquals(mat.end(), 8);
   1785 
   1786         baseString = "\uD4DB\uD21E\u1110\u1170";
   1787         testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
   1788         pat = Pattern.compile(baseString, Pattern.CANON_EQ);
   1789         mat = pat.matcher(testString);
   1790         assertTrue(mat.find());
   1791         assertEquals(mat.start(), 6);
   1792         assertEquals(mat.end(), 13);
   1793     }
   1794 
   1795     public void testCanonEqFlagWithSupplementaryCharacters() {
   1796         // icu4c doesn't support CANON_EQ.
   1797         if (true) {
   1798             return;
   1799         }
   1800 
   1801         /*
   1802         \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32
   1803         \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F
   1804         ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16
   1805         */
   1806         String patString = "abc\uD834\uDDBFef";
   1807         String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
   1808         Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ);
   1809         Matcher mat = pat.matcher(testString);
   1810         assertTrue(mat.matches());
   1811 
   1812         testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
   1813         mat = pat.matcher(testString);
   1814         assertTrue(mat.matches());
   1815 
   1816         patString = "abc\uD834\uDDBB\uD834\uDD6Fef";
   1817         testString = "abc\uD834\uDDBFef";
   1818         pat = Pattern.compile(patString, Pattern.CANON_EQ);
   1819         mat = pat.matcher(testString);
   1820         assertTrue(mat.matches());
   1821 
   1822         testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
   1823         mat = pat.matcher(testString);
   1824         assertTrue(mat.matches());
   1825 
   1826         patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
   1827         testString = "abc\uD834\uDDBFef";
   1828         pat = Pattern.compile(patString, Pattern.CANON_EQ);
   1829         mat = pat.matcher(testString);
   1830         assertTrue(mat.matches());
   1831 
   1832         testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
   1833         mat = pat.matcher(testString);
   1834         assertTrue(mat.matches());
   1835 
   1836         // Test supplementary characters with no decomposition
   1837         patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef";
   1838         testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef";
   1839         pat = Pattern.compile(patString, Pattern.CANON_EQ);
   1840         mat = pat.matcher(testString);
   1841         assertTrue(mat.matches());
   1842     }
   1843 
   1844     public void testAsPredicate() {
   1845         String[][] posSeq = {
   1846                 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
   1847                 { "213567", "12324567", "1234567", "213213567",
   1848                         "21312312312567", "444444567" },
   1849                 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
   1850                 { "213234567", "3458", "0987654", "7689546432", "0398576",
   1851                         "98432", "5" },
   1852                 {
   1853                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
   1854                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
   1855                                 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
   1856                 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
   1857                         "abbbAbbbliceaaa", "Alice" },
   1858                 { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
   1859                 { "xy" }, { "xy" }, { "xcy" }
   1860         };
   1861 
   1862         for (int i = 0; i < testPatterns.length; i++) {
   1863             Pattern p = Pattern.compile(testPatterns[i]);
   1864             for (int j = 0; j < posSeq[i].length; j++) {
   1865                 assertTrue(p.asPredicate().test(posSeq[i][j]));
   1866             }
   1867         }
   1868     }
   1869 
   1870     public void testSplitAsStream() {
   1871         String s[];
   1872         Pattern pat = Pattern.compile("b");
   1873         s = pat.splitAsStream("abccbadfebb").toArray(String[]::new);
   1874         assertEquals(s.length, 3);
   1875         s = pat.splitAsStream("").toArray(String[]::new);
   1876         assertEquals(s.length, 0);
   1877         pat = Pattern.compile("");
   1878         s = pat.splitAsStream("").toArray(String[]::new);
   1879         assertEquals(s.length, 0);
   1880         s = pat.splitAsStream("abccbadfe").toArray(String[]::new);
   1881         assertEquals(s.length, 9);
   1882     }
   1883 }
   1884