Home | History | Annotate | Download | only in regex
      1 /* Licensed to the Apache Software Foundation (ASF) under one or more
      2  * contributor license agreements.  See the NOTICE file distributed with
      3  * this work for additional information regarding copyright ownership.
      4  * The ASF licenses this file to You under the Apache License, Version 2.0
      5  * (the "License"); you may not use this file except in compliance with
      6  * the License.  You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.harmony.regex.tests.java.util.regex;
     18 
     19 import java.util.regex.Matcher;
     20 import java.util.regex.Pattern;
     21 import java.util.regex.PatternSyntaxException;
     22 
     23 import junit.framework.TestCase;
     24 
/**
 * Tests basic {@link Pattern} compilation and {@link Matcher} behavior:
 * Unicode categories, capturing groups, quantifiers and anchors.
 */
     29 public class Pattern2Test extends TestCase {
     30 
     31     public void testUnicodeCategories() throws PatternSyntaxException {
     32         // Test Unicode categories using \p and \P
     33         // One letter codes: L, M, N, P, S, Z, C
     34         // Two letter codes: Lu, Nd, Sc, Sm, ...
     35         // See java.lang.Character and Unicode standard for complete list
     36         // TODO
     37         // Test \p{L}
     38         // TODO
     39 
     40         // Test \p{N}
     41         // TODO
     42 
     43         // Test two letter codes:
     44         // From unicode.org:
     45         // Lu
     46         // Ll
     47         // Lt
     48         // Lm
     49         // Lo
     50         // Mn
     51         // Mc
     52         // Me
     53         // Nd
     54         // Nl
     55         // No
     56         // Pc
     57         // Pd
     58         // Ps
     59         // Pe
     60         // Pi
     61         // Pf
     62         // Po
     63         // Sm
     64         // Sc
     65         // Sk
     66         // So
     67         // Zs
     68         // Zl
     69         // Zp
     70         // Cc
     71         // Cf
     72         // Cs
     73         // Co
     74         // Cn
     75 
     76         // TODO add more tests per category
     77         //{"Cc", "\u0000", "-\u0041"},
     78         testCategory("Cf", "\u202B");
     79         testCategory("Co", "\uE000");
     80         testCategory("Cs", "\uD800");
     81         testCategory("Ll", "a", "b", "x", "y", "z", "-A", "-Z");
     82         testCategory("Lm", "\u02B9");
     83         testCategory("Lu", "B", "C", "-c");
     84         testCategory("Lo", "\u05E2");
     85         testCategory("Lt", "\u01C5");
     86         testCategory("Mc", "\u0903");
     87         testCategory("Me", "\u0488");
     88         testCategory("Mn", "\u0300");
     89         testCategory("Nd", "\u0030");
     90         testCategory("Nl", "\u2164");
     91         testCategory("No", "\u0BF0");
     92         // testCategory("Pc", "\u30FB");
     93         testCategory("Pd", "\u2015");
     94         testCategory("Pe", "\u207E");
     95         testCategory("Po", "\u00B7");
     96         testCategory("Ps", "\u0F3C");
     97         testCategory("Sc", "\u20A0");
     98         testCategory("Sk", "\u00B8");
     99         testCategory("Sm", "\u002B");
    100         testCategory("So", "\u0B70");
    101         testCategory("Zl", "\u2028");
    102         // testCategory("Pi", "\u200C");
    103         testCategory("Zp", "\u2029");
    104     }
    105 
    106     private void testCategory(String cat, String... matches) {
    107         String pa = "{"+cat+"}";
    108         String pat = "\\p"+pa;
    109         String npat = "\\P"+pa;
    110         Pattern p = Pattern.compile(pat);
    111         Pattern pn = Pattern.compile(npat);
    112         for (int j = 0; j < matches.length; j++) {
    113             String t = matches[j];
    114             boolean invert = t.startsWith("-");
    115             if (invert) {
    116                 // test negative case, expected to fail
    117                 t = t.substring(1);
    118                 assertFalse("expected '"+t+"' to not be matched " +
    119                         "by pattern '"+pat, p.matcher(t).matches());
    120                 assertTrue("expected '"+t+"' to  " +
    121                         "be matched by pattern '"+npat, pn.matcher(t).matches());
    122             } else {
    123                 assertTrue("expected '"+t+"' to be matched " +
    124                         "by pattern '"+pat, p.matcher(t).matches());
    125                 assertFalse("expected '"+t+"' to  " +
    126                         "not be matched by pattern '"+npat, pn.matcher(t).matches());
    127             }
    128         }
    129     }
    130 
    131     public void testCapturingGroups() throws PatternSyntaxException {
    132         Pattern p;
    133         Matcher m;
    134 
    135         // Test simple capturing groups
    136         p = Pattern.compile("(a+)b");
    137         m = p.matcher("aaaaaaaab");
    138         assertTrue(m.matches());
    139         assertEquals(1, m.groupCount());
    140         assertEquals("aaaaaaaa", m.group(1));
    141 
    142         p = Pattern.compile("((an)+)((as)+)");
    143         m = p.matcher("ananas");
    144         assertTrue(m.matches());
    145         assertEquals(4, m.groupCount());
    146         assertEquals("ananas", m.group(0));
    147         assertEquals("anan", m.group(1));
    148         assertEquals("an", m.group(2));
    149         assertEquals("as", m.group(3));
    150         assertEquals("as", m.group(4));
    151 
    152         // Test grouping without capture (?:...)
    153         p = Pattern.compile("(?:(?:an)+)(as)");
    154         m = p.matcher("ananas");
    155         assertTrue(m.matches());
    156         assertEquals(1, m.groupCount());
    157         assertEquals("as", m.group(1));
    158         try {
    159             m.group(2);
    160             fail("expected IndexOutOfBoundsException");
    161         } catch (IndexOutOfBoundsException ioobe) {
    162             // expected
    163         }
    164 
    165         // Test combination of grouping and capture
    166         // TODO
    167 
    168         // Test \<num> sequence with capturing and non-capturing groups
    169         // TODO
    170 
    171         // Test \<num> with <num> out of range
    172         p = Pattern.compile("((an)+)as\\1");
    173         m = p.matcher("ananasanan");
    174         assertTrue(m.matches());
    175 
    176         try {
    177             p = Pattern.compile("((an)+)as\\4");
    178             fail("expected PatternSyntaxException");
    179         } catch (PatternSyntaxException pse) {
    180             // expected
    181         }
    182 
    183     }
    184     public void testRepeats() {
    185         Pattern p;
    186         Matcher m;
    187 
    188         // Test ?
    189         p = Pattern.compile("(abc)?c");
    190         m = p.matcher("abcc");
    191         assertTrue(m.matches());
    192         m = p.matcher("c");
    193         assertTrue(m.matches());
    194         m = p.matcher("cc");
    195         assertFalse(m.matches());
    196         m = p.matcher("abcabcc");
    197         assertFalse(m.matches());
    198 
    199         // Test *
    200         p = Pattern.compile("(abc)*c");
    201         m = p.matcher("abcc");
    202         assertTrue(m.matches());
    203         m = p.matcher("c");
    204         assertTrue(m.matches());
    205         m = p.matcher("cc");
    206         assertFalse(m.matches());
    207         m = p.matcher("abcabcc");
    208         assertTrue(m.matches());
    209 
    210         // Test +
    211         p = Pattern.compile("(abc)+c");
    212         m = p.matcher("abcc");
    213         assertTrue(m.matches());
    214         m = p.matcher("c");
    215         assertFalse(m.matches());
    216         m = p.matcher("cc");
    217         assertFalse(m.matches());
    218         m = p.matcher("abcabcc");
    219         assertTrue(m.matches());
    220 
    221         // Test {<num>}, including 0, 1 and more
    222         p = Pattern.compile("(abc){0}c");
    223         m = p.matcher("abcc");
    224         assertFalse(m.matches());
    225         m = p.matcher("c");
    226         assertTrue(m.matches());
    227 
    228         p = Pattern.compile("(abc){1}c");
    229         m = p.matcher("abcc");
    230         assertTrue(m.matches());
    231         m = p.matcher("c");
    232         assertFalse(m.matches());
    233         m = p.matcher("abcabcc");
    234         assertFalse(m.matches());
    235 
    236         p = Pattern.compile("(abc){2}c");
    237         m = p.matcher("abcc");
    238         assertFalse(m.matches());
    239         m = p.matcher("c");
    240         assertFalse(m.matches());
    241         m = p.matcher("cc");
    242         assertFalse(m.matches());
    243         m = p.matcher("abcabcc");
    244         assertTrue(m.matches());
    245 
    246         // Test {<num>,}, including 0, 1 and more
    247         // TODO
    248 
    249         // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?)
    250         // TODO
    251     }
    252     public void testAnchors() throws PatternSyntaxException {
    253         Pattern p;
    254         Matcher m;
    255 
    256         // Test ^, default and MULTILINE
    257         p = Pattern.compile("^abc\\n^abc", Pattern.MULTILINE);
    258         m = p.matcher("abc\nabc");
    259         assertTrue(m.matches());
    260 
    261         p = Pattern.compile("^abc\\n^abc");
    262         m = p.matcher("abc\nabc");
    263         assertFalse(m.matches());
    264 
    265         // Test $, default and MULTILINE
    266         // TODO
    267 
    268         // Test \b (word boundary)
    269         // TODO
    270 
    271         // Test \B (not a word boundary)
    272         // TODO
    273 
    274         // Test \A (beginning of string)
    275         // TODO
    276 
    277         // Test \Z (end of string)
    278         // TODO
    279 
    280         // Test \z (end of string)
    281         // TODO
    282 
    283         // Test \G
    284         // TODO
    285 
    286         // Test positive lookahead using (?=...)
    287         // TODO
    288 
    289         // Test negative lookahead using (?!...)
    290         // TODO
    291 
    292         // Test positive lookbehind using (?<=...)
    293         // TODO
    294 
    295         // Test negative lookbehind using (?<!...)
    296         // TODO
    297     }
    298 }
    299