1 /* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.harmony.regex.tests.java.util.regex; 18 19 import java.util.regex.Matcher; 20 import java.util.regex.Pattern; 21 import java.util.regex.PatternSyntaxException; 22 23 import junit.framework.TestCase; 24 25 /** 26 * Tests simple Pattern compilation and Matcher methods 27 * 28 */ 29 public class Pattern2Test extends TestCase { 30 31 public void testUnicodeCategories() throws PatternSyntaxException { 32 // Test Unicode categories using \p and \P 33 // One letter codes: L, M, N, P, S, Z, C 34 // Two letter codes: Lu, Nd, Sc, Sm, ... 35 // See java.lang.Character and Unicode standard for complete list 36 // TODO 37 // Test \p{L} 38 // TODO 39 40 // Test \p{N} 41 // TODO 42 43 // Test two letter codes: 44 // From unicode.org: 45 // Lu 46 // Ll 47 // Lt 48 // Lm 49 // Lo 50 // Mn 51 // Mc 52 // Me 53 // Nd 54 // Nl 55 // No 56 // Pc 57 // Pd 58 // Ps 59 // Pe 60 // Pi 61 // Pf 62 // Po 63 // Sm 64 // Sc 65 // Sk 66 // So 67 // Zs 68 // Zl 69 // Zp 70 // Cc 71 // Cf 72 // Cs 73 // Co 74 // Cn 75 76 // TODO add more tests per category 77 //{"Cc", "\u0000", "-\u0041"}, 78 testCategory("Cf", "\u202B"); 79 testCategory("Co", "\uE000"); 80 testCategory("Cs", "\uD800"); 81 testCategory("Ll", "a", "b", "x", "y", "z", "-A", "-Z"); 82 testCategory("Lm", "\u02B9"); 83 testCategory("Lu", "B", "C", "-c"); 84 testCategory("Lo", "\u05E2"); 85 testCategory("Lt", "\u01C5"); 86 testCategory("Mc", "\u0903"); 87 testCategory("Me", "\u0488"); 88 testCategory("Mn", "\u0300"); 89 testCategory("Nd", "\u0030"); 90 testCategory("Nl", "\u2164"); 91 testCategory("No", "\u0BF0"); 92 // testCategory("Pc", "\u30FB"); 93 testCategory("Pd", "\u2015"); 94 testCategory("Pe", "\u207E"); 95 testCategory("Po", "\u00B7"); 96 testCategory("Ps", "\u0F3C"); 97 testCategory("Sc", "\u20A0"); 98 testCategory("Sk", "\u00B8"); 99 testCategory("Sm", "\u002B"); 100 testCategory("So", "\u0B70"); 101 testCategory("Zl", "\u2028"); 102 // testCategory("Pi", "\u200C"); 103 testCategory("Zp", "\u2029"); 104 } 105 106 private void testCategory(String cat, String... matches) { 107 String pa = "{"+cat+"}"; 108 String pat = "\\p"+pa; 109 String npat = "\\P"+pa; 110 Pattern p = Pattern.compile(pat); 111 Pattern pn = Pattern.compile(npat); 112 for (int j = 0; j < matches.length; j++) { 113 String t = matches[j]; 114 boolean invert = t.startsWith("-"); 115 if (invert) { 116 // test negative case, expected to fail 117 t = t.substring(1); 118 assertFalse("expected '"+t+"' to not be matched " + 119 "by pattern '"+pat, p.matcher(t).matches()); 120 assertTrue("expected '"+t+"' to " + 121 "be matched by pattern '"+npat, pn.matcher(t).matches()); 122 } else { 123 assertTrue("expected '"+t+"' to be matched " + 124 "by pattern '"+pat, p.matcher(t).matches()); 125 assertFalse("expected '"+t+"' to " + 126 "not be matched by pattern '"+npat, pn.matcher(t).matches()); 127 } 128 } 129 } 130 131 public void testCapturingGroups() throws PatternSyntaxException { 132 Pattern p; 133 Matcher m; 134 135 // Test simple capturing groups 136 p = Pattern.compile("(a+)b"); 137 m = p.matcher("aaaaaaaab"); 138 assertTrue(m.matches()); 139 assertEquals(1, m.groupCount()); 140 assertEquals("aaaaaaaa", m.group(1)); 141 142 p = Pattern.compile("((an)+)((as)+)"); 143 m = p.matcher("ananas"); 144 assertTrue(m.matches()); 145 assertEquals(4, m.groupCount()); 146 assertEquals("ananas", m.group(0)); 147 assertEquals("anan", m.group(1)); 148 assertEquals("an", m.group(2)); 149 assertEquals("as", m.group(3)); 150 assertEquals("as", m.group(4)); 151 152 // Test grouping without capture (?:...) 153 p = Pattern.compile("(?:(?:an)+)(as)"); 154 m = p.matcher("ananas"); 155 assertTrue(m.matches()); 156 assertEquals(1, m.groupCount()); 157 assertEquals("as", m.group(1)); 158 try { 159 m.group(2); 160 fail("expected IndexOutOfBoundsException"); 161 } catch (IndexOutOfBoundsException ioobe) { 162 // expected 163 } 164 165 // Test combination of grouping and capture 166 // TODO 167 168 // Test \<num> sequence with capturing and non-capturing groups 169 // TODO 170 171 // Test \<num> with <num> out of range 172 p = Pattern.compile("((an)+)as\\1"); 173 m = p.matcher("ananasanan"); 174 assertTrue(m.matches()); 175 176 try { 177 p = Pattern.compile("((an)+)as\\4"); 178 fail("expected PatternSyntaxException"); 179 } catch (PatternSyntaxException pse) { 180 // expected 181 } 182 183 } 184 public void testRepeats() { 185 Pattern p; 186 Matcher m; 187 188 // Test ? 189 p = Pattern.compile("(abc)?c"); 190 m = p.matcher("abcc"); 191 assertTrue(m.matches()); 192 m = p.matcher("c"); 193 assertTrue(m.matches()); 194 m = p.matcher("cc"); 195 assertFalse(m.matches()); 196 m = p.matcher("abcabcc"); 197 assertFalse(m.matches()); 198 199 // Test * 200 p = Pattern.compile("(abc)*c"); 201 m = p.matcher("abcc"); 202 assertTrue(m.matches()); 203 m = p.matcher("c"); 204 assertTrue(m.matches()); 205 m = p.matcher("cc"); 206 assertFalse(m.matches()); 207 m = p.matcher("abcabcc"); 208 assertTrue(m.matches()); 209 210 // Test + 211 p = Pattern.compile("(abc)+c"); 212 m = p.matcher("abcc"); 213 assertTrue(m.matches()); 214 m = p.matcher("c"); 215 assertFalse(m.matches()); 216 m = p.matcher("cc"); 217 assertFalse(m.matches()); 218 m = p.matcher("abcabcc"); 219 assertTrue(m.matches()); 220 221 // Test {<num>}, including 0, 1 and more 222 p = Pattern.compile("(abc){0}c"); 223 m = p.matcher("abcc"); 224 assertFalse(m.matches()); 225 m = p.matcher("c"); 226 assertTrue(m.matches()); 227 228 p = Pattern.compile("(abc){1}c"); 229 m = p.matcher("abcc"); 230 assertTrue(m.matches()); 231 m = p.matcher("c"); 232 assertFalse(m.matches()); 233 m = p.matcher("abcabcc"); 234 assertFalse(m.matches()); 235 236 p = Pattern.compile("(abc){2}c"); 237 m = p.matcher("abcc"); 238 assertFalse(m.matches()); 239 m = p.matcher("c"); 240 assertFalse(m.matches()); 241 m = p.matcher("cc"); 242 assertFalse(m.matches()); 243 m = p.matcher("abcabcc"); 244 assertTrue(m.matches()); 245 246 // Test {<num>,}, including 0, 1 and more 247 // TODO 248 249 // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?) 250 // TODO 251 } 252 public void testAnchors() throws PatternSyntaxException { 253 Pattern p; 254 Matcher m; 255 256 // Test ^, default and MULTILINE 257 p = Pattern.compile("^abc\\n^abc", Pattern.MULTILINE); 258 m = p.matcher("abc\nabc"); 259 assertTrue(m.matches()); 260 261 p = Pattern.compile("^abc\\n^abc"); 262 m = p.matcher("abc\nabc"); 263 assertFalse(m.matches()); 264 265 // Test $, default and MULTILINE 266 // TODO 267 268 // Test \b (word boundary) 269 // TODO 270 271 // Test \B (not a word boundary) 272 // TODO 273 274 // Test \A (beginning of string) 275 // TODO 276 277 // Test \Z (end of string) 278 // TODO 279 280 // Test \z (end of string) 281 // TODO 282 283 // Test \G 284 // TODO 285 286 // Test positive lookahead using (?=...) 287 // TODO 288 289 // Test negative lookahead using (?!...) 290 // TODO 291 292 // Test positive lookbehind using (?<=...) 293 // TODO 294 295 // Test negative lookbehind using (?<!...) 296 // TODO 297 } 298 } 299