1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.harmony.regex.tests.java.util.regex; 19 20 import java.io.Serializable; 21 import java.util.regex.Matcher; 22 import java.util.regex.Pattern; 23 import java.util.regex.PatternSyntaxException; 24 25 import junit.framework.TestCase; 26 27 import org.apache.harmony.testframework.serialization.SerializationTest; 28 import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert; 29 30 public class PatternTest extends TestCase { 31 String[] testPatterns = { 32 "(a|b)*abb", 33 "(1*2*3*4*)*567", 34 "(a|b|c|d)*aab", 35 "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*", 36 "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*", 37 "(a|b)*(a|b)*A(a|b)*lice.*", 38 "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" 39 + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do", 40 // BEGIN android-changed 41 // We don't have canonical equivalence. 42 // "x(?c)y", "x(?cc)y" 43 // "x(?:c)y" 44 // END android-changed 45 46 }; 47 48 String[] testPatternsAlt = { 49 /* 50 * According to JavaDoc 2 and 3 oct digit sequences like \\o70\\o347 51 * should be OK, but test is failed for them 52 */ 53 "[ab]\\b\\\\o5\\xF9\\u1E7B\\t\\n\\f\\r\\a\\e[yz]", 54 "^\\p{Lower}*\\p{Upper}*\\p{ASCII}?\\p{Alpha}?\\p{Digit}*\\p{Alnum}\\p{Punct}\\p{Graph}\\p{Print}\\p{Blank}\\p{Cntrl}\\p{XDigit}\\p{Space}", 55 "$\\p{javaLowerCase}\\p{javaUpperCase}\\p{javaWhitespace}\\p{javaMirrored}", 56 "\\p{InGreek}\\p{Lu}\\p{Sc}\\P{InGreek}[\\p{L}&&[^\\p{Lu}]]" }; 57 58 String[] wrongTestPatterns = { "\\o9A", "\\p{Lawer}", "\\xG0" }; 59 60 final static int[] flagsSet = { Pattern.CASE_INSENSITIVE, 61 Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE 62 /* , Pattern.CANON_EQ */ }; 63 64 /* 65 * Based on RI implenetation documents. Need to check this set regarding 66 * actual implementation. 67 */ 68 final static int[] wrongFlagsSet = { 256, 512, 1024 }; 69 70 final static int DEFAULT_FLAGS = 0; 71 72 public void testMatcher() { 73 // some very simple test 74 Pattern p = Pattern.compile("a"); 75 assertNotNull(p.matcher("bcde")); 76 assertNotSame(p.matcher("a"), p.matcher("a")); 77 } 78 79 public void testSplitCharSequenceInt() { 80 // splitting CharSequence which ends with pattern 81 // bug6193 82 assertEquals(",,".split(",", 3).length, 3); 83 assertEquals(",,".split(",", 4).length, 3); 84 // bug6193 85 // bug5391 86 assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); 87 assertEquals(Pattern.compile("b").split("ab", -1).length, 2); 88 // bug5391 89 String s[]; 90 Pattern pat = Pattern.compile("x"); 91 s = pat.split("zxx:zzz:zxx", 10); 92 assertEquals(s.length, 5); 93 s = pat.split("zxx:zzz:zxx", 3); 94 assertEquals(s.length, 3); 95 s = pat.split("zxx:zzz:zxx", -1); 96 assertEquals(s.length, 5); 97 s = pat.split("zxx:zzz:zxx", 0); 98 assertEquals(s.length, 3); 99 // other splitting 100 // negative limit 101 pat = Pattern.compile("b"); 102 s = pat.split("abccbadfebb", -1); 103 assertEquals(s.length, 5); 104 s = pat.split("", -1); 105 assertEquals(s.length, 1); 106 pat = Pattern.compile(""); 107 s = pat.split("", -1); 108 assertEquals(s.length, 1); 109 s = pat.split("abccbadfe", -1); 110 assertEquals(s.length, 11); 111 // zero limit 112 pat = Pattern.compile("b"); 113 s = pat.split("abccbadfebb", 0); 114 assertEquals(s.length, 3); 115 s = pat.split("", 0); 116 assertEquals(s.length, 1); 117 pat = Pattern.compile(""); 118 s = pat.split("", 0); 119 assertEquals(s.length, 1); 120 s = pat.split("abccbadfe", 0); 121 assertEquals(s.length, 10); 122 // positive limit 123 pat = Pattern.compile("b"); 124 s = pat.split("abccbadfebb", 12); 125 assertEquals(s.length, 5); 126 s = pat.split("", 6); 127 assertEquals(s.length, 1); 128 pat = Pattern.compile(""); 129 s = pat.split("", 11); 130 assertEquals(s.length, 1); 131 s = pat.split("abccbadfe", 15); 132 assertEquals(s.length, 11); 133 134 pat = Pattern.compile("b"); 135 s = pat.split("abccbadfebb", 5); 136 assertEquals(s.length, 5); 137 s = pat.split("", 1); 138 assertEquals(s.length, 1); 139 pat = Pattern.compile(""); 140 s = pat.split("", 1); 141 assertEquals(s.length, 1); 142 s = pat.split("abccbadfe", 11); 143 assertEquals(s.length, 11); 144 145 pat = Pattern.compile("b"); 146 s = pat.split("abccbadfebb", 3); 147 assertEquals(s.length, 3); 148 pat = Pattern.compile(""); 149 s = pat.split("abccbadfe", 5); 150 assertEquals(s.length, 5); 151 } 152 153 public void testSplitCharSequence() { 154 String s[]; 155 Pattern pat = Pattern.compile("b"); 156 s = pat.split("abccbadfebb"); 157 assertEquals(s.length, 3); 158 s = pat.split(""); 159 assertEquals(s.length, 1); 160 pat = Pattern.compile(""); 161 s = pat.split(""); 162 assertEquals(s.length, 1); 163 s = pat.split("abccbadfe"); 164 assertEquals(s.length, 10); 165 // bug6544 166 String s1 = ""; 167 String[] arr = s1.split(":"); 168 assertEquals(arr.length, 1); 169 // bug6544 170 } 171 172 public void testPattern() { 173 /* Positive assertion test. */ 174 for (String aPattern : testPatterns) { 175 Pattern p = Pattern.compile(aPattern); 176 try { 177 assertTrue(p.pattern().equals(aPattern)); 178 } catch (Exception e) { 179 fail("Unexpected exception: " + e); 180 } 181 } 182 } 183 184 public void testCompile() { 185 /* Positive assertion test. */ 186 for (String aPattern : testPatterns) { 187 try { 188 Pattern p = Pattern.compile(aPattern); 189 } catch (Exception e) { 190 fail("Unexpected exception: " + e); 191 } 192 } 193 194 /* Positive assertion test with alternative templates. */ 195 for (String aPattern : testPatternsAlt) { 196 try { 197 Pattern p = Pattern.compile(aPattern); 198 } catch (Exception e) { 199 fail("Unexpected exception: " + e); 200 } 201 } 202 203 /* Negative assertion test. */ 204 for (String aPattern : wrongTestPatterns) { 205 try { 206 Pattern p = Pattern.compile(aPattern); 207 fail("PatternSyntaxException is expected"); 208 } catch (PatternSyntaxException pse) { 209 /* OKAY */ 210 } catch (Exception e) { 211 fail("Unexpected exception: " + e); 212 } 213 } 214 } 215 216 public void testFlags() { 217 String baseString; 218 String testString; 219 Pattern pat; 220 Matcher mat; 221 222 baseString = "((?i)|b)a"; 223 testString = "A"; 224 pat = Pattern.compile(baseString); 225 mat = pat.matcher(testString); 226 assertFalse(mat.matches()); 227 228 baseString = "(?i)a|b"; 229 testString = "A"; 230 pat = Pattern.compile(baseString); 231 mat = pat.matcher(testString); 232 assertTrue(mat.matches()); 233 234 baseString = "(?i)a|b"; 235 testString = "B"; 236 pat = Pattern.compile(baseString); 237 mat = pat.matcher(testString); 238 assertTrue(mat.matches()); 239 240 baseString = "c|(?i)a|b"; 241 testString = "B"; 242 pat = Pattern.compile(baseString); 243 mat = pat.matcher(testString); 244 assertTrue(mat.matches()); 245 246 baseString = "(?i)a|(?s)b"; 247 testString = "B"; 248 pat = Pattern.compile(baseString); 249 mat = pat.matcher(testString); 250 assertTrue(mat.matches()); 251 252 baseString = "(?i)a|(?-i)b"; 253 testString = "B"; 254 pat = Pattern.compile(baseString); 255 mat = pat.matcher(testString); 256 assertFalse(mat.matches()); 257 258 baseString = "(?i)a|(?-i)c|b"; 259 testString = "B"; 260 pat = Pattern.compile(baseString); 261 mat = pat.matcher(testString); 262 assertFalse(mat.matches()); 263 264 baseString = "(?i)a|(?-i)c|(?i)b"; 265 testString = "B"; 266 pat = Pattern.compile(baseString); 267 mat = pat.matcher(testString); 268 assertTrue(mat.matches()); 269 270 baseString = "(?i)a|(?-i)b"; 271 testString = "A"; 272 pat = Pattern.compile(baseString); 273 mat = pat.matcher(testString); 274 assertTrue(mat.matches()); 275 276 baseString = "((?i))a"; 277 testString = "A"; 278 pat = Pattern.compile(baseString); 279 mat = pat.matcher(testString); 280 assertFalse(mat.matches()); 281 282 baseString = "|(?i)|a"; 283 testString = "A"; 284 pat = Pattern.compile(baseString); 285 mat = pat.matcher(testString); 286 assertTrue(mat.matches()); 287 288 baseString = "(?i)((?s)a.)"; 289 testString = "A\n"; 290 pat = Pattern.compile(baseString); 291 mat = pat.matcher(testString); 292 assertTrue(mat.matches()); 293 294 baseString = "(?i)((?-i)a)"; 295 testString = "A"; 296 pat = Pattern.compile(baseString); 297 mat = pat.matcher(testString); 298 assertFalse(mat.matches()); 299 300 baseString = "(?i)(?s:a.)"; 301 testString = "A\n"; 302 pat = Pattern.compile(baseString); 303 mat = pat.matcher(testString); 304 assertTrue(mat.matches()); 305 306 baseString = "(?i)fgh(?s:aa)"; 307 testString = "fghAA"; 308 pat = Pattern.compile(baseString); 309 mat = pat.matcher(testString); 310 assertTrue(mat.matches()); 311 312 baseString = "(?i)((?-i))a"; 313 testString = "A"; 314 pat = Pattern.compile(baseString); 315 mat = pat.matcher(testString); 316 assertTrue(mat.matches()); 317 318 baseString = "abc(?i)d"; 319 testString = "ABCD"; 320 pat = Pattern.compile(baseString); 321 mat = pat.matcher(testString); 322 assertFalse(mat.matches()); 323 324 testString = "abcD"; 325 mat = pat.matcher(testString); 326 assertTrue(mat.matches()); 327 328 baseString = "a(?i)a(?-i)a(?i)a(?-i)a"; 329 testString = "aAaAa"; 330 pat = Pattern.compile(baseString); 331 mat = pat.matcher(testString); 332 assertTrue(mat.matches()); 333 334 testString = "aAAAa"; 335 mat = pat.matcher(testString); 336 assertFalse(mat.matches()); 337 } 338 339 // BEGIN android-removed 340 // The flags() method should only return those flags that were explicitly 341 // passed during the compilation. The JDK also accepts the ones implicitly 342 // contained in the pattern, but ICU doesn't do this. 343 // 344 // public void testFlagsMethod() { 345 // String baseString; 346 // Pattern pat; 347 // 348 // /* 349 // * These tests are for compatibility with RI only. Logically we have to 350 // * return only flags specified during the compilation. For example 351 // * pat.flags() == 0 when we compile Pattern pat = 352 // * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled 353 // * in a case insensitive manner. So there is little sense to do calls to 354 // * flags() now. 355 // */ 356 // baseString = "(?-i)"; 357 // pat = Pattern.compile(baseString); 358 // 359 // baseString = "(?idmsux)abc(?-i)vg(?-dmu)"; 360 // pat = Pattern.compile(baseString); 361 // assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 362 // 363 // baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)"; 364 // pat = Pattern.compile(baseString); 365 // assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 366 // 367 // baseString = "(?is)a((?x)b.)"; 368 // pat = Pattern.compile(baseString); 369 // assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE); 370 // 371 // baseString = "(?i)a((?-i))"; 372 // pat = Pattern.compile(baseString); 373 // assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE); 374 // 375 // baseString = "((?i)a)"; 376 // pat = Pattern.compile(baseString); 377 // assertEquals(pat.flags(), 0); 378 // 379 // pat = Pattern.compile("(?is)abc"); 380 // assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); 381 // } 382 //END android-removed 383 384 /* 385 * Check default flags when they are not specified in pattern. Based on RI 386 * since could not find that info 387 */ 388 public void testFlagsCompileDefault() { 389 for (String pat : testPatternsAlt) { 390 try { 391 Pattern p = Pattern.compile(pat); 392 assertEquals(p.flags(), DEFAULT_FLAGS); 393 } catch (Exception e) { 394 fail("Unexpected exception: " + e); 395 } 396 } 397 } 398 399 /* 400 * Check that flags specified during compile are set properly This is a 401 * simple implementation that does not use flags combinations. Need to 402 * improve. 403 */ 404 public void testFlagsCompileValid() { 405 for (String pat : testPatternsAlt) { 406 for (int flags : flagsSet) { 407 try { 408 Pattern p = Pattern.compile(pat, flags); 409 assertEquals(p.flags(), flags); 410 } catch (Exception e) { 411 fail("Unexpected exception: " + e); 412 } 413 } 414 } 415 } 416 417 public void testCompileStringInt() { 418 /* 419 * these tests are needed to verify that appropriate exceptions are 420 * thrown 421 */ 422 String pattern = "b)a"; 423 try { 424 Pattern.compile(pattern); 425 fail("Expected a PatternSyntaxException when compiling pattern: " 426 + pattern); 427 } catch (PatternSyntaxException e) { 428 // pass 429 } 430 pattern = "bcde)a"; 431 try { 432 Pattern.compile(pattern); 433 fail("Expected a PatternSyntaxException when compiling pattern: " 434 + pattern); 435 } catch (PatternSyntaxException e) { 436 // pass 437 } 438 pattern = "bbg())a"; 439 try { 440 Pattern pat = Pattern.compile(pattern); 441 fail("Expected a PatternSyntaxException when compiling pattern: " 442 + pattern); 443 } catch (PatternSyntaxException e) { 444 // pass 445 } 446 447 pattern = "cdb(?i))a"; 448 try { 449 Pattern pat = Pattern.compile(pattern); 450 fail("Expected a PatternSyntaxException when compiling pattern: " 451 + pattern); 452 } catch (PatternSyntaxException e) { 453 // pass 454 } 455 456 /* 457 * This pattern should compile - HARMONY-2127 458 * icu4c doesn't support canonical equivalence. 459 */ 460 // pattern = "x(?c)y"; 461 // Pattern.compile(pattern); 462 463 /* 464 * this pattern doesn't match any string, but should be compiled anyway 465 */ 466 pattern = "(b\\1)a"; 467 Pattern.compile(pattern); 468 } 469 470 /* 471 * Class under test for Pattern compile(String) 472 */ 473 public void testQuantCompileNeg() { 474 String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh", 475 "{5,3shdfkjh}" }; 476 for (String element : patterns) { 477 try { 478 Pattern.compile(element); 479 fail("PatternSyntaxException was expected, but compilation succeeds"); 480 } catch (PatternSyntaxException pse) { 481 continue; 482 } 483 } 484 // Regression for HARMONY-1365 485 // BEGIN android-changed 486 // Original regex contained some illegal stuff. Changed it slightly, 487 // while maintaining the wicked character of this "mother of all 488 // regexes". 489 // String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()"; 490 String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\.*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\.*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]{1,5})|(?x-xd:^{5}+)()"; 491 // END android-changed 492 assertNotNull(Pattern.compile(pattern)); 493 } 494 495 public void testQuantCompilePos() { 496 String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" }; 497 for (String element : patterns) { 498 Pattern.compile(element); 499 } 500 } 501 502 public void testQuantComposition() { 503 String pattern = "(a{1,3})aab"; 504 java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern); 505 java.util.regex.Matcher mat = pat.matcher("aaab"); 506 mat.matches(); 507 mat.start(1); 508 mat.group(1); 509 } 510 511 public void testMatches() { 512 String[][] posSeq = { 513 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, 514 { "213567", "12324567", "1234567", "213213567", 515 "21312312312567", "444444567" }, 516 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, 517 { "213234567", "3458", "0987654", "7689546432", "0398576", 518 "98432", "5" }, 519 { 520 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 521 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 522 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, 523 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", 524 "abbbAbbbliceaaa", "Alice" }, 525 { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, 526 { "xy" }, { "xy" }, { "xcy" } 527 528 }; 529 530 for (int i = 0; i < testPatterns.length; i++) { 531 for (int j = 0; j < posSeq[i].length; j++) { 532 assertTrue("Incorrect match: " + testPatterns[i] + " vs " 533 + posSeq[i][j], Pattern.matches(testPatterns[i], 534 posSeq[i][j])); 535 } 536 } 537 } 538 539 public void testMatchesException() { 540 /* Negative assertion test. */ 541 for (String aPattern : wrongTestPatterns) { 542 try { 543 Pattern.matches(aPattern, "Foo"); 544 fail("PatternSyntaxException is expected"); 545 } catch (PatternSyntaxException pse) { 546 /* OKAY */ 547 } catch (Exception e) { 548 fail("Unexpected exception: " + e); 549 } 550 } 551 } 552 553 public void testTimeZoneIssue() { 554 Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?"); 555 Matcher m = p.matcher("GMT-9:45"); 556 assertTrue(m.matches()); 557 assertEquals("-", m.group(1)); 558 assertEquals("9", m.group(2)); 559 assertEquals(":45", m.group(3)); 560 assertEquals("45", m.group(4)); 561 } 562 563 // BEGIN android-changed 564 // Removed one pattern that is buggy on the JDK. We don't want to duplicate that. 565 public void testCompileRanges() { 566 String[] correctTestPatterns = { "[^]*abb]*", /* "[^a-d[^m-p]]*abb", */ 567 "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb", 568 "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb", 569 "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" }; 570 571 String[] inputSecuence = { "kkkk", /* "admpabb", */ "abcabcd124654abb", 572 "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb", 573 "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb", 574 "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" }; 575 576 Pattern pat; 577 578 for (int i = 0; i < correctTestPatterns.length; i++) { 579 assertTrue("pattern: " + correctTestPatterns[i] + " input: " 580 + inputSecuence[i], Pattern.matches(correctTestPatterns[i], 581 inputSecuence[i])); 582 583 } 584 585 String[] wrongInputSecuence = { "]", /* "admpkk", */ "abcabcd124k654abb", 586 "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb", 587 "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb", 588 "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" }; 589 590 for (int i = 0; i < correctTestPatterns.length; i++) { 591 assertFalse("pattern: " + correctTestPatterns[i] + " input: " 592 + wrongInputSecuence[i], Pattern.matches( 593 correctTestPatterns[i], wrongInputSecuence[i])); 594 595 } 596 } 597 598 public void testRangesSpecialCases() { 599 String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" }; 600 601 for (String element : neg_patterns) { 602 try { 603 Pattern.compile(element); 604 fail("PatternSyntaxException was expected: " + element); 605 } catch (PatternSyntaxException pse) { 606 } 607 } 608 609 String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--", 610 "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" }; 611 612 for (int i = 0; i < pos_patterns.length; i++) { 613 String pat = pos_patterns[i++]; 614 String inp = pos_patterns[i]; 615 assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches( 616 pat, inp)); 617 } 618 } 619 // END android-changed 620 621 public void testZeroSymbols() { 622 assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb")); 623 } 624 625 public void testEscapes() { 626 Pattern pat = Pattern.compile("\\Q{]()*?"); 627 Matcher mat = pat.matcher("{]()*?"); 628 629 assertTrue(mat.matches()); 630 } 631 632 public void test_bug_181() { 633 Pattern.compile("[\\t-\\r]"); 634 } 635 636 // https://code.google.com/p/android/issues/detail?id=40103 637 public void test_bug_40103() { 638 Pattern.compile("(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl"); 639 640 // Looks like harmony had a similar "Bug187"... 641 Pattern.compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))"); 642 } 643 644 public void test_bug_4472() { 645 // HARMONY-4472 646 Pattern.compile("a*.+"); 647 } 648 649 public void test_bug_5858() { 650 // HARMONY-5858 651 Pattern.compile("\\u6211", Pattern.LITERAL); 652 } 653 654 public void testOrphanQuantifiers() { 655 try { 656 Pattern.compile("+++++"); 657 fail("PatternSyntaxException expected"); 658 } catch (PatternSyntaxException pse) { 659 } 660 } 661 662 public void testOrphanQuantifiers2() { 663 try { 664 Pattern pat = Pattern.compile("\\d+*"); 665 fail("PatternSyntaxException expected"); 666 } catch (PatternSyntaxException pse) { 667 } 668 } 669 670 public void testBug197() { 671 Object[] vals = { ":", new Integer(2), 672 new String[] { "boo", "and:foo" }, ":", new Integer(5), 673 new String[] { "boo", "and", "foo" }, ":", new Integer(-2), 674 new String[] { "boo", "and", "foo" }, ":", new Integer(3), 675 new String[] { "boo", "and", "foo" }, ":", new Integer(1), 676 new String[] { "boo:and:foo" }, "o", new Integer(5), 677 new String[] { "b", "", ":and:f", "", "" }, "o", 678 new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o", 679 new Integer(-2), new String[] { "b", "", ":and:f", "", "" }, 680 "o", new Integer(0), new String[] { "b", "", ":and:f" } }; 681 682 for (int i = 0; i < vals.length / 3;) { 683 String[] res = Pattern.compile(vals[i++].toString()).split( 684 "boo:and:foo", ((Integer) vals[i++]).intValue()); 685 String[] expectedRes = (String[]) vals[i++]; 686 687 assertEquals(expectedRes.length, res.length); 688 689 for (int j = 0; j < expectedRes.length; j++) { 690 assertEquals(expectedRes[j], res[j]); 691 } 692 } 693 } 694 695 public void testURIPatterns() { 696 String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; 697 String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$"; 698 String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; 699 String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$"; 700 String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$"; 701 String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$"; 702 String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*"; 703 704 Pattern.compile(URI_REGEXP_STR); 705 Pattern.compile(REL_URI_REGEXP_STR); 706 Pattern.compile(SCHEME_REGEXP_STR); 707 Pattern.compile(IPV4_REGEXP_STR); 708 Pattern.compile(IPV6_REGEXP_STR); 709 Pattern.compile(IPV6_REGEXP_STR2); 710 Pattern.compile(HOSTNAME_REGEXP_STR); 711 } 712 713 public void testFindBoundaryCases1() { 714 Pattern pat = Pattern.compile(".*\n"); 715 Matcher mat = pat.matcher("a\n"); 716 717 mat.find(); 718 assertEquals("a\n", mat.group()); 719 } 720 721 public void testFindBoundaryCases2() { 722 Pattern pat = Pattern.compile(".*A"); 723 Matcher mat = pat.matcher("aAa"); 724 725 mat.find(); 726 assertEquals("aA", mat.group()); 727 } 728 729 public void testFindBoundaryCases3() { 730 Pattern pat = Pattern.compile(".*A"); 731 Matcher mat = pat.matcher("a\naA\n"); 732 733 mat.find(); 734 assertEquals("aA", mat.group()); 735 } 736 737 public void testFindBoundaryCases4() { 738 Pattern pat = Pattern.compile("A.*"); 739 Matcher mat = pat.matcher("A\n"); 740 741 mat.find(); 742 assertEquals("A", mat.group()); 743 } 744 745 public void testFindBoundaryCases5() { 746 Pattern pat = Pattern.compile(".*A.*"); 747 Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n"); 748 // Matcher mat = pat.matcher("\nA\n"); 749 String[] res = { "A", "A", "aaAaa", "aaaA" }; 750 int k = 0; 751 for (; mat.find(); k++) { 752 assertEquals(res[k], mat.group()); 753 } 754 } 755 756 public void testFindBoundaryCases6() { 757 String[] res = { "", "a", "", "" }; 758 Pattern pat = Pattern.compile(".*"); 759 Matcher mat = pat.matcher("\na\n"); 760 int k = 0; 761 for (; mat.find(); k++) { 762 assertEquals(res[k], mat.group()); 763 } 764 assertEquals(4, k); 765 } 766 767 public void testBackReferences() { 768 Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); 769 Matcher mat = pat.matcher("(start1: word :start1)(start2: word :start2)"); 770 int k = 1; 771 for (; mat.find(); k++) { 772 assertEquals("start" + k, mat.group(2)); 773 assertEquals(" word ", mat.group(3)); 774 assertEquals("start" + k, mat.group(4)); 775 } 776 777 assertEquals(3, k); 778 pat = Pattern.compile(".*(.)\\1"); 779 mat = pat.matcher("saa"); 780 assertTrue(mat.matches()); 781 } 782 783 public void testNewLine() { 784 Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE); 785 Matcher mat = pat.matcher("\r\n\n"); 786 int counter = 0; 787 while (mat.find()) { 788 counter++; 789 } 790 assertEquals(2, counter); 791 } 792 793 public void testFindGreedy() { 794 Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL); 795 Matcher mat = pat.matcher("aaaa\naaa\naaaaaa"); 796 mat.matches(); 797 assertEquals(15, mat.end()); 798 } 799 800 public void testSerialization() throws Exception { 801 Pattern pat = Pattern.compile("a*bc"); 802 SerializableAssert comparator = new SerializableAssert() { 803 public void assertDeserialized(Serializable initial, 804 Serializable deserialized) { 805 assertEquals(((Pattern) initial).toString(), 806 ((Pattern) deserialized).toString()); 807 } 808 }; 809 SerializationTest.verifyGolden(this, pat, comparator); 810 SerializationTest.verifySelf(pat, comparator); 811 } 812 813 public void testSOLQuant() { 814 Pattern pat = Pattern.compile("$*", Pattern.MULTILINE); 815 Matcher mat = pat.matcher("\n\n"); 816 int counter = 0; 817 while (mat.find()) { 818 counter++; 819 } 820 821 assertEquals(3, counter); 822 } 823 824 public void testIllegalEscape() { 825 try { 826 Pattern.compile("\\y"); 827 fail("PatternSyntaxException expected"); 828 } catch (PatternSyntaxException pse) { 829 } 830 } 831 832 public void testEmptyFamily() { 833 Pattern.compile("\\p{Lower}"); 834 } 835 836 public void testNonCaptConstr() { 837 // Flags 838 Pattern pat = Pattern.compile("(?i)b*(?-i)a*"); 839 assertTrue(pat.matcher("bBbBaaaa").matches()); 840 assertFalse(pat.matcher("bBbBAaAa").matches()); 841 842 // Non-capturing groups 843 pat = Pattern.compile("(?i:b*)a*"); 844 assertTrue(pat.matcher("bBbBaaaa").matches()); 845 assertFalse(pat.matcher("bBbBAaAa").matches()); 846 847 pat = Pattern 848 // 1 2 3 4 5 6 7 8 9 10 11 849 .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?"); 850 Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81"); 851 assertTrue(mat.matches()); 852 assertEquals("-1234", mat.group(1)); 853 assertEquals("21", mat.group(2)); 854 assertEquals("31", mat.group(3)); 855 assertEquals("T", mat.group(4)); 856 assertEquals("41", mat.group(5)); 857 assertEquals("51", mat.group(6)); 858 assertEquals("61", mat.group(7)); 859 assertEquals(".789", mat.group(8)); 860 assertEquals("+71", mat.group(9)); 861 assertEquals("81", mat.group(10)); 862 863 // positive lookahead 864 pat = Pattern.compile(".*\\.(?=log$).*$"); 865 assertTrue(pat.matcher("a.b.c.log").matches()); 866 assertFalse(pat.matcher("a.b.c.log.").matches()); 867 868 // negative lookahead 869 pat = Pattern.compile(".*\\.(?!log$).*$"); 870 assertFalse(pat.matcher("abc.log").matches()); 871 assertTrue(pat.matcher("abc.logg").matches()); 872 873 // positive lookbehind 874 pat = Pattern.compile(".*(?<=abc)\\.log$"); 875 assertFalse(pat.matcher("cde.log").matches()); 876 assertTrue(pat.matcher("abc.log").matches()); 877 878 // negative lookbehind 879 pat = Pattern.compile(".*(?<!abc)\\.log$"); 880 assertTrue(pat.matcher("cde.log").matches()); 881 assertFalse(pat.matcher("abc.log").matches()); 882 883 // atomic group 884 pat = Pattern.compile("(?>a*)abb"); 885 assertFalse(pat.matcher("aaabb").matches()); 886 pat = Pattern.compile("(?>a*)bb"); 887 assertTrue(pat.matcher("aaabb").matches()); 888 889 pat = Pattern.compile("(?>a|aa)aabb"); 890 assertTrue(pat.matcher("aaabb").matches()); 891 pat = Pattern.compile("(?>aa|a)aabb"); 892 assertFalse(pat.matcher("aaabb").matches()); 893 894 // BEGIN android-removed 895 // Questionable constructs that ICU doesn't support. 896 // // quantifiers over look ahead 897 // pat = Pattern.compile(".*(?<=abc)*\\.log$"); 898 // assertTrue(pat.matcher("cde.log").matches()); 899 // pat = Pattern.compile(".*(?<=abc)+\\.log$"); 900 // assertFalse(pat.matcher("cde.log").matches()); 901 // END android-removed 902 903 } 904 905 public void testCorrectReplacementBackreferencedJointSet() { 906 Pattern.compile("ab(a)*\\1"); 907 Pattern.compile("abc(cd)fg"); 908 Pattern.compile("aba*cd"); 909 Pattern.compile("ab(a)*+cd"); 910 Pattern.compile("ab(a)*?cd"); 911 Pattern.compile("ab(a)+cd"); 912 Pattern.compile(".*(.)\\1"); 913 Pattern.compile("ab((a)|c|d)e"); 914 Pattern.compile("abc((a(b))cd)"); 915 Pattern.compile("ab(a)++cd"); 916 Pattern.compile("ab(a)?(c)d"); 917 Pattern.compile("ab(a)?+cd"); 918 Pattern.compile("ab(a)??cd"); 919 Pattern.compile("ab(a)??cd"); 920 Pattern.compile("ab(a){1,3}?(c)d"); 921 } 922 923 public void testCompilePatternWithTerminatorMark() { 924 Pattern pat = Pattern.compile("a\u0000\u0000cd"); 925 Matcher mat = pat.matcher("a\u0000\u0000cd"); 926 assertTrue(mat.matches()); 927 } 928 929 public void testAlternations() { 930 String baseString = "|a|bc"; 931 Pattern pat = Pattern.compile(baseString); 932 Matcher mat = pat.matcher(""); 933 934 assertTrue(mat.matches()); 935 936 baseString = "a||bc"; 937 pat = Pattern.compile(baseString); 938 mat = pat.matcher(""); 939 assertTrue(mat.matches()); 940 941 baseString = "a|bc|"; 942 pat = Pattern.compile(baseString); 943 mat = pat.matcher(""); 944 assertTrue(mat.matches()); 945 946 baseString = "a|b|"; 947 pat = Pattern.compile(baseString); 948 mat = pat.matcher(""); 949 assertTrue(mat.matches()); 950 951 baseString = "a(|b|cd)e"; 952 pat = Pattern.compile(baseString); 953 mat = pat.matcher("ae"); 954 assertTrue(mat.matches()); 955 956 baseString = "a(b||cd)e"; 957 pat = Pattern.compile(baseString); 958 mat = pat.matcher("ae"); 959 assertTrue(mat.matches()); 960 961 baseString = "a(b|cd|)e"; 962 pat = Pattern.compile(baseString); 963 mat = pat.matcher("ae"); 964 assertTrue(mat.matches()); 965 966 baseString = "a(b|c|)e"; 967 pat = Pattern.compile(baseString); 968 mat = pat.matcher("ae"); 969 assertTrue(mat.matches()); 970 971 baseString = "a(|)e"; 972 pat = Pattern.compile(baseString); 973 mat = pat.matcher("ae"); 974 assertTrue(mat.matches()); 975 976 baseString = "|"; 977 pat = Pattern.compile(baseString); 978 mat = pat.matcher(""); 979 assertTrue(mat.matches()); 980 981 baseString = "a(?:|)e"; 982 pat = Pattern.compile(baseString); 983 mat = pat.matcher("ae"); 984 assertTrue(mat.matches()); 985 986 baseString = "a||||bc"; 987 pat = Pattern.compile(baseString); 988 mat = pat.matcher(""); 989 assertTrue(mat.matches()); 990 991 baseString = "(?i-is)|a"; 992 pat = Pattern.compile(baseString); 993 mat = pat.matcher("a"); 994 assertTrue(mat.matches()); 995 } 996 997 public void testMatchWithGroups() { 998 String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr"; 999 String pattern = ".*(..).*\\1.*"; 1000 assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); 1001 1002 baseString = "saa"; 1003 pattern = ".*(.)\\1"; 1004 assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); 1005 assertTrue(Pattern.compile(pattern).matcher(baseString).find()); 1006 } 1007 1008 public void testSplitEmptyCharSequence() { 1009 String s1 = ""; 1010 String[] arr = s1.split(":"); 1011 assertEquals(arr.length, 1); 1012 } 1013 1014 public void testSplitEndsWithPattern() { 1015 assertEquals(",,".split(",", 3).length, 3); 1016 assertEquals(",,".split(",", 4).length, 3); 1017 1018 assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); 1019 assertEquals(Pattern.compile("b").split("ab", -1).length, 2); 1020 } 1021 1022 public void testCaseInsensitiveFlag() { 1023 assertTrue(Pattern.matches("(?i-:AbC)", "ABC")); 1024 } 1025 1026 public void testEmptyGroups() { 1027 Pattern pat = Pattern.compile("ab(?>)cda"); 1028 Matcher mat = pat.matcher("abcda"); 1029 assertTrue(mat.matches()); 1030 1031 pat = Pattern.compile("ab()"); 1032 mat = pat.matcher("ab"); 1033 assertTrue(mat.matches()); 1034 1035 pat = Pattern.compile("abc(?:)(..)"); 1036 mat = pat.matcher("abcgf"); 1037 assertTrue(mat.matches()); 1038 } 1039 1040 public void testEmbeddedFlags() { 1041 String baseString = "(?i)((?s)a)"; 1042 String testString = "A"; 1043 Pattern pat = Pattern.compile(baseString); 1044 Matcher mat = pat.matcher(testString); 1045 assertTrue(mat.matches()); 1046 1047 baseString = "(?x)(?i)(?s)(?d)a"; 1048 testString = "A"; 1049 pat = Pattern.compile(baseString); 1050 mat = pat.matcher(testString); 1051 assertTrue(mat.matches()); 1052 1053 baseString = "(?x)(?i)(?s)(?d)a."; 1054 testString = "a\n"; 1055 pat = Pattern.compile(baseString); 1056 mat = pat.matcher(testString); 1057 assertTrue(mat.matches()); 1058 1059 baseString = "abc(?x:(?i)(?s)(?d)a.)"; 1060 testString = "abcA\n"; 1061 pat = Pattern.compile(baseString); 1062 mat = pat.matcher(testString); 1063 assertTrue(mat.matches()); 1064 1065 baseString = "abc((?x)d)(?i)(?s)a"; 1066 testString = "abcdA"; 1067 pat = Pattern.compile(baseString); 1068 mat = pat.matcher(testString); 1069 assertTrue(mat.matches()); 1070 } 1071 1072 public void testAltWithFlags() { 1073 Pattern.compile("|(?i-xi)|()"); 1074 } 1075 1076 public void testRestoreFlagsAfterGroup() { 1077 String baseString = "abc((?x)d) a"; 1078 String testString = "abcd a"; 1079 Pattern pat = Pattern.compile(baseString); 1080 Matcher mat = pat.matcher(testString); 1081 1082 assertTrue(mat.matches()); 1083 } 1084 1085 /* 1086 * Verify if the Pattern support the following character classes: 1087 * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored} 1088 */ 1089 public void testCompileCharacterClass() { 1090 // Regression for HARMONY-606, 696 1091 Pattern pattern = Pattern.compile("\\p{javaLowerCase}"); 1092 assertNotNull(pattern); 1093 1094 pattern = Pattern.compile("\\p{javaUpperCase}"); 1095 assertNotNull(pattern); 1096 1097 pattern = Pattern.compile("\\p{javaWhitespace}"); 1098 assertNotNull(pattern); 1099 1100 pattern = Pattern.compile("\\p{javaMirrored}"); 1101 assertNotNull(pattern); 1102 1103 pattern = Pattern.compile("\\p{javaDefined}"); 1104 assertNotNull(pattern); 1105 1106 pattern = Pattern.compile("\\p{javaDigit}"); 1107 assertNotNull(pattern); 1108 1109 pattern = Pattern.compile("\\p{javaIdentifierIgnorable}"); 1110 assertNotNull(pattern); 1111 1112 pattern = Pattern.compile("\\p{javaISOControl}"); 1113 assertNotNull(pattern); 1114 1115 pattern = Pattern.compile("\\p{javaJavaIdentifierPart}"); 1116 assertNotNull(pattern); 1117 1118 pattern = Pattern.compile("\\p{javaJavaIdentifierStart}"); 1119 assertNotNull(pattern); 1120 1121 pattern = Pattern.compile("\\p{javaLetter}"); 1122 assertNotNull(pattern); 1123 1124 pattern = Pattern.compile("\\p{javaLetterOrDigit}"); 1125 assertNotNull(pattern); 1126 1127 pattern = Pattern.compile("\\p{javaSpaceChar}"); 1128 assertNotNull(pattern); 1129 1130 pattern = Pattern.compile("\\p{javaTitleCase}"); 1131 assertNotNull(pattern); 1132 1133 pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}"); 1134 assertNotNull(pattern); 1135 1136 pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}"); 1137 assertNotNull(pattern); 1138 } 1139 1140 public void testRangesWithSurrogatesSupplementary() { 1141 String patString = "[abc\uD8D2]"; 1142 String testString = "\uD8D2"; 1143 Pattern pat = Pattern.compile(patString); 1144 Matcher mat = pat.matcher(testString); 1145 assertTrue(mat.matches()); 1146 1147 testString = "a"; 1148 mat = pat.matcher(testString); 1149 assertTrue(mat.matches()); 1150 1151 testString = "ef\uD8D2\uDD71gh"; 1152 mat = pat.matcher(testString); 1153 assertFalse(mat.find()); 1154 1155 testString = "ef\uD8D2gh"; 1156 mat = pat.matcher(testString); 1157 assertTrue(mat.find()); 1158 1159 patString = "[abc\uD8D3&&[c\uD8D3]]"; 1160 testString = "c"; 1161 pat = Pattern.compile(patString); 1162 mat = pat.matcher(testString); 1163 assertTrue(mat.matches()); 1164 1165 testString = "a"; 1166 mat = pat.matcher(testString); 1167 assertFalse(mat.matches()); 1168 1169 testString = "ef\uD8D3\uDD71gh"; 1170 mat = pat.matcher(testString); 1171 assertFalse(mat.find()); 1172 1173 testString = "ef\uD8D3gh"; 1174 mat = pat.matcher(testString); 1175 assertTrue(mat.find()); 1176 1177 patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]"; 1178 testString = "c"; 1179 pat = Pattern.compile(patString); 1180 mat = pat.matcher(testString); 1181 assertTrue(mat.matches()); 1182 1183 testString = "\uDBEE\uDF0C"; 1184 mat = pat.matcher(testString); 1185 assertTrue(mat.matches()); 1186 1187 testString = "ef\uD8D3\uDD71gh"; 1188 mat = pat.matcher(testString); 1189 assertFalse(mat.find()); 1190 1191 testString = "ef\uD8D3gh"; 1192 mat = pat.matcher(testString); 1193 assertTrue(mat.find()); 1194 1195 patString = "[abc\uDBFC]\uDDC2cd"; 1196 testString = "\uDBFC\uDDC2cd"; 1197 pat = Pattern.compile(patString); 1198 mat = pat.matcher(testString); 1199 assertFalse(mat.matches()); 1200 1201 testString = "a\uDDC2cd"; 1202 mat = pat.matcher(testString); 1203 assertTrue(mat.matches()); 1204 } 1205 1206 public void testSequencesWithSurrogatesSupplementary() { 1207 String patString = "abcd\uD8D3"; 1208 String testString = "abcd\uD8D3\uDFFC"; 1209 Pattern pat = Pattern.compile(patString); 1210 Matcher mat = pat.matcher(testString); 1211 // BEGIN android-changed 1212 // This one really doesn't make sense, as the above is a corrupt surrogate. 1213 // Even if it's matched by the JDK, it's more of a bug than of a behavior one 1214 // might want to duplicate. 1215 // assertFalse(mat.find()); 1216 // END android-changed 1217 1218 testString = "abcd\uD8D3abc"; 1219 mat = pat.matcher(testString); 1220 assertTrue(mat.find()); 1221 1222 patString = "ab\uDBEFcd"; 1223 testString = "ab\uDBEFcd"; 1224 pat = Pattern.compile(patString); 1225 mat = pat.matcher(testString); 1226 assertTrue(mat.matches()); 1227 1228 patString = "\uDFFCabcd"; 1229 testString = "\uD8D3\uDFFCabcd"; 1230 pat = Pattern.compile(patString); 1231 mat = pat.matcher(testString); 1232 assertFalse(mat.find()); 1233 1234 testString = "abc\uDFFCabcdecd"; 1235 mat = pat.matcher(testString); 1236 assertTrue(mat.find()); 1237 1238 patString = "\uD8D3\uDFFCabcd"; 1239 testString = "abc\uD8D3\uD8D3\uDFFCabcd"; 1240 pat = Pattern.compile(patString); 1241 mat = pat.matcher(testString); 1242 assertTrue(mat.find()); 1243 } 1244 1245 public void testPredefinedClassesWithSurrogatesSupplementary() { 1246 String patString = "[123\\D]"; 1247 String testString = "a"; 1248 Pattern pat = Pattern.compile(patString); 1249 Matcher mat = pat.matcher(testString); 1250 assertTrue(mat.find()); 1251 1252 testString = "5"; 1253 mat = pat.matcher(testString); 1254 assertFalse(mat.find()); 1255 1256 testString = "3"; 1257 mat = pat.matcher(testString); 1258 assertTrue(mat.find()); 1259 1260 // low surrogate 1261 testString = "\uDFC4"; 1262 mat = pat.matcher(testString); 1263 assertTrue(mat.find()); 1264 1265 // high surrogate 1266 testString = "\uDADA"; 1267 mat = pat.matcher(testString); 1268 assertTrue(mat.find()); 1269 1270 testString = "\uDADA\uDFC4"; 1271 mat = pat.matcher(testString); 1272 assertTrue(mat.find()); 1273 1274 patString = "[123[^\\p{javaDigit}]]"; 1275 testString = "a"; 1276 pat = Pattern.compile(patString); 1277 mat = pat.matcher(testString); 1278 assertTrue(mat.find()); 1279 1280 testString = "5"; 1281 mat = pat.matcher(testString); 1282 assertFalse(mat.find()); 1283 1284 testString = "3"; 1285 mat = pat.matcher(testString); 1286 assertTrue(mat.find()); 1287 1288 // low surrogate 1289 testString = "\uDFC4"; 1290 mat = pat.matcher(testString); 1291 assertTrue(mat.find()); 1292 1293 // high surrogate 1294 testString = "\uDADA"; 1295 mat = pat.matcher(testString); 1296 assertTrue(mat.find()); 1297 1298 testString = "\uDADA\uDFC4"; 1299 mat = pat.matcher(testString); 1300 assertTrue(mat.find()); 1301 1302 // surrogate characters 1303 patString = "\\p{Cs}"; 1304 testString = "\uD916\uDE27"; 1305 pat = Pattern.compile(patString); 1306 mat = pat.matcher(testString); 1307 1308 /* 1309 * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we 1310 * have to treat text as code points not code units. \\p{Cs} matches any 1311 * surrogate character but here testString is a one code point 1312 * consisting of two code units (two surrogate characters) so we find 1313 * nothing 1314 */ 1315 // assertFalse(mat.find()); 1316 // swap low and high surrogates 1317 testString = "\uDE27\uD916"; 1318 mat = pat.matcher(testString); 1319 assertTrue(mat.find()); 1320 1321 patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]"; 1322 testString = "1"; 1323 pat = Pattern.compile(patString); 1324 mat = pat.matcher(testString); 1325 assertTrue(mat.find()); 1326 1327 testString = "\uD916"; 1328 pat = Pattern.compile(patString); 1329 mat = pat.matcher(testString); 1330 assertFalse(mat.find()); 1331 1332 testString = "\uD916\uDE27"; 1333 pat = Pattern.compile(patString); 1334 mat = pat.matcher(testString); 1335 assertTrue(mat.find()); 1336 1337 // \uD9A0\uDE8E=\u7828E 1338 // \u78281=\uD9A0\uDE81 1339 patString = "[a-\uD9A0\uDE8E]"; 1340 testString = "\uD9A0\uDE81"; 1341 pat = Pattern.compile(patString); 1342 mat = pat.matcher(testString); 1343 assertTrue(mat.matches()); 1344 } 1345 1346 public void testDotConstructionWithSurrogatesSupplementary() { 1347 String patString = "."; 1348 String testString = "\uD9A0\uDE81"; 1349 Pattern pat = Pattern.compile(patString); 1350 Matcher mat = pat.matcher(testString); 1351 assertTrue(mat.matches()); 1352 1353 testString = "\uDE81"; 1354 mat = pat.matcher(testString); 1355 assertTrue(mat.matches()); 1356 1357 testString = "\uD9A0"; 1358 mat = pat.matcher(testString); 1359 assertTrue(mat.matches()); 1360 1361 testString = "\n"; 1362 mat = pat.matcher(testString); 1363 assertFalse(mat.matches()); 1364 1365 patString = ".*\uDE81"; 1366 testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81"; 1367 pat = Pattern.compile(patString); 1368 mat = pat.matcher(testString); 1369 assertFalse(mat.matches()); 1370 1371 testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81"; 1372 mat = pat.matcher(testString); 1373 assertTrue(mat.matches()); 1374 1375 patString = ".*"; 1376 testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81"; 1377 pat = Pattern.compile(patString, Pattern.DOTALL); 1378 mat = pat.matcher(testString); 1379 assertTrue(mat.matches()); 1380 } 1381 1382 public void test_quoteLjava_lang_String() { 1383 for (String aPattern : testPatterns) { 1384 Pattern p = Pattern.compile(aPattern); 1385 try { 1386 assertEquals("quote was wrong for plain text", "\\Qtest\\E", p 1387 .quote("test")); 1388 assertEquals("quote was wrong for text with quote sign", 1389 "\\Q\\Qtest\\E", p.quote("\\Qtest")); 1390 assertEquals("quote was wrong for quotted text", 1391 "\\Q\\Qtest\\E\\\\E\\Q\\E", p.quote("\\Qtest\\E")); 1392 } catch (Exception e) { 1393 fail("Unexpected exception: " + e); 1394 } 1395 } 1396 } 1397 1398 public void test_matcherLjava_lang_StringLjava_lang_CharSequence() { 1399 String[][] posSeq = { 1400 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, 1401 { "213567", "12324567", "1234567", "213213567", 1402 "21312312312567", "444444567" }, 1403 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, 1404 { "213234567", "3458", "0987654", "7689546432", "0398576", 1405 "98432", "5" }, 1406 { 1407 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 1408 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 1409 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, 1410 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", 1411 "abbbAbbbliceaaa", "Alice" }, 1412 { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, 1413 { "xy" }, { "xy" }, { "xcy" } 1414 1415 }; 1416 1417 for (int i = 0; i < testPatterns.length; i++) { 1418 for (int j = 0; j < posSeq[i].length; j++) { 1419 assertTrue("Incorrect match: " + testPatterns[i] + " vs " 1420 + posSeq[i][j], Pattern.compile(testPatterns[i]) 1421 .matcher(posSeq[i][j]).matches()); 1422 } 1423 } 1424 } 1425 1426 public void testQuantifiersWithSurrogatesSupplementary() { 1427 String patString = "\uD9A0\uDE81*abc"; 1428 String testString = "\uD9A0\uDE81\uD9A0\uDE81abc"; 1429 Pattern pat = Pattern.compile(patString); 1430 Matcher mat = pat.matcher(testString); 1431 assertTrue(mat.matches()); 1432 1433 testString = "abc"; 1434 mat = pat.matcher(testString); 1435 assertTrue(mat.matches()); 1436 } 1437 1438 public void testAlternationsWithSurrogatesSupplementary() { 1439 String patString = "\uDE81|\uD9A0\uDE81|\uD9A0"; 1440 String testString = "\uD9A0"; 1441 Pattern pat = Pattern.compile(patString); 1442 Matcher mat = pat.matcher(testString); 1443 assertTrue(mat.matches()); 1444 1445 testString = "\uDE81"; 1446 mat = pat.matcher(testString); 1447 assertTrue(mat.matches()); 1448 1449 testString = "\uD9A0\uDE81"; 1450 mat = pat.matcher(testString); 1451 assertTrue(mat.matches()); 1452 1453 testString = "\uDE81\uD9A0"; 1454 mat = pat.matcher(testString); 1455 assertFalse(mat.matches()); 1456 } 1457 1458 public void testGroupsWithSurrogatesSupplementary() { 1459 1460 //this pattern matches nothing 1461 String patString = "(\uD9A0)\uDE81"; 1462 String testString = "\uD9A0\uDE81"; 1463 Pattern pat = Pattern.compile(patString); 1464 Matcher mat = pat.matcher(testString); 1465 assertFalse(mat.matches()); 1466 1467 patString = "(\uD9A0)"; 1468 testString = "\uD9A0\uDE81"; 1469 pat = Pattern.compile(patString, Pattern.DOTALL); 1470 mat = pat.matcher(testString); 1471 assertFalse(mat.find()); 1472 } 1473 1474 /* 1475 * Regression test for HARMONY-688 1476 */ 1477 public void testUnicodeCategoryWithSurrogatesSupplementary() { 1478 Pattern p = Pattern.compile("\\p{javaLowerCase}"); 1479 Matcher matcher = p.matcher("\uD801\uDC28"); 1480 assertTrue(matcher.find()); 1481 } 1482 1483 public void testSplitEmpty() { 1484 1485 Pattern pat = Pattern.compile(""); 1486 String[] s = pat.split("", -1); 1487 1488 assertEquals(1, s.length); 1489 assertEquals("", s[0]); 1490 } 1491 1492 public void testToString() { 1493 for (int i = 0; i < testPatterns.length; i++) { 1494 Pattern p = Pattern.compile(testPatterns[i]); 1495 assertEquals(testPatterns[i], p.toString()); 1496 } 1497 } 1498 1499 // http://code.google.com/p/android/issues/detail?id=19308 1500 public void test_hitEnd() { 1501 Pattern p = Pattern.compile("^2(2[4-9]|3\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$"); 1502 Matcher m = p.matcher("224.."); 1503 boolean isPartialMatch = !m.matches() && m.hitEnd(); 1504 assertFalse(isPartialMatch); 1505 } 1506 1507 public void testCommentsInPattern() { 1508 Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS); 1509 assertTrue(p.matcher("abcd").matches()); 1510 } 1511 1512 public void testCompileNonCaptGroup() { 1513 // icu4c doesn't support CANON_EQ. 1514 Pattern.compile("(?:)"/*, Pattern.CANON_EQ*/); 1515 Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.DOTALL); 1516 Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.CASE_INSENSITIVE); 1517 Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.COMMENTS | Pattern.UNIX_LINES); 1518 } 1519 1520 public void testFlagsMethod() { 1521 // icu4c doesn't count inline flags that span the entire regex as being global flags. 1522 // Android just returns those flags actually passed to Pattern.compile. 1523 if (true) { 1524 return; 1525 } 1526 1527 String baseString; 1528 Pattern pat; 1529 1530 // These tests are for compatibility with RI only. Logically we have to 1531 // return only flags specified during the compilation. For example 1532 // pat.flags() == 0 when we compile Pattern pat = 1533 // Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled 1534 // in a case insensitive manner. So there is little sense to do calls to 1535 // flags() now. 1536 baseString = "(?-i)"; 1537 pat = Pattern.compile(baseString); 1538 1539 baseString = "(?idmsux)abc(?-i)vg(?-dmu)"; 1540 pat = Pattern.compile(baseString); 1541 assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 1542 1543 baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)"; 1544 pat = Pattern.compile(baseString); 1545 assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 1546 1547 baseString = "(?is)a((?x)b.)"; 1548 pat = Pattern.compile(baseString); 1549 assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE); 1550 1551 baseString = "(?i)a((?-i))"; 1552 pat = Pattern.compile(baseString); 1553 assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE); 1554 1555 baseString = "((?i)a)"; 1556 pat = Pattern.compile(baseString); 1557 assertEquals(pat.flags(), 0); 1558 1559 pat = Pattern.compile("(?is)abc"); 1560 assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); 1561 } 1562 1563 public void testCanonEqFlag() { 1564 // icu4c doesn't support CANON_EQ. 1565 if (true) { 1566 return; 1567 } 1568 1569 // for decompositions see 1570 // http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt 1571 // http://www.unicode.org/reports/tr15/#Decomposition 1572 String baseString; 1573 String testString; 1574 Pattern pat; 1575 Matcher mat; 1576 1577 baseString = "ab(a*)\\1"; 1578 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1579 1580 baseString = "a(abcdf)d"; 1581 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1582 1583 baseString = "aabcdfd"; 1584 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1585 1586 // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304 1587 // \u00CC -> \u0049\u0300 1588 1589 baseString = "\u01E0\u00CCcdb(ac)"; 1590 testString = "\u0226\u0304\u0049\u0300cdbac"; 1591 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1592 mat = pat.matcher(testString); 1593 assertTrue(mat.matches()); 1594 1595 baseString = "\u01E0cdb(a\u00CCc)"; 1596 testString = "\u0041\u0307\u0304cdba\u0049\u0300c"; 1597 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1598 mat = pat.matcher(testString); 1599 assertTrue(mat.matches()); 1600 1601 baseString = "a\u00CC"; 1602 testString = "a\u0049\u0300"; 1603 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1604 mat = pat.matcher(testString); 1605 assertTrue(mat.matches()); 1606 1607 baseString = "\u0226\u0304cdb(ac\u0049\u0300)"; 1608 testString = "\u01E0cdbac\u00CC"; 1609 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1610 mat = pat.matcher(testString); 1611 assertTrue(mat.matches()); 1612 1613 baseString = "cdb(?:\u0041\u0307\u0304\u00CC)"; 1614 testString = "cdb\u0226\u0304\u0049\u0300"; 1615 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1616 mat = pat.matcher(testString); 1617 assertTrue(mat.matches()); 1618 1619 baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)"; 1620 testString = "\u01E0b\u00CCcdbac"; 1621 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1622 mat = pat.matcher(testString); 1623 assertTrue(mat.matches()); 1624 1625 baseString = "\u01E0|\u00CCcdb(ac)"; 1626 testString = "\u0041\u0307\u0304"; 1627 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1628 mat = pat.matcher(testString); 1629 assertTrue(mat.matches()); 1630 1631 baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]"; 1632 testString = "cdb\u0041\u0307\u0304b"; 1633 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1634 mat = pat.matcher(testString); 1635 assertTrue(mat.matches()); 1636 1637 baseString = "a\u0300"; 1638 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1639 mat = pat.matcher("a\u00E0a"); 1640 assertTrue(mat.find()); 1641 1642 baseString = "\u7B20\uF9F8abc"; 1643 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1644 mat = pat.matcher("\uF9F8\uF9F8abc"); 1645 assertTrue(mat.matches()); 1646 1647 // \u01F9 -> \u006E\u0300 1648 // \u00C3 -> \u0041\u0303 1649 1650 baseString = "cdb(?:\u00C3\u006E\u0300)"; 1651 testString = "cdb\u0041\u0303\u01F9"; 1652 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1653 mat = pat.matcher(testString); 1654 assertTrue(mat.matches()); 1655 1656 // \u014C -> \u004F\u0304 1657 // \u0163 -> \u0074\u0327 1658 1659 baseString = "cdb(?:\u0163\u004F\u0304)"; 1660 testString = "cdb\u0074\u0327\u014C"; 1661 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1662 mat = pat.matcher(testString); 1663 assertTrue(mat.matches()); 1664 1665 // \u00E1->a\u0301 1666 // canonical ordering takes place \u0301\u0327 -> \u0327\u0301 1667 1668 baseString = "c\u0327\u0301"; 1669 testString = "c\u0301\u0327"; 1670 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1671 mat = pat.matcher(testString); 1672 assertTrue(mat.matches()); 1673 1674 /* 1675 Hangul decompositions 1676 */ 1677 // \uD4DB->\u1111\u1171\u11B6 1678 // \uD21E->\u1110\u116D\u11B5 1679 // \uD264->\u1110\u1170 1680 // not Hangul:\u0453->\u0433\u0301 1681 baseString = "a\uD4DB\u1111\u1171\u11B6\uD264"; 1682 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1683 1684 baseString = "\u0453c\uD4DB"; 1685 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1686 1687 baseString = "a\u1110\u116D\u11B5b\uD21Ebc"; 1688 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1689 1690 baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)"; 1691 testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; 1692 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1693 mat = pat.matcher(testString); 1694 assertTrue(mat.matches()); 1695 1696 baseString = "\uD4DB\uD264cdb(a\uD21Ec)"; 1697 testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c"; 1698 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1699 mat = pat.matcher(testString); 1700 assertTrue(mat.matches()); 1701 1702 baseString = "a\uD4DB"; 1703 testString = "a\u1111\u1171\u11B6"; 1704 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1705 mat = pat.matcher(testString); 1706 assertTrue(mat.matches()); 1707 1708 baseString = "a\uD21E"; 1709 testString = "a\u1110\u116D\u11B5"; 1710 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1711 mat = pat.matcher(testString); 1712 assertTrue(mat.matches()); 1713 1714 baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)"; 1715 testString = "\uD4DBcdbac\uD21E"; 1716 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1717 mat = pat.matcher(testString); 1718 assertTrue(mat.matches()); 1719 1720 baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)"; 1721 testString = "cdb\uD4DB\u1110\u116D\u11B5"; 1722 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1723 mat = pat.matcher(testString); 1724 assertTrue(mat.matches()); 1725 1726 baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)"; 1727 testString = "\uD4DBb\uD21Ecdbac"; 1728 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1729 mat = pat.matcher(testString); 1730 assertTrue(mat.matches()); 1731 1732 baseString = "\uD4DB|\u00CCcdb(ac)"; 1733 testString = "\u1111\u1171\u11B6"; 1734 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1735 mat = pat.matcher(testString); 1736 assertTrue(mat.matches()); 1737 1738 baseString = "\uD4DB|\u00CCcdb(ac)"; 1739 testString = "\u1111\u1171"; 1740 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1741 mat = pat.matcher(testString); 1742 assertFalse(mat.matches()); 1743 1744 baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]"; 1745 testString = "cdb\u1111\u1171\u11B6b"; 1746 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1747 mat = pat.matcher(testString); 1748 assertTrue(mat.matches()); 1749 1750 baseString = "\uD4DB"; 1751 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1752 mat = pat.matcher("a\u1111\u1171\u11B6a"); 1753 assertTrue(mat.find()); 1754 1755 baseString = "\u1111"; 1756 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1757 mat = pat.matcher("bcda\uD4DBr"); 1758 assertFalse(mat.find()); 1759 } 1760 1761 public void testIndexesCanonicalEq() { 1762 // icu4c doesn't support CANON_EQ. 1763 if (true) { 1764 return; 1765 } 1766 1767 String baseString; 1768 String testString; 1769 Pattern pat; 1770 Matcher mat; 1771 1772 baseString = "\uD4DB"; 1773 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1774 mat = pat.matcher("bcda\u1111\u1171\u11B6awr"); 1775 assertTrue(mat.find()); 1776 assertEquals(mat.start(), 4); 1777 assertEquals(mat.end(), 7); 1778 1779 baseString = "\uD4DB\u1111\u1171\u11B6"; 1780 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1781 mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr"); 1782 assertTrue(mat.find()); 1783 assertEquals(mat.start(), 4); 1784 assertEquals(mat.end(), 8); 1785 1786 baseString = "\uD4DB\uD21E\u1110\u1170"; 1787 testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; 1788 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1789 mat = pat.matcher(testString); 1790 assertTrue(mat.find()); 1791 assertEquals(mat.start(), 6); 1792 assertEquals(mat.end(), 13); 1793 } 1794 1795 public void testCanonEqFlagWithSupplementaryCharacters() { 1796 // icu4c doesn't support CANON_EQ. 1797 if (true) { 1798 return; 1799 } 1800 1801 /* 1802 \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 1803 \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F 1804 ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16 1805 */ 1806 String patString = "abc\uD834\uDDBFef"; 1807 String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1808 Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ); 1809 Matcher mat = pat.matcher(testString); 1810 assertTrue(mat.matches()); 1811 1812 testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; 1813 mat = pat.matcher(testString); 1814 assertTrue(mat.matches()); 1815 1816 patString = "abc\uD834\uDDBB\uD834\uDD6Fef"; 1817 testString = "abc\uD834\uDDBFef"; 1818 pat = Pattern.compile(patString, Pattern.CANON_EQ); 1819 mat = pat.matcher(testString); 1820 assertTrue(mat.matches()); 1821 1822 testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1823 mat = pat.matcher(testString); 1824 assertTrue(mat.matches()); 1825 1826 patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1827 testString = "abc\uD834\uDDBFef"; 1828 pat = Pattern.compile(patString, Pattern.CANON_EQ); 1829 mat = pat.matcher(testString); 1830 assertTrue(mat.matches()); 1831 1832 testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; 1833 mat = pat.matcher(testString); 1834 assertTrue(mat.matches()); 1835 1836 // Test supplementary characters with no decomposition 1837 patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef"; 1838 testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef"; 1839 pat = Pattern.compile(patString, Pattern.CANON_EQ); 1840 mat = pat.matcher(testString); 1841 assertTrue(mat.matches()); 1842 } 1843 1844 public void testAsPredicate() { 1845 String[][] posSeq = { 1846 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, 1847 { "213567", "12324567", "1234567", "213213567", 1848 "21312312312567", "444444567" }, 1849 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, 1850 { "213234567", "3458", "0987654", "7689546432", "0398576", 1851 "98432", "5" }, 1852 { 1853 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 1854 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 1855 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, 1856 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", 1857 "abbbAbbbliceaaa", "Alice" }, 1858 { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, 1859 { "xy" }, { "xy" }, { "xcy" } 1860 }; 1861 1862 for (int i = 0; i < testPatterns.length; i++) { 1863 Pattern p = Pattern.compile(testPatterns[i]); 1864 for (int j = 0; j < posSeq[i].length; j++) { 1865 assertTrue(p.asPredicate().test(posSeq[i][j])); 1866 } 1867 } 1868 } 1869 1870 public void testSplitAsStream() { 1871 String s[]; 1872 Pattern pat = Pattern.compile("b"); 1873 s = pat.splitAsStream("abccbadfebb").toArray(String[]::new); 1874 assertEquals(s.length, 3); 1875 s = pat.splitAsStream("").toArray(String[]::new); 1876 assertEquals(s.length, 0); 1877 pat = Pattern.compile(""); 1878 s = pat.splitAsStream("").toArray(String[]::new); 1879 assertEquals(s.length, 0); 1880 s = pat.splitAsStream("abccbadfe").toArray(String[]::new); 1881 assertEquals(s.length, 9); 1882 } 1883 } 1884