1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package java.util.regex; 18 19 /** 20 * The result of applying a {@code Pattern} to a given input. See {@link Pattern} for 21 * example uses. 22 */ 23 public final class Matcher implements MatchResult { 24 25 /** 26 * Holds the pattern, that is, the compiled regular expression. 27 */ 28 private Pattern pattern; 29 30 /** 31 * The address of the native peer. 32 * Uses of this must be manually synchronized to avoid native crashes. 33 */ 34 private long address; 35 36 /** 37 * Holds the input text. 38 */ 39 private String input; 40 41 /** 42 * Holds the start of the region, or 0 if the matching should start at the 43 * beginning of the text. 44 */ 45 private int regionStart; 46 47 /** 48 * Holds the end of the region, or input.length() if the matching should 49 * go until the end of the input. 50 */ 51 private int regionEnd; 52 53 /** 54 * Holds the position where the next append operation will take place. 55 */ 56 private int appendPos; 57 58 /** 59 * Reflects whether a match has been found during the most recent find 60 * operation. 61 */ 62 private boolean matchFound; 63 64 /** 65 * Holds the offsets for the most recent match. 66 */ 67 private int[] matchOffsets; 68 69 /** 70 * Reflects whether the bounds of the region are anchoring. 71 */ 72 private boolean anchoringBounds = true; 73 74 /** 75 * Reflects whether the bounds of the region are transparent. 76 */ 77 private boolean transparentBounds; 78 79 /** 80 * Creates a matcher for a given combination of pattern and input. Both 81 * elements can be changed later on. 82 * 83 * @param pattern 84 * the pattern to use. 85 * @param input 86 * the input to use. 87 */ 88 Matcher(Pattern pattern, CharSequence input) { 89 usePattern(pattern); 90 reset(input); 91 } 92 93 /** 94 * Appends a literal part of the input plus a replacement for the current 95 * match to a given {@link StringBuffer}. The literal part is exactly the 96 * part of the input between the previous match and the current match. The 97 * method can be used in conjunction with {@link #find()} and 98 * {@link #appendTail(StringBuffer)} to walk through the input and replace 99 * all occurrences of the {@code Pattern} with something else. 100 * 101 * @param buffer 102 * the {@code StringBuffer} to append to. 103 * @param replacement 104 * the replacement text. 105 * @return the {@code Matcher} itself. 106 * @throws IllegalStateException 107 * if no successful match has been made. 108 */ 109 public Matcher appendReplacement(StringBuffer buffer, String replacement) { 110 buffer.append(input.substring(appendPos, start())); 111 appendEvaluated(buffer, replacement); 112 appendPos = end(); 113 114 return this; 115 } 116 117 /** 118 * Internal helper method to append a given string to a given string buffer. 119 * If the string contains any references to groups, these are replaced by 120 * the corresponding group's contents. 121 * 122 * @param buffer 123 * the string buffer. 124 * @param s 125 * the string to append. 126 */ 127 private void appendEvaluated(StringBuffer buffer, String s) { 128 boolean escape = false; 129 boolean dollar = false; 130 131 for (int i = 0; i < s.length(); i++) { 132 char c = s.charAt(i); 133 if (c == '\\' && !escape) { 134 escape = true; 135 } else if (c == '$' && !escape) { 136 dollar = true; 137 } else if (c >= '0' && c <= '9' && dollar) { 138 buffer.append(group(c - '0')); 139 dollar = false; 140 } else { 141 buffer.append(c); 142 dollar = false; 143 escape = false; 144 } 145 } 146 147 // This seemingly stupid piece of code reproduces a JDK bug. 148 if (escape) { 149 throw new ArrayIndexOutOfBoundsException(s.length()); 150 } 151 } 152 153 /** 154 * Resets the {@code Matcher}. This results in the region being set to the 155 * whole input. Results of a previous find get lost. The next attempt to 156 * find an occurrence of the {@link Pattern} in the string will start at the 157 * beginning of the input. 158 * 159 * @return the {@code Matcher} itself. 160 */ 161 public Matcher reset() { 162 return reset(input, 0, input.length()); 163 } 164 165 /** 166 * Provides a new input and resets the {@code Matcher}. This results in the 167 * region being set to the whole input. Results of a previous find get lost. 168 * The next attempt to find an occurrence of the {@link Pattern} in the 169 * string will start at the beginning of the input. 170 * 171 * @param input 172 * the new input sequence. 173 * 174 * @return the {@code Matcher} itself. 175 */ 176 public Matcher reset(CharSequence input) { 177 return reset(input, 0, input.length()); 178 } 179 180 /** 181 * Resets the Matcher. A new input sequence and a new region can be 182 * specified. Results of a previous find get lost. The next attempt to find 183 * an occurrence of the Pattern in the string will start at the beginning of 184 * the region. This is the internal version of reset() to which the several 185 * public versions delegate. 186 * 187 * @param input 188 * the input sequence. 189 * @param start 190 * the start of the region. 191 * @param end 192 * the end of the region. 193 * 194 * @return the matcher itself. 195 */ 196 private Matcher reset(CharSequence input, int start, int end) { 197 if (input == null) { 198 throw new IllegalArgumentException("input == null"); 199 } 200 201 if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) { 202 throw new IndexOutOfBoundsException(); 203 } 204 205 this.input = input.toString(); 206 this.regionStart = start; 207 this.regionEnd = end; 208 resetForInput(); 209 210 matchFound = false; 211 appendPos = 0; 212 213 return this; 214 } 215 216 /** 217 * Sets a new pattern for the {@code Matcher}. Results of a previous find 218 * get lost. The next attempt to find an occurrence of the {@link Pattern} 219 * in the string will start at the beginning of the input. 220 * 221 * @param pattern 222 * the new {@code Pattern}. 223 * 224 * @return the {@code Matcher} itself. 225 */ 226 public Matcher usePattern(Pattern pattern) { 227 if (pattern == null) { 228 throw new IllegalArgumentException("pattern == null"); 229 } 230 231 this.pattern = pattern; 232 233 synchronized (this) { 234 if (address != 0) { 235 closeImpl(address); 236 address = 0; // In case openImpl throws. 237 } 238 address = openImpl(pattern.address); 239 } 240 241 if (input != null) { 242 resetForInput(); 243 } 244 245 matchOffsets = new int[(groupCount() + 1) * 2]; 246 matchFound = false; 247 return this; 248 } 249 250 private void resetForInput() { 251 synchronized (this) { 252 setInputImpl(address, input, regionStart, regionEnd); 253 useAnchoringBoundsImpl(address, anchoringBounds); 254 useTransparentBoundsImpl(address, transparentBounds); 255 } 256 } 257 258 /** 259 * Resets this matcher and sets a region. Only characters inside the region 260 * are considered for a match. 261 * 262 * @param start 263 * the first character of the region. 264 * @param end 265 * the first character after the end of the region. 266 * @return the {@code Matcher} itself. 267 */ 268 public Matcher region(int start, int end) { 269 return reset(input, start, end); 270 } 271 272 /** 273 * Appends the (unmatched) remainder of the input to the given 274 * {@link StringBuffer}. The method can be used in conjunction with 275 * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to 276 * walk through the input and replace all matches of the {@code Pattern} 277 * with something else. 278 * 279 * @param buffer 280 * the {@code StringBuffer} to append to. 281 * @return the {@code StringBuffer}. 282 * @throws IllegalStateException 283 * if no successful match has been made. 284 */ 285 public StringBuffer appendTail(StringBuffer buffer) { 286 if (appendPos < regionEnd) { 287 buffer.append(input.substring(appendPos, regionEnd)); 288 } 289 return buffer; 290 } 291 292 /** 293 * Replaces the first occurrence of this matcher's pattern in the input with 294 * a given string. 295 * 296 * @param replacement 297 * the replacement text. 298 * @return the modified input string. 299 */ 300 public String replaceFirst(String replacement) { 301 reset(); 302 StringBuffer buffer = new StringBuffer(input.length()); 303 if (find()) { 304 appendReplacement(buffer, replacement); 305 } 306 return appendTail(buffer).toString(); 307 } 308 309 /** 310 * Replaces all occurrences of this matcher's pattern in the input with a 311 * given string. 312 * 313 * @param replacement 314 * the replacement text. 315 * @return the modified input string. 316 */ 317 public String replaceAll(String replacement) { 318 reset(); 319 StringBuffer buffer = new StringBuffer(input.length()); 320 while (find()) { 321 appendReplacement(buffer, replacement); 322 } 323 return appendTail(buffer).toString(); 324 } 325 326 /** 327 * Returns the {@link Pattern} instance used inside this matcher. 328 * 329 * @return the {@code Pattern} instance. 330 */ 331 public Pattern pattern() { 332 return pattern; 333 } 334 335 /** 336 * Returns the text that matched a given group of the regular expression. 337 * Explicit capturing groups in the pattern are numbered left to right in order 338 * of their <i>opening</i> parenthesis, starting at 1. 339 * The special group 0 represents the entire match (as if the entire pattern is surrounded 340 * by an implicit capturing group). 341 * For example, "a((b)c)" matching "abc" would give the following groups: 342 * <pre> 343 * 0 "abc" 344 * 1 "bc" 345 * 2 "b" 346 * </pre> 347 * 348 * <p>An optional capturing group that failed to match as part of an overall 349 * successful match (for example, "a(b)?c" matching "ac") returns null. 350 * A capturing group that matched the empty string (for example, "a(b?)c" matching "ac") 351 * returns the empty string. 352 * 353 * @throws IllegalStateException 354 * if no successful match has been made. 355 */ 356 public String group(int group) { 357 ensureMatch(); 358 int from = matchOffsets[group * 2]; 359 int to = matchOffsets[(group * 2) + 1]; 360 if (from == -1 || to == -1) { 361 return null; 362 } else { 363 return input.substring(from, to); 364 } 365 } 366 367 /** 368 * Returns the text that matched the whole regular expression. 369 * 370 * @return the text. 371 * @throws IllegalStateException 372 * if no successful match has been made. 373 */ 374 public String group() { 375 return group(0); 376 } 377 378 /** 379 * Returns true if there is another match in the input, starting 380 * from the given position. The region is ignored. 381 * 382 * @throws IndexOutOfBoundsException if {@code start < 0 || start > input.length()} 383 */ 384 public boolean find(int start) { 385 if (start < 0 || start > input.length()) { 386 throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length()); 387 } 388 389 synchronized (this) { 390 matchFound = findImpl(address, input, start, matchOffsets); 391 } 392 return matchFound; 393 } 394 395 /** 396 * Returns the next occurrence of the {@link Pattern} in the input. If a 397 * previous match was successful, the method continues the search from the 398 * first character following that match in the input. Otherwise it searches 399 * either from the region start (if one has been set), or from position 0. 400 * 401 * @return true if (and only if) a match has been found. 402 */ 403 public boolean find() { 404 synchronized (this) { 405 matchFound = findNextImpl(address, input, matchOffsets); 406 } 407 return matchFound; 408 } 409 410 /** 411 * Tries to match the {@link Pattern}, starting from the beginning of the 412 * region (or the beginning of the input, if no region has been set). 413 * Doesn't require the {@code Pattern} to match against the whole region. 414 * 415 * @return true if (and only if) the {@code Pattern} matches. 416 */ 417 public boolean lookingAt() { 418 synchronized (this) { 419 matchFound = lookingAtImpl(address, input, matchOffsets); 420 } 421 return matchFound; 422 } 423 424 /** 425 * Tries to match the {@link Pattern} against the entire region (or the 426 * entire input, if no region has been set). 427 * 428 * @return true if (and only if) the {@code Pattern} matches the entire 429 * region. 430 */ 431 public boolean matches() { 432 synchronized (this) { 433 matchFound = matchesImpl(address, input, matchOffsets); 434 } 435 return matchFound; 436 } 437 438 /** 439 * Returns the index of the first character of the text that matched a given 440 * group. 441 * 442 * @param group 443 * the group, ranging from 0 to groupCount() - 1, with 0 444 * representing the whole pattern. 445 * @return the character index. 446 * @throws IllegalStateException 447 * if no successful match has been made. 448 */ 449 public int start(int group) throws IllegalStateException { 450 ensureMatch(); 451 return matchOffsets[group * 2]; 452 } 453 454 /** 455 * Returns the index of the first character following the text that matched 456 * a given group. 457 * 458 * @param group 459 * the group, ranging from 0 to groupCount() - 1, with 0 460 * representing the whole pattern. 461 * @return the character index. 462 * @throws IllegalStateException 463 * if no successful match has been made. 464 */ 465 public int end(int group) { 466 ensureMatch(); 467 return matchOffsets[(group * 2) + 1]; 468 } 469 470 /** 471 * Returns a replacement string for the given one that has all backslashes 472 * and dollar signs escaped. 473 * 474 * @param s 475 * the input string. 476 * @return the input string, with all backslashes and dollar signs having 477 * been escaped. 478 */ 479 public static String quoteReplacement(String s) { 480 StringBuilder result = new StringBuilder(s.length()); 481 for (int i = 0; i < s.length(); i++) { 482 char c = s.charAt(i); 483 if (c == '\\' || c == '$') { 484 result.append('\\'); 485 } 486 result.append(c); 487 } 488 return result.toString(); 489 } 490 491 /** 492 * Returns the index of the first character of the text that matched the 493 * whole regular expression. 494 * 495 * @return the character index. 496 * @throws IllegalStateException 497 * if no successful match has been made. 498 */ 499 public int start() { 500 return start(0); 501 } 502 503 /** 504 * Returns the number of groups in the results, which is always equal to 505 * the number of groups in the original regular expression. 506 * 507 * @return the number of groups. 508 */ 509 public int groupCount() { 510 synchronized (this) { 511 return groupCountImpl(address); 512 } 513 } 514 515 /** 516 * Returns the index of the first character following the text that matched 517 * the whole regular expression. 518 * 519 * @return the character index. 520 * @throws IllegalStateException 521 * if no successful match has been made. 522 */ 523 public int end() { 524 return end(0); 525 } 526 527 /** 528 * Converts the current match into a separate {@link MatchResult} instance 529 * that is independent from this matcher. The new object is unaffected when 530 * the state of this matcher changes. 531 * 532 * @return the new {@code MatchResult}. 533 * @throws IllegalStateException 534 * if no successful match has been made. 535 */ 536 public MatchResult toMatchResult() { 537 ensureMatch(); 538 return new MatchResultImpl(input, matchOffsets); 539 } 540 541 /** 542 * Determines whether this matcher has anchoring bounds enabled or not. When 543 * anchoring bounds are enabled, the start and end of the input match the 544 * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled 545 * by default. 546 * 547 * @param value 548 * the new value for anchoring bounds. 549 * @return the {@code Matcher} itself. 550 */ 551 public Matcher useAnchoringBounds(boolean value) { 552 synchronized (this) { 553 anchoringBounds = value; 554 useAnchoringBoundsImpl(address, value); 555 } 556 return this; 557 } 558 559 /** 560 * Returns true if this matcher has anchoring bounds enabled. When 561 * anchoring bounds are enabled, the start and end of the input match the 562 * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled 563 * by default. 564 */ 565 public boolean hasAnchoringBounds() { 566 return anchoringBounds; 567 } 568 569 /** 570 * Determines whether this matcher has transparent bounds enabled or not. 571 * When transparent bounds are enabled, the parts of the input outside the 572 * region are subject to lookahead and lookbehind, otherwise they are not. 573 * Transparent bounds are disabled by default. 574 * 575 * @param value 576 * the new value for transparent bounds. 577 * @return the {@code Matcher} itself. 578 */ 579 public Matcher useTransparentBounds(boolean value) { 580 synchronized (this) { 581 transparentBounds = value; 582 useTransparentBoundsImpl(address, value); 583 } 584 return this; 585 } 586 587 /** 588 * Makes sure that a successful match has been made. Is invoked internally 589 * from various places in the class. 590 * 591 * @throws IllegalStateException 592 * if no successful match has been made. 593 */ 594 private void ensureMatch() { 595 if (!matchFound) { 596 throw new IllegalStateException("No successful match so far"); 597 } 598 } 599 600 /** 601 * Returns true if this matcher has transparent bounds enabled. When 602 * transparent bounds are enabled, the parts of the input outside the region 603 * are subject to lookahead and lookbehind, otherwise they are not. 604 * Transparent bounds are disabled by default. 605 */ 606 public boolean hasTransparentBounds() { 607 return transparentBounds; 608 } 609 610 /** 611 * Returns this matcher's region start, that is, the index of the first character that is 612 * considered for a match. 613 */ 614 public int regionStart() { 615 return regionStart; 616 } 617 618 /** 619 * Returns this matcher's region end, that is, the index of the first character that is 620 * not considered for a match. 621 */ 622 public int regionEnd() { 623 return regionEnd; 624 } 625 626 /** 627 * Returns true if the most recent match succeeded and additional input could cause 628 * it to fail. If this method returns false and a match was found, then more input 629 * might change the match but the match won't be lost. If a match was not found, 630 * then requireEnd has no meaning. 631 */ 632 public boolean requireEnd() { 633 synchronized (this) { 634 return requireEndImpl(address); 635 } 636 } 637 638 /** 639 * Returns true if the most recent matching operation attempted to access 640 * additional text beyond the available input, meaning that additional input 641 * could change the results of the match. 642 */ 643 public boolean hitEnd() { 644 synchronized (this) { 645 return hitEndImpl(address); 646 } 647 } 648 649 @Override protected void finalize() throws Throwable { 650 try { 651 synchronized (this) { 652 closeImpl(address); 653 } 654 } finally { 655 super.finalize(); 656 } 657 } 658 659 /** 660 * Returns a string representing this {@code Matcher}. 661 * The format of this string is unspecified. 662 */ 663 @Override public String toString() { 664 return getClass().getName() + "[pattern=" + pattern() + 665 " region=" + regionStart() + "," + regionEnd() + 666 " lastmatch=" + (matchFound ? group() : "") + "]"; 667 } 668 669 private static native void closeImpl(long addr); 670 private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets); 671 private static native boolean findNextImpl(long addr, String s, int[] offsets); 672 private static native int groupCountImpl(long addr); 673 private static native boolean hitEndImpl(long addr); 674 private static native boolean lookingAtImpl(long addr, String s, int[] offsets); 675 private static native boolean matchesImpl(long addr, String s, int[] offsets); 676 private static native long openImpl(long patternAddr); 677 private static native boolean requireEndImpl(long addr); 678 private static native void setInputImpl(long addr, String s, int start, int end); 679 private static native void useAnchoringBoundsImpl(long addr, boolean value); 680 private static native void useTransparentBoundsImpl(long addr, boolean value); 681 } 682