1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package java.util.regex; 18 19 /** 20 * The result of applying a {@code Pattern} to a given input. See {@link Pattern} for 21 * example uses. 22 */ 23 public final class Matcher implements MatchResult { 24 25 /** 26 * Holds the pattern, that is, the compiled regular expression. 27 */ 28 private Pattern pattern; 29 30 /** 31 * Holds the handle for the native version of the pattern. 32 */ 33 private int address; 34 35 /** 36 * Holds the input text. 37 */ 38 private String input; 39 40 /** 41 * Holds the start of the region, or 0 if the matching should start at the 42 * beginning of the text. 43 */ 44 private int regionStart; 45 46 /** 47 * Holds the end of the region, or input.length() if the matching should 48 * go until the end of the input. 49 */ 50 private int regionEnd; 51 52 /** 53 * Holds the position where the next find operation will take place. 54 */ 55 private int findPos; 56 57 /** 58 * Holds the position where the next append operation will take place. 59 */ 60 private int appendPos; 61 62 /** 63 * Reflects whether a match has been found during the most recent find 64 * operation. 65 */ 66 private boolean matchFound; 67 68 /** 69 * Holds the offsets for the most recent match. 70 */ 71 private int[] matchOffsets; 72 73 /** 74 * Reflects whether the bounds of the region are anchoring. 75 */ 76 private boolean anchoringBounds = true; 77 78 /** 79 * Reflects whether the bounds of the region are transparent. 80 */ 81 private boolean transparentBounds; 82 83 /** 84 * Creates a matcher for a given combination of pattern and input. Both 85 * elements can be changed later on. 86 * 87 * @param pattern 88 * the pattern to use. 89 * @param input 90 * the input to use. 91 */ 92 Matcher(Pattern pattern, CharSequence input) { 93 usePattern(pattern); 94 reset(input); 95 } 96 97 /** 98 * Appends a literal part of the input plus a replacement for the current 99 * match to a given {@link StringBuffer}. The literal part is exactly the 100 * part of the input between the previous match and the current match. The 101 * method can be used in conjunction with {@link #find()} and 102 * {@link #appendTail(StringBuffer)} to walk through the input and replace 103 * all occurrences of the {@code Pattern} with something else. 104 * 105 * @param buffer 106 * the {@code StringBuffer} to append to. 107 * @param replacement 108 * the replacement text. 109 * @return the {@code Matcher} itself. 110 * @throws IllegalStateException 111 * if no successful match has been made. 112 */ 113 public Matcher appendReplacement(StringBuffer buffer, String replacement) { 114 buffer.append(input.substring(appendPos, start())); 115 appendEvaluated(buffer, replacement); 116 appendPos = end(); 117 118 return this; 119 } 120 121 /** 122 * Internal helper method to append a given string to a given string buffer. 123 * If the string contains any references to groups, these are replaced by 124 * the corresponding group's contents. 125 * 126 * @param buffer 127 * the string buffer. 128 * @param s 129 * the string to append. 130 */ 131 private void appendEvaluated(StringBuffer buffer, String s) { 132 boolean escape = false; 133 boolean dollar = false; 134 135 for (int i = 0; i < s.length(); i++) { 136 char c = s.charAt(i); 137 if (c == '\\' && !escape) { 138 escape = true; 139 } else if (c == '$' && !escape) { 140 dollar = true; 141 } else if (c >= '0' && c <= '9' && dollar) { 142 buffer.append(group(c - '0')); 143 dollar = false; 144 } else { 145 buffer.append(c); 146 dollar = false; 147 escape = false; 148 } 149 } 150 151 // This seemingly stupid piece of code reproduces a JDK bug. 152 if (escape) { 153 throw new ArrayIndexOutOfBoundsException(s.length()); 154 } 155 } 156 157 /** 158 * Resets the {@code Matcher}. This results in the region being set to the 159 * whole input. Results of a previous find get lost. The next attempt to 160 * find an occurrence of the {@link Pattern} in the string will start at the 161 * beginning of the input. 162 * 163 * @return the {@code Matcher} itself. 164 */ 165 public Matcher reset() { 166 return reset(input, 0, input.length()); 167 } 168 169 /** 170 * Provides a new input and resets the {@code Matcher}. This results in the 171 * region being set to the whole input. Results of a previous find get lost. 172 * The next attempt to find an occurrence of the {@link Pattern} in the 173 * string will start at the beginning of the input. 174 * 175 * @param input 176 * the new input sequence. 177 * 178 * @return the {@code Matcher} itself. 179 */ 180 public Matcher reset(CharSequence input) { 181 return reset(input, 0, input.length()); 182 } 183 184 /** 185 * Resets the Matcher. A new input sequence and a new region can be 186 * specified. Results of a previous find get lost. The next attempt to find 187 * an occurrence of the Pattern in the string will start at the beginning of 188 * the region. This is the internal version of reset() to which the several 189 * public versions delegate. 190 * 191 * @param input 192 * the input sequence. 193 * @param start 194 * the start of the region. 195 * @param end 196 * the end of the region. 197 * 198 * @return the matcher itself. 199 */ 200 private Matcher reset(CharSequence input, int start, int end) { 201 if (input == null) { 202 throw new IllegalArgumentException(); 203 } 204 205 if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) { 206 throw new IndexOutOfBoundsException(); 207 } 208 209 this.input = input.toString(); 210 this.regionStart = start; 211 this.regionEnd = end; 212 resetForInput(); 213 214 matchFound = false; 215 findPos = regionStart; 216 appendPos = 0; 217 218 return this; 219 } 220 221 /** 222 * Sets a new pattern for the {@code Matcher}. Results of a previous find 223 * get lost. The next attempt to find an occurrence of the {@link Pattern} 224 * in the string will start at the beginning of the input. 225 * 226 * @param pattern 227 * the new {@code Pattern}. 228 * 229 * @return the {@code Matcher} itself. 230 */ 231 public Matcher usePattern(Pattern pattern) { 232 if (pattern == null) { 233 throw new IllegalArgumentException(); 234 } 235 236 this.pattern = pattern; 237 238 if (address != 0) { 239 closeImpl(address); 240 address = 0; 241 } 242 address = openImpl(pattern.address); 243 244 if (input != null) { 245 resetForInput(); 246 } 247 248 matchOffsets = new int[(groupCount() + 1) * 2]; 249 matchFound = false; 250 return this; 251 } 252 253 private void resetForInput() { 254 setInputImpl(address, input, regionStart, regionEnd); 255 useAnchoringBoundsImpl(address, anchoringBounds); 256 useTransparentBoundsImpl(address, transparentBounds); 257 } 258 259 /** 260 * Resets this matcher and sets a region. Only characters inside the region 261 * are considered for a match. 262 * 263 * @param start 264 * the first character of the region. 265 * @param end 266 * the first character after the end of the region. 267 * @return the {@code Matcher} itself. 268 */ 269 public Matcher region(int start, int end) { 270 return reset(input, start, end); 271 } 272 273 /** 274 * Appends the (unmatched) remainder of the input to the given 275 * {@link StringBuffer}. The method can be used in conjunction with 276 * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to 277 * walk through the input and replace all matches of the {@code Pattern} 278 * with something else. 279 * 280 * @param buffer 281 * the {@code StringBuffer} to append to. 282 * @return the {@code StringBuffer}. 283 * @throws IllegalStateException 284 * if no successful match has been made. 285 */ 286 public StringBuffer appendTail(StringBuffer buffer) { 287 if (appendPos < regionEnd) { 288 buffer.append(input.substring(appendPos, regionEnd)); 289 } 290 return buffer; 291 } 292 293 /** 294 * Replaces the first occurrence of this matcher's pattern in the input with 295 * a given string. 296 * 297 * @param replacement 298 * the replacement text. 299 * @return the modified input string. 300 */ 301 public String replaceFirst(String replacement) { 302 reset(); 303 StringBuffer buffer = new StringBuffer(input.length()); 304 if (find()) { 305 appendReplacement(buffer, replacement); 306 } 307 return appendTail(buffer).toString(); 308 } 309 310 /** 311 * Replaces all occurrences of this matcher's pattern in the input with a 312 * given string. 313 * 314 * @param replacement 315 * the replacement text. 316 * @return the modified input string. 317 */ 318 public String replaceAll(String replacement) { 319 reset(); 320 StringBuffer buffer = new StringBuffer(input.length()); 321 while (find()) { 322 appendReplacement(buffer, replacement); 323 } 324 return appendTail(buffer).toString(); 325 } 326 327 /** 328 * Returns the {@link Pattern} instance used inside this matcher. 329 * 330 * @return the {@code Pattern} instance. 331 */ 332 public Pattern pattern() { 333 return pattern; 334 } 335 336 /** 337 * Returns the text that matched a given group of the regular expression. 338 * Explicit capturing groups in the pattern are numbered left to right in order 339 * of their <i>opening</i> parenthesis, starting at 1. 340 * The special group 0 represents the entire match (as if the entire pattern is surrounded 341 * by an implicit capturing group). 342 * For example, "a((b)c)" matching "abc" would give the following groups: 343 * <pre> 344 * 0 "abc" 345 * 1 "bc" 346 * 2 "b" 347 * </pre> 348 * 349 * <p>An optional capturing group that failed to match as part of an overall 350 * successful match (for example, "a(b)?c" matching "ac") returns null. 351 * A capturing group that matched the empty string (for example, "a(b?)c" matching "ac") 352 * returns the empty string. 353 * 354 * @throws IllegalStateException 355 * if no successful match has been made. 356 */ 357 public String group(int group) { 358 ensureMatch(); 359 int from = matchOffsets[group * 2]; 360 int to = matchOffsets[(group * 2) + 1]; 361 if (from == -1 || to == -1) { 362 return null; 363 } else { 364 return input.substring(from, to); 365 } 366 } 367 368 /** 369 * Returns the text that matched the whole regular expression. 370 * 371 * @return the text. 372 * @throws IllegalStateException 373 * if no successful match has been made. 374 */ 375 public String group() { 376 return group(0); 377 } 378 379 /** 380 * Returns the next occurrence of the {@link Pattern} in the input. The 381 * method starts the search from the given character in the input. 382 * 383 * @param start 384 * The index in the input at which the find operation is to 385 * begin. If this is less than the start of the region, it is 386 * automatically adjusted to that value. If it is beyond the end 387 * of the region, the method will fail. 388 * @return true if (and only if) a match has been found. 389 */ 390 public boolean find(int start) { 391 findPos = start; 392 393 if (findPos < regionStart) { 394 findPos = regionStart; 395 } else if (findPos >= regionEnd) { 396 matchFound = false; 397 return false; 398 } 399 400 matchFound = findImpl(address, input, findPos, matchOffsets); 401 if (matchFound) { 402 findPos = matchOffsets[1]; 403 } 404 return matchFound; 405 } 406 407 /** 408 * Returns the next occurrence of the {@link Pattern} in the input. If a 409 * previous match was successful, the method continues the search from the 410 * first character following that match in the input. Otherwise it searches 411 * either from the region start (if one has been set), or from position 0. 412 * 413 * @return true if (and only if) a match has been found. 414 */ 415 public boolean find() { 416 matchFound = findNextImpl(address, input, matchOffsets); 417 if (matchFound) { 418 findPos = matchOffsets[1]; 419 } 420 return matchFound; 421 } 422 423 /** 424 * Tries to match the {@link Pattern}, starting from the beginning of the 425 * region (or the beginning of the input, if no region has been set). 426 * Doesn't require the {@code Pattern} to match against the whole region. 427 * 428 * @return true if (and only if) the {@code Pattern} matches. 429 */ 430 public boolean lookingAt() { 431 matchFound = lookingAtImpl(address, input, matchOffsets); 432 if (matchFound) { 433 findPos = matchOffsets[1]; 434 } 435 return matchFound; 436 } 437 438 /** 439 * Tries to match the {@link Pattern} against the entire region (or the 440 * entire input, if no region has been set). 441 * 442 * @return true if (and only if) the {@code Pattern} matches the entire 443 * region. 444 */ 445 public boolean matches() { 446 matchFound = matchesImpl(address, input, matchOffsets); 447 if (matchFound) { 448 findPos = matchOffsets[1]; 449 } 450 return matchFound; 451 } 452 453 /** 454 * Returns the index of the first character of the text that matched a given 455 * group. 456 * 457 * @param group 458 * the group, ranging from 0 to groupCount() - 1, with 0 459 * representing the whole pattern. 460 * @return the character index. 461 * @throws IllegalStateException 462 * if no successful match has been made. 463 */ 464 public int start(int group) throws IllegalStateException { 465 ensureMatch(); 466 return matchOffsets[group * 2]; 467 } 468 469 /** 470 * Returns the index of the first character following the text that matched 471 * a given group. 472 * 473 * @param group 474 * the group, ranging from 0 to groupCount() - 1, with 0 475 * representing the whole pattern. 476 * @return the character index. 477 * @throws IllegalStateException 478 * if no successful match has been made. 479 */ 480 public int end(int group) { 481 ensureMatch(); 482 return matchOffsets[(group * 2) + 1]; 483 } 484 485 /** 486 * Returns a replacement string for the given one that has all backslashes 487 * and dollar signs escaped. 488 * 489 * @param s 490 * the input string. 491 * @return the input string, with all backslashes and dollar signs having 492 * been escaped. 493 */ 494 public static String quoteReplacement(String s) { 495 StringBuilder result = new StringBuilder(s.length()); 496 for (int i = 0; i < s.length(); i++) { 497 char c = s.charAt(i); 498 if (c == '\\' || c == '$') { 499 result.append('\\'); 500 } 501 result.append(c); 502 } 503 return result.toString(); 504 } 505 506 /** 507 * Returns the index of the first character of the text that matched the 508 * whole regular expression. 509 * 510 * @return the character index. 511 * @throws IllegalStateException 512 * if no successful match has been made. 513 */ 514 public int start() { 515 return start(0); 516 } 517 518 /** 519 * Returns the number of groups in the results, which is always equal to 520 * the number of groups in the original regular expression. 521 * 522 * @return the number of groups. 523 */ 524 public int groupCount() { 525 return groupCountImpl(address); 526 } 527 528 /** 529 * Returns the index of the first character following the text that matched 530 * the whole regular expression. 531 * 532 * @return the character index. 533 * @throws IllegalStateException 534 * if no successful match has been made. 535 */ 536 public int end() { 537 return end(0); 538 } 539 540 /** 541 * Converts the current match into a separate {@link MatchResult} instance 542 * that is independent from this matcher. The new object is unaffected when 543 * the state of this matcher changes. 544 * 545 * @return the new {@code MatchResult}. 546 * @throws IllegalStateException 547 * if no successful match has been made. 548 */ 549 public MatchResult toMatchResult() { 550 ensureMatch(); 551 return new MatchResultImpl(input, matchOffsets); 552 } 553 554 /** 555 * Determines whether this matcher has anchoring bounds enabled or not. When 556 * anchoring bounds are enabled, the start and end of the input match the 557 * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled 558 * by default. 559 * 560 * @param value 561 * the new value for anchoring bounds. 562 * @return the {@code Matcher} itself. 563 */ 564 public Matcher useAnchoringBounds(boolean value) { 565 anchoringBounds = value; 566 useAnchoringBoundsImpl(address, value); 567 return this; 568 } 569 570 /** 571 * Indicates whether this matcher has anchoring bounds enabled. When 572 * anchoring bounds are enabled, the start and end of the input match the 573 * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled 574 * by default. 575 * 576 * @return true if (and only if) the {@code Matcher} uses anchoring bounds. 577 */ 578 public boolean hasAnchoringBounds() { 579 return anchoringBounds; 580 } 581 582 /** 583 * Determines whether this matcher has transparent bounds enabled or not. 584 * When transparent bounds are enabled, the parts of the input outside the 585 * region are subject to lookahead and lookbehind, otherwise they are not. 586 * Transparent bounds are disabled by default. 587 * 588 * @param value 589 * the new value for transparent bounds. 590 * @return the {@code Matcher} itself. 591 */ 592 public Matcher useTransparentBounds(boolean value) { 593 transparentBounds = value; 594 useTransparentBoundsImpl(address, value); 595 return this; 596 } 597 598 /** 599 * Makes sure that a successful match has been made. Is invoked internally 600 * from various places in the class. 601 * 602 * @throws IllegalStateException 603 * if no successful match has been made. 604 */ 605 private void ensureMatch() { 606 if (!matchFound) { 607 throw new IllegalStateException("No successful match so far"); 608 } 609 } 610 611 /** 612 * Indicates whether this matcher has transparent bounds enabled. When 613 * transparent bounds are enabled, the parts of the input outside the region 614 * are subject to lookahead and lookbehind, otherwise they are not. 615 * Transparent bounds are disabled by default. 616 * 617 * @return true if (and only if) the {@code Matcher} uses anchoring bounds. 618 */ 619 public boolean hasTransparentBounds() { 620 return transparentBounds; 621 } 622 623 /** 624 * Returns this matcher's region start, that is, the first character that is 625 * considered for a match. 626 * 627 * @return the start of the region. 628 */ 629 public int regionStart() { 630 return regionStart; 631 } 632 633 /** 634 * Returns this matcher's region end, that is, the first character that is 635 * not considered for a match. 636 * 637 * @return the end of the region. 638 */ 639 public int regionEnd() { 640 return regionEnd; 641 } 642 643 /** 644 * Indicates whether more input might change a successful match into an 645 * unsuccessful one. 646 * 647 * @return true if (and only if) more input might change a successful match 648 * into an unsuccessful one. 649 */ 650 public boolean requireEnd() { 651 return requireEndImpl(address); 652 } 653 654 /** 655 * Indicates whether the last match hit the end of the input. 656 * 657 * @return true if (and only if) the last match hit the end of the input. 658 */ 659 public boolean hitEnd() { 660 return hitEndImpl(address); 661 } 662 663 @Override protected void finalize() throws Throwable { 664 try { 665 closeImpl(address); 666 } finally { 667 super.finalize(); 668 } 669 } 670 671 private static native void closeImpl(int addr); 672 private static native boolean findImpl(int addr, String s, int startIndex, int[] offsets); 673 private static native boolean findNextImpl(int addr, String s, int[] offsets); 674 private static native int groupCountImpl(int addr); 675 private static native boolean hitEndImpl(int addr); 676 private static native boolean lookingAtImpl(int addr, String s, int[] offsets); 677 private static native boolean matchesImpl(int addr, String s, int[] offsets); 678 private static native int openImpl(int patternAddr); 679 private static native boolean requireEndImpl(int addr); 680 private static native void setInputImpl(int addr, String s, int start, int end); 681 private static native void useAnchoringBoundsImpl(int addr, boolean value); 682 private static native void useTransparentBoundsImpl(int addr, boolean value); 683 } 684