Home | History | Annotate | Download | only in regex
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package java.util.regex;
     18 
     19 /**
     20  * The result of applying a {@code Pattern} to a given input. See {@link Pattern} for
     21  * example uses.
     22  */
     23 public final class Matcher implements MatchResult {
     24 
     25     /**
     26      * Holds the pattern, that is, the compiled regular expression.
     27      */
     28     private Pattern pattern;
     29 
     30     /**
     31      * The address of the native peer.
     32      * Uses of this must be manually synchronized to avoid native crashes.
     33      */
     34     private long address;
     35 
     36     /**
     37      * Holds the input text.
     38      */
     39     private String input;
     40 
     41     /**
     42      * Holds the start of the region, or 0 if the matching should start at the
     43      * beginning of the text.
     44      */
     45     private int regionStart;
     46 
     47     /**
     48      * Holds the end of the region, or input.length() if the matching should
     49      * go until the end of the input.
     50      */
     51     private int regionEnd;
     52 
     53     /**
     54      * Holds the position where the next append operation will take place.
     55      */
     56     private int appendPos;
     57 
     58     /**
     59      * Reflects whether a match has been found during the most recent find
     60      * operation.
     61      */
     62     private boolean matchFound;
     63 
     64     /**
     65      * Holds the offsets for the most recent match.
     66      */
     67     private int[] matchOffsets;
     68 
     69     /**
     70      * Reflects whether the bounds of the region are anchoring.
     71      */
     72     private boolean anchoringBounds = true;
     73 
     74     /**
     75      * Reflects whether the bounds of the region are transparent.
     76      */
     77     private boolean transparentBounds;
     78 
     79     /**
     80      * Creates a matcher for a given combination of pattern and input. Both
     81      * elements can be changed later on.
     82      *
     83      * @param pattern
     84      *            the pattern to use.
     85      * @param input
     86      *            the input to use.
     87      */
     88     Matcher(Pattern pattern, CharSequence input) {
     89         usePattern(pattern);
     90         reset(input);
     91     }
     92 
     93     /**
     94      * Appends a literal part of the input plus a replacement for the current
     95      * match to a given {@link StringBuffer}. The literal part is exactly the
     96      * part of the input between the previous match and the current match. The
     97      * method can be used in conjunction with {@link #find()} and
     98      * {@link #appendTail(StringBuffer)} to walk through the input and replace
     99      * all occurrences of the {@code Pattern} with something else.
    100      *
    101      * @param buffer
    102      *            the {@code StringBuffer} to append to.
    103      * @param replacement
    104      *            the replacement text.
    105      * @return the {@code Matcher} itself.
    106      * @throws IllegalStateException
    107      *             if no successful match has been made.
    108      */
    109     public Matcher appendReplacement(StringBuffer buffer, String replacement) {
    110         buffer.append(input.substring(appendPos, start()));
    111         appendEvaluated(buffer, replacement);
    112         appendPos = end();
    113 
    114         return this;
    115     }
    116 
    117     /**
    118      * Internal helper method to append a given string to a given string buffer.
    119      * If the string contains any references to groups, these are replaced by
    120      * the corresponding group's contents.
    121      *
    122      * @param buffer
    123      *            the string buffer.
    124      * @param s
    125      *            the string to append.
    126      */
    127     private void appendEvaluated(StringBuffer buffer, String s) {
    128         boolean escape = false;
    129         boolean dollar = false;
    130 
    131         for (int i = 0; i < s.length(); i++) {
    132             char c = s.charAt(i);
    133             if (c == '\\' && !escape) {
    134                 escape = true;
    135             } else if (c == '$' && !escape) {
    136                 dollar = true;
    137             } else if (c >= '0' && c <= '9' && dollar) {
    138                 buffer.append(group(c - '0'));
    139                 dollar = false;
    140             } else {
    141                 buffer.append(c);
    142                 dollar = false;
    143                 escape = false;
    144             }
    145         }
    146 
    147         // This seemingly stupid piece of code reproduces a JDK bug.
    148         if (escape) {
    149             throw new ArrayIndexOutOfBoundsException(s.length());
    150         }
    151     }
    152 
    153     /**
    154      * Resets the {@code Matcher}. This results in the region being set to the
    155      * whole input. Results of a previous find get lost. The next attempt to
    156      * find an occurrence of the {@link Pattern} in the string will start at the
    157      * beginning of the input.
    158      *
    159      * @return the {@code Matcher} itself.
    160      */
    161     public Matcher reset() {
    162         return reset(input, 0, input.length());
    163     }
    164 
    165     /**
    166      * Provides a new input and resets the {@code Matcher}. This results in the
    167      * region being set to the whole input. Results of a previous find get lost.
    168      * The next attempt to find an occurrence of the {@link Pattern} in the
    169      * string will start at the beginning of the input.
    170      *
    171      * @param input
    172      *            the new input sequence.
    173      *
    174      * @return the {@code Matcher} itself.
    175      */
    176     public Matcher reset(CharSequence input) {
    177         return reset(input, 0, input.length());
    178     }
    179 
    180     /**
    181      * Resets the Matcher. A new input sequence and a new region can be
    182      * specified. Results of a previous find get lost. The next attempt to find
    183      * an occurrence of the Pattern in the string will start at the beginning of
    184      * the region. This is the internal version of reset() to which the several
    185      * public versions delegate.
    186      *
    187      * @param input
    188      *            the input sequence.
    189      * @param start
    190      *            the start of the region.
    191      * @param end
    192      *            the end of the region.
    193      *
    194      * @return the matcher itself.
    195      */
    196     private Matcher reset(CharSequence input, int start, int end) {
    197         if (input == null) {
    198             throw new IllegalArgumentException("input == null");
    199         }
    200 
    201         if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
    202             throw new IndexOutOfBoundsException();
    203         }
    204 
    205         this.input = input.toString();
    206         this.regionStart = start;
    207         this.regionEnd = end;
    208         resetForInput();
    209 
    210         matchFound = false;
    211         appendPos = 0;
    212 
    213         return this;
    214     }
    215 
    216     /**
    217      * Sets a new pattern for the {@code Matcher}. Results of a previous find
    218      * get lost. The next attempt to find an occurrence of the {@link Pattern}
    219      * in the string will start at the beginning of the input.
    220      *
    221      * @param pattern
    222      *            the new {@code Pattern}.
    223      *
    224      * @return the {@code Matcher} itself.
    225      */
    226     public Matcher usePattern(Pattern pattern) {
    227         if (pattern == null) {
    228             throw new IllegalArgumentException("pattern == null");
    229         }
    230 
    231         this.pattern = pattern;
    232 
    233         synchronized (this) {
    234             if (address != 0) {
    235                 closeImpl(address);
    236                 address = 0; // In case openImpl throws.
    237             }
    238             address = openImpl(pattern.address);
    239         }
    240 
    241         if (input != null) {
    242             resetForInput();
    243         }
    244 
    245         matchOffsets = new int[(groupCount() + 1) * 2];
    246         matchFound = false;
    247         return this;
    248     }
    249 
    250     private void resetForInput() {
    251         synchronized (this) {
    252             setInputImpl(address, input, regionStart, regionEnd);
    253             useAnchoringBoundsImpl(address, anchoringBounds);
    254             useTransparentBoundsImpl(address, transparentBounds);
    255         }
    256     }
    257 
    258     /**
    259      * Resets this matcher and sets a region. Only characters inside the region
    260      * are considered for a match.
    261      *
    262      * @param start
    263      *            the first character of the region.
    264      * @param end
    265      *            the first character after the end of the region.
    266      * @return the {@code Matcher} itself.
    267      */
    268     public Matcher region(int start, int end) {
    269         return reset(input, start, end);
    270     }
    271 
    272     /**
    273      * Appends the (unmatched) remainder of the input to the given
    274      * {@link StringBuffer}. The method can be used in conjunction with
    275      * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to
    276      * walk through the input and replace all matches of the {@code Pattern}
    277      * with something else.
    278      *
    279      * @return the {@code StringBuffer}.
    280      * @throws IllegalStateException
    281      *             if no successful match has been made.
    282      */
    283     public StringBuffer appendTail(StringBuffer buffer) {
    284         if (appendPos < regionEnd) {
    285             buffer.append(input.substring(appendPos, regionEnd));
    286         }
    287         return buffer;
    288     }
    289 
    290     /**
    291      * Replaces the first occurrence of this matcher's pattern in the input with
    292      * a given string.
    293      *
    294      * @param replacement
    295      *            the replacement text.
    296      * @return the modified input string.
    297      */
    298     public String replaceFirst(String replacement) {
    299         reset();
    300         StringBuffer buffer = new StringBuffer(input.length());
    301         if (find()) {
    302             appendReplacement(buffer, replacement);
    303         }
    304         return appendTail(buffer).toString();
    305     }
    306 
    307     /**
    308      * Replaces all occurrences of this matcher's pattern in the input with a
    309      * given string.
    310      *
    311      * @param replacement
    312      *            the replacement text.
    313      * @return the modified input string.
    314      */
    315     public String replaceAll(String replacement) {
    316         reset();
    317         StringBuffer buffer = new StringBuffer(input.length());
    318         while (find()) {
    319             appendReplacement(buffer, replacement);
    320         }
    321         return appendTail(buffer).toString();
    322     }
    323 
    324     /**
    325      * Returns the {@link Pattern} instance used inside this matcher.
    326      */
    327     public Pattern pattern() {
    328         return pattern;
    329     }
    330 
    331     /**
    332      * Returns true if there is another match in the input, starting
    333      * from the given position. The region is ignored.
    334      *
    335      * @throws IndexOutOfBoundsException if {@code start < 0 || start > input.length()}
    336      */
    337     public boolean find(int start) {
    338         if (start < 0 || start > input.length()) {
    339             throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length());
    340         }
    341 
    342         synchronized (this) {
    343             matchFound = findImpl(address, input, start, matchOffsets);
    344         }
    345         return matchFound;
    346     }
    347 
    348     /**
    349      * Moves to the next occurrence of the pattern in the input. If a
    350      * previous match was successful, the method continues the search from the
    351      * first character following that match in the input. Otherwise it searches
    352      * either from the region start (if one has been set), or from position 0.
    353      *
    354      * @return true if (and only if) a match has been found.
    355      */
    356     public boolean find() {
    357         synchronized (this) {
    358             matchFound = findNextImpl(address, input, matchOffsets);
    359         }
    360         return matchFound;
    361     }
    362 
    363     /**
    364      * Tries to match the {@link Pattern}, starting from the beginning of the
    365      * region (or the beginning of the input, if no region has been set).
    366      * Doesn't require the {@code Pattern} to match against the whole region.
    367      *
    368      * @return true if (and only if) the {@code Pattern} matches.
    369      */
    370     public boolean lookingAt() {
    371         synchronized (this) {
    372             matchFound = lookingAtImpl(address, input, matchOffsets);
    373         }
    374         return matchFound;
    375     }
    376 
    377     /**
    378      * Tries to match the {@link Pattern} against the entire region (or the
    379      * entire input, if no region has been set).
    380      *
    381      * @return true if (and only if) the {@code Pattern} matches the entire
    382      *         region.
    383      */
    384     public boolean matches() {
    385         synchronized (this) {
    386             matchFound = matchesImpl(address, input, matchOffsets);
    387         }
    388         return matchFound;
    389     }
    390 
    391     /**
    392      * Returns a replacement string for the given one that has all backslashes
    393      * and dollar signs escaped.
    394      */
    395     public static String quoteReplacement(String s) {
    396         StringBuilder result = new StringBuilder(s.length());
    397         for (int i = 0; i < s.length(); i++) {
    398             char c = s.charAt(i);
    399             if (c == '\\' || c == '$') {
    400                 result.append('\\');
    401             }
    402             result.append(c);
    403         }
    404         return result.toString();
    405     }
    406 
    407     /**
    408      * Converts the current match into a separate {@link MatchResult} instance
    409      * that is independent from this matcher. The new object is unaffected when
    410      * the state of this matcher changes.
    411      *
    412      * @throws IllegalStateException
    413      *             if no successful match has been made.
    414      */
    415     public MatchResult toMatchResult() {
    416         ensureMatch();
    417         return new MatchResultImpl(input, matchOffsets);
    418     }
    419 
    420     /**
    421      * Determines whether this matcher has anchoring bounds enabled or not. When
    422      * anchoring bounds are enabled, the start and end of the input match the
    423      * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
    424      * by default.
    425      *
    426      * @return the {@code Matcher} itself.
    427      */
    428     public Matcher useAnchoringBounds(boolean value) {
    429         synchronized (this) {
    430             anchoringBounds = value;
    431             useAnchoringBoundsImpl(address, value);
    432         }
    433         return this;
    434     }
    435 
    436     /**
    437      * Returns true if this matcher has anchoring bounds enabled. When
    438      * anchoring bounds are enabled, the start and end of the input match the
    439      * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
    440      * by default.
    441      */
    442     public boolean hasAnchoringBounds() {
    443         return anchoringBounds;
    444     }
    445 
    446     /**
    447      * Determines whether this matcher has transparent bounds enabled or not.
    448      * When transparent bounds are enabled, the parts of the input outside the
    449      * region are subject to lookahead and lookbehind, otherwise they are not.
    450      * Transparent bounds are disabled by default.
    451      *
    452      * @return the {@code Matcher} itself.
    453      */
    454     public Matcher useTransparentBounds(boolean value) {
    455         synchronized (this) {
    456             transparentBounds = value;
    457             useTransparentBoundsImpl(address, value);
    458         }
    459         return this;
    460     }
    461 
    462     /**
    463      * Makes sure that a successful match has been made. Is invoked internally
    464      * from various places in the class.
    465      *
    466      * @throws IllegalStateException
    467      *             if no successful match has been made.
    468      */
    469     private void ensureMatch() {
    470         if (!matchFound) {
    471             throw new IllegalStateException("No successful match so far");
    472         }
    473     }
    474 
    475     /**
    476      * Returns true if this matcher has transparent bounds enabled. When
    477      * transparent bounds are enabled, the parts of the input outside the region
    478      * are subject to lookahead and lookbehind, otherwise they are not.
    479      * Transparent bounds are disabled by default.
    480      */
    481     public boolean hasTransparentBounds() {
    482         return transparentBounds;
    483     }
    484 
    485     /**
    486      * Returns this matcher's region start, that is, the index of the first character that is
    487      * considered for a match.
    488      */
    489     public int regionStart() {
    490         return regionStart;
    491     }
    492 
    493     /**
    494      * Returns this matcher's region end, that is, the index of the first character that is
    495      * not considered for a match.
    496      */
    497     public int regionEnd() {
    498         return regionEnd;
    499     }
    500 
    501     /**
    502      * Returns true if the most recent match succeeded and additional input could cause
    503      * it to fail. If this method returns false and a match was found, then more input
    504      * might change the match but the match won't be lost. If a match was not found,
    505      * then requireEnd has no meaning.
    506      */
    507     public boolean requireEnd() {
    508         synchronized (this) {
    509             return requireEndImpl(address);
    510         }
    511     }
    512 
    513     /**
    514      * Returns true if the most recent matching operation attempted to access
    515      * additional text beyond the available input, meaning that additional input
    516      * could change the results of the match.
    517      */
    518     public boolean hitEnd() {
    519         synchronized (this) {
    520             return hitEndImpl(address);
    521         }
    522     }
    523 
    524     @Override protected void finalize() throws Throwable {
    525         try {
    526             synchronized (this) {
    527                 closeImpl(address);
    528             }
    529         } finally {
    530             super.finalize();
    531         }
    532     }
    533 
    534     /**
    535      * Returns a string representing this {@code Matcher}.
    536      * The format of this string is unspecified.
    537      */
    538     @Override public String toString() {
    539         return getClass().getName() + "[pattern=" + pattern() +
    540             " region=" + regionStart() + "," + regionEnd() +
    541             " lastmatch=" + (matchFound ? group() : "") + "]";
    542     }
    543 
    544     /**
    545      * {@inheritDoc}
    546      *
    547      * @throws IllegalStateException if no successful match has been made.
    548      */
    549     public int end() {
    550         return end(0);
    551     }
    552 
    553     /**
    554      * {@inheritDoc}
    555      *
    556      * @throws IllegalStateException if no successful match has been made.
    557      */
    558     public int end(int group) {
    559         ensureMatch();
    560         return matchOffsets[(group * 2) + 1];
    561     }
    562 
    563     /**
    564      * {@inheritDoc}
    565      *
    566      * @throws IllegalStateException if no successful match has been made.
    567      */
    568     public String group() {
    569         return group(0);
    570     }
    571 
    572     /**
    573      * {@inheritDoc}
    574      *
    575      * @throws IllegalStateException if no successful match has been made.
    576      */
    577     public String group(int group) {
    578         ensureMatch();
    579         int from = matchOffsets[group * 2];
    580         int to = matchOffsets[(group * 2) + 1];
    581         if (from == -1 || to == -1) {
    582             return null;
    583         } else {
    584             return input.substring(from, to);
    585         }
    586     }
    587 
    588     /**
    589      * {@inheritDoc}
    590      */
    591     public int groupCount() {
    592         synchronized (this) {
    593             return groupCountImpl(address);
    594         }
    595     }
    596 
    597     /**
    598      * {@inheritDoc}
    599      *
    600      * @throws IllegalStateException if no successful match has been made.
    601      */
    602     public int start() {
    603         return start(0);
    604     }
    605 
    606     /**
    607      * {@inheritDoc}
    608      *
    609      * @throws IllegalStateException if no successful match has been made.
    610      */
    611     public int start(int group) throws IllegalStateException {
    612         ensureMatch();
    613         return matchOffsets[group * 2];
    614     }
    615 
    616     private static native void closeImpl(long addr);
    617     private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets);
    618     private static native boolean findNextImpl(long addr, String s, int[] offsets);
    619     private static native int groupCountImpl(long addr);
    620     private static native boolean hitEndImpl(long addr);
    621     private static native boolean lookingAtImpl(long addr, String s, int[] offsets);
    622     private static native boolean matchesImpl(long addr, String s, int[] offsets);
    623     private static native long openImpl(long patternAddr);
    624     private static native boolean requireEndImpl(long addr);
    625     private static native void setInputImpl(long addr, String s, int start, int end);
    626     private static native void useAnchoringBoundsImpl(long addr, boolean value);
    627     private static native void useTransparentBoundsImpl(long addr, boolean value);
    628 }
    629