Home | History | Annotate | Download | only in regex
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 package java.util.regex;
     28 
     29 import libcore.util.NativeAllocationRegistry;
     30 
     31 /**
     32  * An engine that performs match operations on a {@link java.lang.CharSequence
     33  * character sequence} by interpreting a {@link Pattern}.
     34  *
     35  * <p> A matcher is created from a pattern by invoking the pattern's {@link
     36  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
     37  * perform three different kinds of match operations:
     38  *
     39  * <ul>
     40  *
     41  *   <li><p> The {@link #matches matches} method attempts to match the entire
     42  *   input sequence against the pattern.  </p></li>
     43  *
     44  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
     45  *   input sequence, starting at the beginning, against the pattern.  </p></li>
     46  *
     47  *   <li><p> The {@link #find find} method scans the input sequence looking for
     48  *   the next subsequence that matches the pattern.  </p></li>
     49  *
     50  * </ul>
     51  *
     52  * <p> Each of these methods returns a boolean indicating success or failure.
     53  * More information about a successful match can be obtained by querying the
     54  * state of the matcher.
     55  *
     56  * <p> A matcher finds matches in a subset of its input called the
     57  * <i>region</i>. By default, the region contains all of the matcher's input.
     58  * The region can be modified via the {@link #region region} method and queried
     59  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
     60  * methods. The way that the region boundaries interact with some pattern
     61  * constructs can be changed. See {@link #useAnchoringBounds
     62  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
     63  * for more details.
     64  *
     65  * <p> This class also defines methods for replacing matched subsequences with
     66  * new strings whose contents can, if desired, be computed from the match
     67  * result.  The {@link #appendReplacement appendReplacement} and {@link
     68  * #appendTail appendTail} methods can be used in tandem in order to collect
     69  * the result into an existing string buffer, or the more convenient {@link
     70  * #replaceAll replaceAll} method can be used to create a string in which every
     71  * matching subsequence in the input sequence is replaced.
     72  *
     73  * <p> The explicit state of a matcher includes the start and end indices of
     74  * the most recent successful match.  It also includes the start and end
     75  * indices of the input subsequence captured by each <a
     76  * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
     77  * count of such subsequences.  As a convenience, methods are also provided for
     78  * returning these captured subsequences in string form.
     79  *
     80  * <p> The explicit state of a matcher is initially undefined; attempting to
     81  * query any part of it before a successful match will cause an {@link
     82  * IllegalStateException} to be thrown.  The explicit state of a matcher is
     83  * recomputed by every match operation.
     84  *
     85  * <p> The implicit state of a matcher includes the input character sequence as
     86  * well as the <i>append position</i>, which is initially zero and is updated
     87  * by the {@link #appendReplacement appendReplacement} method.
     88  *
     89  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
     90  * method or, if a new input sequence is desired, its {@link
     91  * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
     92  * matcher discards its explicit state information and sets the append position
     93  * to zero.
     94  *
     95  * <p> Instances of this class are not safe for use by multiple concurrent
     96  * threads. </p>
     97  *
     98  *
     99  * @author      Mike McCloskey
    100  * @author      Mark Reinhold
    101  * @author      JSR-51 Expert Group
    102  * @since       1.4
    103  * @spec        JSR-51
    104  */
    105 
    106 public final class Matcher implements MatchResult {
    107     /**
    108      * The Pattern object that created this Matcher.
    109      */
    110     private Pattern pattern;
    111 
    112     /**
    113      * The address of the native peer.
    114      * Uses of this must be manually synchronized to avoid native crashes.
    115      */
    116     private long address;
    117 
    118     /**
    119      * If non-null, a Runnable that can be used to explicitly deallocate address.
    120      */
    121     private Runnable nativeFinalizer;
    122 
    123     private static final NativeAllocationRegistry registry = new NativeAllocationRegistry(
    124             Matcher.class.getClassLoader(), getNativeFinalizer(), nativeSize());
    125 
    126     /**
    127      * Holds the input text.
    128      */
    129     private String input;
    130 
    131     /**
    132      * Holds the start of the region, or 0 if the matching should start at the
    133      * beginning of the text.
    134      */
    135     private int regionStart;
    136 
    137     /**
    138      * Holds the end of the region, or input.length() if the matching should
    139      * go until the end of the input.
    140      */
    141     private int regionEnd;
    142 
    143     /**
    144      * Holds the position where the next append operation will take place.
    145      */
    146     private int appendPos;
    147 
    148     /**
    149      * Reflects whether a match has been found during the most recent find
    150      * operation.
    151      */
    152     private boolean matchFound;
    153 
    154     /**
    155      * Holds the offsets for the most recent match.
    156      */
    157     private int[] matchOffsets;
    158 
    159     /**
    160      * Reflects whether the bounds of the region are anchoring.
    161      */
    162     private boolean anchoringBounds = true;
    163 
    164     /**
    165      * Reflects whether the bounds of the region are transparent.
    166      */
    167     private boolean transparentBounds;
    168 
    /**
     * Creates a matcher that interprets {@code parent} against {@code text}.
     *
     * <p> Package-private: a matcher is obtained by invoking the pattern's
     * {@link Pattern#matcher matcher} method.  The pattern is installed first
     * through {@link #usePattern usePattern}, which rejects <tt>null</tt> and
     * opens the native peer; the input is installed afterwards through
     * {@link #reset(java.lang.CharSequence) reset(CharSequence)}.  </p>
     *
     * @param  parent
     *         The pattern this matcher interprets
     * @param  text
     *         The character sequence to be matched
     */
    Matcher(Pattern parent, CharSequence text) {
        // Order matters: the native peer has to exist before any input is bound to it.
        usePattern(parent).reset(text);
    }
    176 
    177     /**
    178      * Returns the pattern that is interpreted by this matcher.
    179      *
    180      * @return  The pattern for which this matcher was created
    181      */
    182     public Pattern pattern() {
    183         return pattern;
    184     }
    185 
    186     /**
    187      * Returns the match state of this matcher as a {@link MatchResult}.
    188      * The result is unaffected by subsequent operations performed upon this
    189      * matcher.
    190      *
    191      * @return  a <code>MatchResult</code> with the state of this matcher
    192      * @since 1.5
    193      */
    194     public MatchResult toMatchResult() {
    195         ensureMatch();
    196         return new OffsetBasedMatchResult(input, matchOffsets);
    197     }
    198 
    199     /**
    200       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
    201       * find matches with.
    202       *
    203       * <p> This method causes this matcher to lose information
    204       * about the groups of the last match that occurred. The
    205       * matcher's position in the input is maintained and its
    206       * last append position is unaffected.</p>
    207       *
    208       * @param  newPattern
    209       *         The new pattern used by this matcher
    210       * @return  This matcher
    211       * @throws  IllegalArgumentException
    212       *          If newPattern is <tt>null</tt>
    213       * @since 1.5
    214       */
    215     public Matcher usePattern(Pattern newPattern) {
    216         if (newPattern == null) {
    217             throw new IllegalArgumentException("newPattern == null");
    218         }
    219 
    220         this.pattern = newPattern;
    221 
    222         synchronized (this) {
    223             if (nativeFinalizer != null) {
    224                 nativeFinalizer.run();
    225                 address = 0; // In case openImpl throws.
    226                 nativeFinalizer = null;
    227             }
    228             address = openImpl(pattern.address);
    229             nativeFinalizer = registry.registerNativeAllocation(this, address);
    230         }
    231 
    232         if (input != null) {
    233             resetForInput();
    234         }
    235 
    236         matchOffsets = new int[(groupCount() + 1) * 2];
    237         matchFound = false;
    238         return this;
    239     }
    240 
    241     /**
    242      * Returns the offset after the last character matched.
    243      *
    244      * @return  The offset after the last character matched
    245      *
    246      * @throws  IllegalStateException
    247      *          If no match has yet been attempted,
    248      *          or if the previous match operation failed
    249      */
    250     public int end() {
    251         return end(0);
    252     }
    253 
    254     /**
    255      * Returns the offset after the last character of the subsequence
    256      * captured by the given group during the previous match operation.
    257      *
    258      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
    259      * to right, starting at one.  Group zero denotes the entire pattern, so
    260      * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
    261      * <i>m.</i><tt>end()</tt>.  </p>
    262      *
    263      * @param  group
    264      *         The index of a capturing group in this matcher's pattern
    265      *
    266      * @return  The offset after the last character captured by the group,
    267      *          or <tt>-1</tt> if the match was successful
    268      *          but the group itself did not match anything
    269      *
    270      * @throws  IllegalStateException
    271      *          If no match has yet been attempted,
    272      *          or if the previous match operation failed
    273      *
    274      * @throws  IndexOutOfBoundsException
    275      *          If there is no capturing group in the pattern
    276      *          with the given index
    277      */
    278     public int end(int group) {
    279         ensureMatch();
    280         return matchOffsets[(group * 2) + 1];
    281     }
    282 
    283     /**
    284      * Returns the offset after the last character of the subsequence
    285      * captured by the given <a href="Pattern.html#groupname">named-capturing
    286      * group</a> during the previous match operation.
    287      *
    288      * @param  name
    289      *         The name of a named-capturing group in this matcher's pattern
    290      *
    291      * @return  The offset after the last character captured by the group,
    292      *          or {@code -1} if the match was successful
    293      *          but the group itself did not match anything
    294      *
    295      * @throws  IllegalStateException
    296      *          If no match has yet been attempted,
    297      *          or if the previous match operation failed
    298      *
    299      * @throws  IllegalArgumentException
    300      *          If there is no capturing group in the pattern
    301      *          with the given name
    302      * @since 1.8
    303      */
    304     public int end(String name) {
    305         ensureMatch();
    306         return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2 + 1];
    307     }
    308 
    309 
    310     /**
    311      * Returns the input subsequence matched by the previous match.
    312      *
    313      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
    314      * the expressions <i>m.</i><tt>group()</tt> and
    315      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
    316      * are equivalent.  </p>
    317      *
    318      * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
    319      * string.  This method will return the empty string when the pattern
    320      * successfully matches the empty string in the input.  </p>
    321      *
    322      * @return The (possibly empty) subsequence matched by the previous match,
    323      *         in string form
    324      *
    325      * @throws  IllegalStateException
    326      *          If no match has yet been attempted,
    327      *          or if the previous match operation failed
    328      */
    329     public String group() {
    330         return group(0);
    331     }
    332 
    333     /**
    334      * Returns the input subsequence captured by the given group during the
    335      * previous match operation.
    336      *
    337      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
    338      * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
    339      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
    340      * are equivalent.  </p>
    341      *
    342      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
    343      * to right, starting at one.  Group zero denotes the entire pattern, so
    344      * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
    345      * </p>
    346      *
    347      * <p> If the match was successful but the group specified failed to match
    348      * any part of the input sequence, then <tt>null</tt> is returned. Note
    349      * that some groups, for example <tt>(a*)</tt>, match the empty string.
    350      * This method will return the empty string when such a group successfully
    351      * matches the empty string in the input.  </p>
    352      *
    353      * @param  group
    354      *         The index of a capturing group in this matcher's pattern
    355      *
    356      * @return  The (possibly empty) subsequence captured by the group
    357      *          during the previous match, or <tt>null</tt> if the group
    358      *          failed to match part of the input
    359      *
    360      * @throws  IllegalStateException
    361      *          If no match has yet been attempted,
    362      *          or if the previous match operation failed
    363      *
    364      * @throws  IndexOutOfBoundsException
    365      *          If there is no capturing group in the pattern
    366      *          with the given index
    367      */
    368     public String group(int group) {
    369         ensureMatch();
    370         int from = matchOffsets[group * 2];
    371         int to = matchOffsets[(group * 2) + 1];
    372         if (from == -1 || to == -1) {
    373             return null;
    374         } else {
    375             return input.substring(from, to);
    376         }
    377     }
    378 
    379     /**
    380      * Returns the input subsequence captured by the given
    381      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
    382      * match operation.
    383      *
    384      * <p> If the match was successful but the group specified failed to match
    385      * any part of the input sequence, then <tt>null</tt> is returned. Note
    386      * that some groups, for example <tt>(a*)</tt>, match the empty string.
    387      * This method will return the empty string when such a group successfully
    388      * matches the empty string in the input.  </p>
    389      *
    390      * @param  name
    391      *         The name of a named-capturing group in this matcher's pattern
    392      *
    393      * @return  The (possibly empty) subsequence captured by the named group
    394      *          during the previous match, or <tt>null</tt> if the group
    395      *          failed to match part of the input
    396      *
    397      * @throws  IllegalStateException
    398      *          If no match has yet been attempted,
    399      *          or if the previous match operation failed
    400      *
    401      * @throws  IllegalArgumentException
    402      *          If there is no capturing group in the pattern
    403      *          with the given name
    404      * @since 1.7
    405      */
    406     public String group(String name) {
    407         ensureMatch();
    408         int group = getMatchedGroupIndex(pattern.address, name);
    409         int from = matchOffsets[group * 2];
    410         int to = matchOffsets[(group * 2) + 1];
    411         if (from == -1 || to == -1) {
    412             return null;
    413         } else {
    414             return input.substring(from, to);
    415         }
    416     }
    417 
    418     /**
    419      * Returns the number of capturing groups in this matcher's pattern.
    420      *
    421      * <p> Group zero denotes the entire pattern by convention. It is not
    422      * included in this count.
    423      *
    424      * <p> Any non-negative integer smaller than or equal to the value
    425      * returned by this method is guaranteed to be a valid group index for
    426      * this matcher.  </p>
    427      *
    428      * @return The number of capturing groups in this matcher's pattern
    429      */
    430     public int groupCount() {
    431         synchronized (this) {
    432             return groupCountImpl(address);
    433         }
    434     }
    435 
    436     /**
    437      * Attempts to match the entire region against the pattern.
    438      *
    439      * <p> If the match succeeds then more information can be obtained via the
    440      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
    441      *
    442      * @return  <tt>true</tt> if, and only if, the entire region sequence
    443      *          matches this matcher's pattern
    444      */
    445     public boolean matches() {
    446         synchronized (this) {
    447             matchFound = matchesImpl(address, matchOffsets);
    448         }
    449         return matchFound;
    450     }
    451 
    452     /**
    453      * Attempts to find the next subsequence of the input sequence that matches
    454      * the pattern.
    455      *
    456      * <p> This method starts at the beginning of this matcher's region, or, if
    457      * a previous invocation of the method was successful and the matcher has
    458      * not since been reset, at the first character not matched by the previous
    459      * match.
    460      *
    461      * <p> If the match succeeds then more information can be obtained via the
    462      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
    463      *
    464      * @return  <tt>true</tt> if, and only if, a subsequence of the input
    465      *          sequence matches this matcher's pattern
    466      */
    467     public boolean find() {
    468         synchronized (this) {
    469             matchFound = findNextImpl(address, matchOffsets);
    470         }
    471         return matchFound;
    472     }
    473 
    474     /**
    475      * Resets this matcher and then attempts to find the next subsequence of
    476      * the input sequence that matches the pattern, starting at the specified
    477      * index.
    478      *
    479      * <p> If the match succeeds then more information can be obtained via the
    480      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
    481      * invocations of the {@link #find()} method will start at the first
    482      * character not matched by this match.  </p>
    483      *
    484      * @throws  IndexOutOfBoundsException
    485      *          If start is less than zero or if start is greater than the
    486      *          length of the input sequence.
    487      *
    488      * @return  <tt>true</tt> if, and only if, a subsequence of the input
    489      *          sequence starting at the given index matches this matcher's
    490      *          pattern
    491      */
    492     public boolean find(int start) {
    493         if (start < 0 || start > input.length()) {
    494             throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length());
    495         }
    496 
    497         synchronized (this) {
    498             matchFound = findImpl(address, start, matchOffsets);
    499         }
    500         return matchFound;
    501     }
    502 
    503     /**
    504      * Attempts to match the input sequence, starting at the beginning of the
    505      * region, against the pattern.
    506      *
    507      * <p> Like the {@link #matches matches} method, this method always starts
    508      * at the beginning of the region; unlike that method, it does not
    509      * require that the entire region be matched.
    510      *
    511      * <p> If the match succeeds then more information can be obtained via the
    512      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
    513      *
    514      * @return  <tt>true</tt> if, and only if, a prefix of the input
    515      *          sequence matches this matcher's pattern
    516      */
    517     public boolean lookingAt() {
    518         synchronized (this) {
    519             matchFound = lookingAtImpl(address, matchOffsets);
    520         }
    521         return matchFound;
    522     }
    523 
    524     /**
    525      * Returns a literal replacement <code>String</code> for the specified
    526      * <code>String</code>.
    527      *
    528      * This method produces a <code>String</code> that will work
    529      * as a literal replacement <code>s</code> in the
    530      * <code>appendReplacement</code> method of the {@link Matcher} class.
    531      * The <code>String</code> produced will match the sequence of characters
    532      * in <code>s</code> treated as a literal sequence. Slashes ('\') and
    533      * dollar signs ('$') will be given no special meaning.
    534      *
    535      * @param  s The string to be literalized
    536      * @return  A literal string replacement
    537      * @since 1.5
    538      */
    539     public static String quoteReplacement(String s) {
    540         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
    541             return s;
    542         StringBuilder sb = new StringBuilder();
    543         for (int i=0; i<s.length(); i++) {
    544             char c = s.charAt(i);
    545             if (c == '\\' || c == '$') {
    546                 sb.append('\\');
    547             }
    548             sb.append(c);
    549         }
    550         return sb.toString();
    551     }
    552 
    553     /**
    554      * Implements a non-terminal append-and-replace step.
    555      *
    556      * <p> This method performs the following actions: </p>
    557      *
    558      * <ol>
    559      *
    560      *   <li><p> It reads characters from the input sequence, starting at the
    561      *   append position, and appends them to the given string buffer.  It
    562      *   stops after reading the last character preceding the previous match,
    563      *   that is, the character at index {@link
    564      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
    565      *
    566      *   <li><p> It appends the given replacement string to the string buffer.
    567      *   </p></li>
    568      *
    569      *   <li><p> It sets the append position of this matcher to the index of
    570      *   the last character matched, plus one, that is, to {@link #end()}.
    571      *   </p></li>
    572      *
    573      * </ol>
    574      *
    575      * <p> The replacement string may contain references to subsequences
    576      * captured during the previous match: Each occurrence of
    577      * <tt>$</tt><i>g</i> will be replaced by the result of evaluating
    578      * {@link #group(int) group(g)}. For <tt>$</tt><i>g</i>,
    579      * the first number after the <tt>$</tt> is always treated as part of
    580      * the group reference. Subsequent numbers are incorporated into g if
    581      * they would form a legal group reference. Only the numerals '0'
    582      * through '9' are considered as potential components of the group
    583      * reference. If the second group matched the string <tt>"foo"</tt>, for
    584      * example, then passing the replacement string <tt>"$2bar"</tt> would
    585      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
    586      * sign (<tt>$</tt>) may be included as a literal in the replacement
    587      * string by preceding it with a backslash (<tt>\$</tt>).
    588      *
    589      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
    590      * the replacement string may cause the results to be different than if it
    591      * were being treated as a literal replacement string. Dollar signs may be
    592      * treated as references to captured subsequences as described above, and
    593      * backslashes are used to escape literal characters in the replacement
    594      * string.
    595      *
    596      * <p> This method is intended to be used in a loop together with the
    597      * {@link #appendTail appendTail} and {@link #find find} methods.  The
    598      * following code, for example, writes <tt>one dog two dogs in the
    599      * yard</tt> to the standard-output stream: </p>
    600      *
    601      * <blockquote><pre>
    602      * Pattern p = Pattern.compile("cat");
    603      * Matcher m = p.matcher("one cat two cats in the yard");
    604      * StringBuffer sb = new StringBuffer();
    605      * while (m.find()) {
    606      *     m.appendReplacement(sb, "dog");
    607      * }
    608      * m.appendTail(sb);
    609      * System.out.println(sb.toString());</pre></blockquote>
    610      *
    611      * @param  sb
    612      *         The target string buffer
    613      *
    614      * @param  replacement
    615      *         The replacement string
    616      *
    617      * @return  This matcher
    618      *
    619      * @throws  IllegalStateException
    620      *          If no match has yet been attempted,
    621      *          or if the previous match operation failed
    622      *
    623      * @throws  IllegalArgumentException
    624      *          If the replacement string refers to a named-capturing
    625      *          group that does not exist in the pattern
    626      *
    627      * @throws  IndexOutOfBoundsException
    628      *          If the replacement string refers to a capturing group
    629      *          that does not exist in the pattern
    630      */
    631     public Matcher appendReplacement(StringBuffer sb, String replacement) {
    632         sb.append(input.substring(appendPos, start()));
    633         appendEvaluated(sb, replacement);
    634         appendPos = end();
    635 
    636         return this;
    637     }
    638 
    639     /**
    640      * Internal helper method to append a given string to a given string buffer.
    641      * If the string contains any references to groups, these are replaced by
    642      * the corresponding group's contents.
    643      *
    644      * @param buffer the string buffer.
    645      * @param s the string to append.
    646      */
    647     private void appendEvaluated(StringBuffer buffer, String s) {
    648         boolean escape = false;
    649         boolean dollar = false;
    650         boolean escapeNamedGroup = false;
    651         int escapeNamedGroupStart = -1;
    652 
    653         for (int i = 0; i < s.length(); i++) {
    654             char c = s.charAt(i);
    655             if (c == '\\' && !escape) {
    656                 escape = true;
    657             } else if (c == '$' && !escape) {
    658                 dollar = true;
    659             } else if (c >= '0' && c <= '9' && dollar) {
    660                 buffer.append(group(c - '0'));
    661                 dollar = false;
    662             } else if (c == '{' && dollar) {
    663                 escapeNamedGroup = true;
    664                 escapeNamedGroupStart = i;
    665             } else if (c == '}' && dollar && escapeNamedGroup) {
    666                 String namedGroupName =
    667                     s.substring(escapeNamedGroupStart + 1, i);
    668                 buffer.append(group(namedGroupName));
    669                 dollar = false;
    670                 escapeNamedGroup = false;
    671             } else if (c != '}' && dollar && escapeNamedGroup) {
    672                 continue;
    673             } else {
    674                 buffer.append(c);
    675                 dollar = false;
    676                 escape = false;
    677                 escapeNamedGroup = false;
    678             }
    679         }
    680 
    681         if (escapeNamedGroup) {
    682             throw new IllegalArgumentException("Missing ending brace '}' from replacement string");
    683         }
    684 
    685         if (escape) {
    686             throw new ArrayIndexOutOfBoundsException(s.length());
    687         }
    688     }
    689 
    690 
    691     /**
    692      * Implements a terminal append-and-replace step.
    693      *
    694      * <p> This method reads characters from the input sequence, starting at
    695      * the append position, and appends them to the given string buffer.  It is
    696      * intended to be invoked after one or more invocations of the {@link
    697      * #appendReplacement appendReplacement} method in order to copy the
    698      * remainder of the input sequence.  </p>
    699      *
    700      * @param  sb
    701      *         The target string buffer
    702      *
    703      * @return  The target string buffer
    704      */
    705     public StringBuffer appendTail(StringBuffer sb) {
    706         if (appendPos < regionEnd) {
    707             sb.append(input.substring(appendPos, regionEnd));
    708         }
    709         return sb;
    710     }
    711 
    712     /**
    713      * Replaces every subsequence of the input sequence that matches the
    714      * pattern with the given replacement string.
    715      *
    716      * <p> This method first resets this matcher.  It then scans the input
    717      * sequence looking for matches of the pattern.  Characters that are not
    718      * part of any match are appended directly to the result string; each match
    719      * is replaced in the result by the replacement string.  The replacement
    720      * string may contain references to captured subsequences as in the {@link
    721      * #appendReplacement appendReplacement} method.
    722      *
    723      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
    724      * the replacement string may cause the results to be different than if it
    725      * were being treated as a literal replacement string. Dollar signs may be
    726      * treated as references to captured subsequences as described above, and
    727      * backslashes are used to escape literal characters in the replacement
    728      * string.
    729      *
    730      * <p> Given the regular expression <tt>a*b</tt>, the input
    731      * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
    732      * <tt>"-"</tt>, an invocation of this method on a matcher for that
    733      * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
    734      *
    735      * <p> Invoking this method changes this matcher's state.  If the matcher
    736      * is to be used in further matching operations then it should first be
    737      * reset.  </p>
    738      *
    739      * @param  replacement
    740      *         The replacement string
    741      *
    742      * @return  The string constructed by replacing each matching subsequence
    743      *          by the replacement string, substituting captured subsequences
    744      *          as needed
    745      */
    746     public String replaceAll(String replacement) {
    747         reset();
    748         StringBuffer buffer = new StringBuffer(input.length());
    749         while (find()) {
    750             appendReplacement(buffer, replacement);
    751         }
    752         return appendTail(buffer).toString();
    753     }
    754 
    755     /**
    756      * Replaces the first subsequence of the input sequence that matches the
    757      * pattern with the given replacement string.
    758      *
    759      * <p> This method first resets this matcher.  It then scans the input
    760      * sequence looking for a match of the pattern.  Characters that are not
    761      * part of the match are appended directly to the result string; the match
    762      * is replaced in the result by the replacement string.  The replacement
    763      * string may contain references to captured subsequences as in the {@link
    764      * #appendReplacement appendReplacement} method.
    765      *
    766      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
    767      * the replacement string may cause the results to be different than if it
    768      * were being treated as a literal replacement string. Dollar signs may be
    769      * treated as references to captured subsequences as described above, and
    770      * backslashes are used to escape literal characters in the replacement
    771      * string.
    772      *
    773      * <p> Given the regular expression <tt>dog</tt>, the input
    774      * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
    775      * <tt>"cat"</tt>, an invocation of this method on a matcher for that
    776      * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
    777      *
    778      * <p> Invoking this method changes this matcher's state.  If the matcher
    779      * is to be used in further matching operations then it should first be
    780      * reset.  </p>
    781      *
    782      * @param  replacement
    783      *         The replacement string
    784      * @return  The string constructed by replacing the first matching
    785      *          subsequence by the replacement string, substituting captured
    786      *          subsequences as needed
    787      */
    788     public String replaceFirst(String replacement) {
    789         reset();
    790         StringBuffer buffer = new StringBuffer(input.length());
    791         if (find()) {
    792             appendReplacement(buffer, replacement);
    793         }
    794         return appendTail(buffer).toString();
    795     }
    796 
    797     /**
    798      * Sets the limits of this matcher's region. The region is the part of the
    799      * input sequence that will be searched to find a match. Invoking this
    800      * method resets the matcher, and then sets the region to start at the
    801      * index specified by the <code>start</code> parameter and end at the
    802      * index specified by the <code>end</code> parameter.
    803      *
    804      * <p>Depending on the transparency and anchoring being used (see
    805      * {@link #useTransparentBounds useTransparentBounds} and
    806      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
    807      * as anchors may behave differently at or around the boundaries of the
    808      * region.
    809      *
    810      * @param  start
    811      *         The index to start searching at (inclusive)
    812      * @param  end
    813      *         The index to end searching at (exclusive)
    814      * @throws  IndexOutOfBoundsException
    815      *          If start or end is less than zero, if
    816      *          start is greater than the length of the input sequence, if
    817      *          end is greater than the length of the input sequence, or if
    818      *          start is greater than end.
    819      * @return  this matcher
    820      * @since 1.5
    821      */
    822     public Matcher region(int start, int end) {
    823         return reset(input, start, end);
    824     }
    825 
    826     /**
    827      * Reports the start index of this matcher's region. The
    828      * searches this matcher conducts are limited to finding matches
    829      * within {@link #regionStart regionStart} (inclusive) and
    830      * {@link #regionEnd regionEnd} (exclusive).
    831      *
    832      * @return  The starting point of this matcher's region
    833      * @since 1.5
    834      */
    835     public int regionStart() {
    836         return regionStart;
    837     }
    838 
    839     /**
    840      * Reports the end index (exclusive) of this matcher's region.
    841      * The searches this matcher conducts are limited to finding matches
    842      * within {@link #regionStart regionStart} (inclusive) and
    843      * {@link #regionEnd regionEnd} (exclusive).
    844      *
    845      * @return  the ending point of this matcher's region
    846      * @since 1.5
    847      */
    848     public int regionEnd() {
    849         return regionEnd;
    850     }
    851 
    852     /**
    853      * Queries the transparency of region bounds for this matcher.
    854      *
    855      * <p> This method returns <tt>true</tt> if this matcher uses
    856      * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
    857      * bounds.
    858      *
    859      * <p> See {@link #useTransparentBounds useTransparentBounds} for a
    860      * description of transparent and opaque bounds.
    861      *
    862      * <p> By default, a matcher uses opaque region boundaries.
    863      *
    864      * @return <tt>true</tt> iff this matcher is using transparent bounds,
    865      *         <tt>false</tt> otherwise.
    866      * @see java.util.regex.Matcher#useTransparentBounds(boolean)
    867      * @since 1.5
    868      */
    869     public boolean hasTransparentBounds() {
    870         return transparentBounds;
    871     }
    872 
    873     /**
    874      * Sets the transparency of region bounds for this matcher.
    875      *
    876      * <p> Invoking this method with an argument of <tt>true</tt> will set this
    877      * matcher to use <i>transparent</i> bounds. If the boolean
    878      * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
    879      *
    880      * <p> Using transparent bounds, the boundaries of this
    881      * matcher's region are transparent to lookahead, lookbehind,
    882      * and boundary matching constructs. Those constructs can see beyond the
    883      * boundaries of the region to see if a match is appropriate.
    884      *
    885      * <p> Using opaque bounds, the boundaries of this matcher's
    886      * region are opaque to lookahead, lookbehind, and boundary matching
    887      * constructs that may try to see beyond them. Those constructs cannot
    888      * look past the boundaries so they will fail to match anything outside
    889      * of the region.
    890      *
    891      * <p> By default, a matcher uses opaque bounds.
    892      *
    893      * @param  value a boolean indicating whether to use opaque or transparent
    894      *         regions
    895      * @return this matcher
    896      * @see java.util.regex.Matcher#hasTransparentBounds
    897      * @since 1.5
    898      */
    899     public Matcher useTransparentBounds(boolean value) {
    900         synchronized (this) {
    901             transparentBounds = value;
    902             useTransparentBoundsImpl(address, value);
    903         }
    904         return this;
    905     }
    906 
    907     /**
    908      * Queries the anchoring of region bounds for this matcher.
    909      *
    910      * <p> This method returns <tt>true</tt> if this matcher uses
    911      * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
    912      *
    913      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
    914      * description of anchoring bounds.
    915      *
    916      * <p> By default, a matcher uses anchoring region boundaries.
    917      *
    918      * @return <tt>true</tt> iff this matcher is using anchoring bounds,
    919      *         <tt>false</tt> otherwise.
    920      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
    921      * @since 1.5
    922      */
    923     public boolean hasAnchoringBounds() {
    924         return anchoringBounds;
    925     }
    926 
    927     /**
    928      * Sets the anchoring of region bounds for this matcher.
    929      *
    930      * <p> Invoking this method with an argument of <tt>true</tt> will set this
    931      * matcher to use <i>anchoring</i> bounds. If the boolean
    932      * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
    933      * used.
    934      *
    935      * <p> Using anchoring bounds, the boundaries of this
    936      * matcher's region match anchors such as ^ and $.
    937      *
    938      * <p> Without anchoring bounds, the boundaries of this
    939      * matcher's region will not match anchors such as ^ and $.
    940      *
    941      * <p> By default, a matcher uses anchoring region boundaries.
    942      *
    943      * @param  value a boolean indicating whether or not to use anchoring bounds.
    944      * @return this matcher
    945      * @see java.util.regex.Matcher#hasAnchoringBounds
    946      * @since 1.5
    947      */
    948     public Matcher useAnchoringBounds(boolean value) {
    949         synchronized (this) {
    950             anchoringBounds = value;
    951             useAnchoringBoundsImpl(address, value);
    952         }
    953         return this;
    954     }
    955 
    956     /**
    957      * <p>Returns the string representation of this matcher. The
    958      * string representation of a <code>Matcher</code> contains information
    959      * that may be useful for debugging. The exact format is unspecified.
    960      *
    961      * @return  The string representation of this matcher
    962      * @since 1.5
    963      */
    964     public String toString() {
    965         StringBuilder sb = new StringBuilder();
    966         sb.append("java.util.regex.Matcher");
    967         sb.append("[pattern=" + pattern());
    968         sb.append(" region=");
    969         sb.append(regionStart() + "," + regionEnd());
    970         sb.append(" lastmatch=");
    971         if (matchFound && (group() != null)) {
    972             sb.append(group());
    973         }
    974         sb.append("]");
    975         return sb.toString();
    976     }
    977 
    978     /**
    979      * <p>Returns true if the end of input was hit by the search engine in
    980      * the last match operation performed by this matcher.
    981      *
    982      * <p>When this method returns true, then it is possible that more input
    983      * would have changed the result of the last search.
    984      *
    985      * @return  true iff the end of input was hit in the last match; false
    986      *          otherwise
    987      * @since 1.5
    988      */
    989     public boolean hitEnd() {
    990         synchronized (this) {
    991             return hitEndImpl(address);
    992         }
    993     }
    994 
    995 
    996     /**
    997      * <p>Returns true if more input could change a positive match into a
    998      * negative one.
    999      *
   1000      * <p>If this method returns true, and a match was found, then more
   1001      * input could cause the match to be lost. If this method returns false
   1002      * and a match was found, then more input might change the match but the
   1003      * match won't be lost. If a match was not found, then requireEnd has no
   1004      * meaning.
   1005      *
   1006      * @return  true iff more input could change a positive match into a
   1007      *          negative one.
   1008      * @since 1.5
   1009      */
   1010     public boolean requireEnd() {
   1011         synchronized (this) {
   1012             return requireEndImpl(address);
   1013         }
   1014     }
   1015 
   1016     /**
   1017      * Resets this matcher.
   1018      *
   1019      * <p> Resetting a matcher discards all of its explicit state information
   1020      * and sets its append position to zero. The matcher's region is set to the
   1021      * default region, which is its entire character sequence. The anchoring
   1022      * and transparency of this matcher's region boundaries are unaffected.
   1023      *
   1024      * @return  This matcher
   1025      */
   1026     public Matcher reset() {
   1027         return reset(input, 0, input.length());
   1028     }
   1029 
   1030     /**
   1031      * Resets this matcher with a new input sequence.
   1032      *
   1033      * <p> Resetting a matcher discards all of its explicit state information
   1034      * and sets its append position to zero.  The matcher's region is set to
   1035      * the default region, which is its entire character sequence.  The
   1036      * anchoring and transparency of this matcher's region boundaries are
   1037      * unaffected.
   1038      *
   1039      * @param  input
   1040      *         The new input character sequence
   1041      *
   1042      * @return  This matcher
   1043      */
   1044     public Matcher reset(CharSequence input) {
   1045         return reset(input, 0, input.length());
   1046     }
   1047 
   1048     /**
   1049      * Resets the Matcher. A new input sequence and a new region can be
   1050      * specified. Results of a previous find get lost. The next attempt to find
   1051      * an occurrence of the Pattern in the string will start at the beginning of
   1052      * the region. This is the internal version of reset() to which the several
   1053      * public versions delegate.
   1054      *
   1055      * @param input
   1056      *            the input sequence.
   1057      * @param start
   1058      *            the start of the region.
   1059      * @param end
   1060      *            the end of the region.
   1061      *
   1062      * @return the matcher itself.
   1063      */
   1064     private Matcher reset(CharSequence input, int start, int end) {
   1065         if (input == null) {
   1066             throw new IllegalArgumentException("input == null");
   1067         }
   1068 
   1069         if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
   1070             throw new IndexOutOfBoundsException();
   1071         }
   1072 
   1073         this.input = input.toString();
   1074         this.regionStart = start;
   1075         this.regionEnd = end;
   1076         resetForInput();
   1077 
   1078         matchFound = false;
   1079         appendPos = 0;
   1080 
   1081         return this;
   1082     }
   1083 
   1084     private void resetForInput() {
   1085         synchronized (this) {
   1086             setInputImpl(address, input, regionStart, regionEnd);
   1087             useAnchoringBoundsImpl(address, anchoringBounds);
   1088             useTransparentBoundsImpl(address, transparentBounds);
   1089         }
   1090     }
   1091 
   1092     /**
   1093      * Makes sure that a successful match has been made. Is invoked internally
   1094      * from various places in the class.
   1095      *
   1096      * @throws IllegalStateException
   1097      *             if no successful match has been made.
   1098      */
   1099     private void ensureMatch() {
   1100         if (!matchFound) {
   1101             throw new IllegalStateException("No successful match so far");
   1102         }
   1103     }
   1104 
   1105     /**
   1106      * Returns the start index of the previous match.
   1107      *
   1108      * @return  The index of the first character matched
   1109      *
   1110      * @throws  IllegalStateException
   1111      *          If no match has yet been attempted,
   1112      *          or if the previous match operation failed
   1113      */
   1114     public int start() {
   1115         return start(0);
   1116     }
   1117 
   1118     /**
   1119      * Returns the start index of the subsequence captured by the given group
   1120      * during the previous match operation.
   1121      *
   1122      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
   1123      * to right, starting at one.  Group zero denotes the entire pattern, so
   1124      * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
   1125      * <i>m.</i><tt>start()</tt>.  </p>
   1126      *
   1127      * @param  group
   1128      *         The index of a capturing group in this matcher's pattern
   1129      *
   1130      * @return  The index of the first character captured by the group,
   1131      *          or <tt>-1</tt> if the match was successful but the group
   1132      *          itself did not match anything
   1133      *
   1134      * @throws  IllegalStateException
   1135      *          If no match has yet been attempted,
   1136      *          or if the previous match operation failed
   1137      *
   1138      * @throws  IndexOutOfBoundsException
   1139      *          If there is no capturing group in the pattern
   1140      *          with the given index
   1141      */
   1142     public int start(int group) throws IllegalStateException {
   1143         ensureMatch();
   1144         return matchOffsets[group * 2];
   1145     }
   1146 
   1147 
   1148     /**
   1149      * Returns the start index of the subsequence captured by the given
   1150      * <a href="Pattern.html#groupname">named-capturing group</a> during the
   1151      * previous match operation.
   1152      *
   1153      * @param  name
   1154      *         The name of a named-capturing group in this matcher's pattern
   1155      *
   1156      * @return  The index of the first character captured by the group,
   1157      *          or {@code -1} if the match was successful but the group
   1158      *          itself did not match anything
   1159      *
   1160      * @throws  IllegalStateException
   1161      *          If no match has yet been attempted,
   1162      *          or if the previous match operation failed
   1163      *
   1164      * @throws  IllegalArgumentException
   1165      *          If there is no capturing group in the pattern
   1166      *          with the given name
   1167      * @since 1.8
   1168      */
   1169     public int start(String name) {
   1170         ensureMatch();
   1171         return matchOffsets[getMatchedGroupIndex(pattern.address, name) * 2];
   1172     }
   1173 
   1174     private static int getMatchedGroupIndex(long patternAddr, String name) {
   1175         int result = getMatchedGroupIndex0(patternAddr, name);
   1176         if (result < 0) {
   1177             throw new IllegalArgumentException("No capturing group in the pattern " +
   1178                                                "with the name " + name);
   1179         }
   1180         return result;
   1181     }
   1182 
   1183     private static native int getMatchedGroupIndex0(long patternAddr, String name);
   1184     private static native boolean findImpl(long addr, int startIndex, int[] offsets);
   1185     private static native boolean findNextImpl(long addr, int[] offsets);
   1186     private static native long getNativeFinalizer();
   1187     private static native int groupCountImpl(long addr);
   1188     private static native boolean hitEndImpl(long addr);
   1189     private static native boolean lookingAtImpl(long addr, int[] offsets);
   1190     private static native boolean matchesImpl(long addr, int[] offsets);
   1191     private static native int nativeSize();
   1192     private static native long openImpl(long patternAddr);
   1193     private static native boolean requireEndImpl(long addr);
   1194     private static native void setInputImpl(long addr, String s, int start, int end);
   1195     private static native void useAnchoringBoundsImpl(long addr, boolean value);
   1196     private static native void useTransparentBoundsImpl(long addr, boolean value);
   1197 
   1198     /**
   1199      * A trivial match result implementation that's based on an array of integers
   1200      * representing match offsets. The array is of the form
   1201      * {@code { start1, end1, start2, end2 ....}} where each consecutive pair of elements represents
   1202      * the start and end of a match respectively.
   1203      */
   1204     static final class OffsetBasedMatchResult implements MatchResult {
   1205         private final String input;
   1206         private final int[] offsets;
   1207 
   1208         OffsetBasedMatchResult(String input, int[] offsets) {
   1209             this.input = input;
   1210             this.offsets = offsets.clone();
   1211         }
   1212 
   1213         @Override
   1214         public int start() {
   1215             return start(0);
   1216         }
   1217 
   1218         @Override
   1219         public int start(int group) {
   1220             return offsets[2 * group];
   1221         }
   1222 
   1223         @Override
   1224         public int end() {
   1225             return end(0);
   1226         }
   1227 
   1228         @Override
   1229         public int end(int group) {
   1230             return offsets[2 * group + 1];
   1231         }
   1232 
   1233         @Override
   1234         public String group() {
   1235             return group(0);
   1236         }
   1237 
   1238         @Override
   1239         public String group(int group) {
   1240             final int start = start(group);
   1241             final int end = end(group);
   1242             if (start == -1 || end == -1) {
   1243                 return null;
   1244             }
   1245 
   1246             return input.substring(start, end);
   1247         }
   1248 
   1249         @Override
   1250         public int groupCount() {
   1251             return (offsets.length / 2) - 1;
   1252         }
   1253     }
   1254 }
   1255