Home | History | Annotate | Download | only in base
      1 /*
      2  * Copyright (C) 2008 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.base;
     18 
     19 import static com.google.common.base.Preconditions.checkArgument;
     20 import static com.google.common.base.Preconditions.checkNotNull;
     21 
     22 import com.google.common.annotations.Beta;
     23 import com.google.common.annotations.GwtCompatible;
     24 
     25 import java.util.ArrayList;
     26 import java.util.Arrays;
     27 import java.util.List;
     28 
     29 import javax.annotation.CheckReturnValue;
     30 
     31 /**
     32  * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
     33  * for any {@link Object}. Also offers basic text processing methods based on this function.
     34  * Implementations are strongly encouraged to be side-effect-free and immutable.
     35  *
     36  * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
     37  * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
     38  *
     39  * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
     40  * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
     41  * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
     42  * treats these just as two separate characters.
     43  *
     44  * <p>Example usages: <pre>
     45  *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
     46  *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
     47  *
     48  * @author Kevin Bourrillion
     49  * @since 1.0
     50  */
     51 @Beta // Possibly change from chars to code points; decide constants vs. methods
     52 @GwtCompatible
     53 public abstract class CharMatcher implements Predicate<Character> {
     54   // Constants
     55 
     56   // Excludes 2000-2000a, which is handled as a range
     57   private static final String BREAKING_WHITESPACE_CHARS =
     58       "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
     59 
     60   // Excludes 2007, which is handled as a gap in a pair of ranges
     61   private static final String NON_BREAKING_WHITESPACE_CHARS =
     62       "\u00a0\u180e\u202f";
     63 
     64   /**
     65    * Determines whether a character is whitespace according to the latest Unicode standard, as
     66    * illustrated
     67    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
     68    * This is not the same definition used by other Java APIs. (See a
     69    * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
     70    * definitions of "whitespace"</a>.)
     71    *
     72    * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
     73    * to date.
     74    */
     75   public static final CharMatcher WHITESPACE =
     76       anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
     77           .or(inRange('\u2000', '\u200a'))
     78           .precomputed();
     79 
     80   /**
     81    * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
     82    * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
     83    * discussion of that term.
     84    *
     85    * @since 2.0
     86    */
     87   public static final CharMatcher BREAKING_WHITESPACE =
     88       anyOf(BREAKING_WHITESPACE_CHARS)
     89           .or(inRange('\u2000', '\u2006'))
     90           .or(inRange('\u2008', '\u200a'))
     91           .precomputed();
     92 
     93   /**
     94    * Determines whether a character is ASCII, meaning that its code point is less than 128.
     95    */
     96   public static final CharMatcher ASCII = inRange('\0', '\u007f');
     97 
     98   /**
     99    * Determines whether a character is a digit according to
    100    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
    101    */
    102   public static final CharMatcher DIGIT;
    103 
    104   static {
    105     CharMatcher digit = inRange('0', '9');
    106     String zeroes =
    107         "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
    108             + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
    109             + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
    110     for (char base : zeroes.toCharArray()) {
    111       digit = digit.or(inRange(base, (char) (base + 9)));
    112     }
    113     DIGIT = digit.precomputed();
    114   }
    115 
    116   /**
    117    * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
    118    * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
    119    */
    120   public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
    121     @Override public boolean matches(char c) {
    122       return Character.isDigit(c);
    123     }
    124   };
    125 
    126   /**
    127    * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
    128    * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
    129    * inRange('a', 'z').or(inRange('A', 'Z'))}.
    130    */
    131   public static final CharMatcher JAVA_LETTER = new CharMatcher() {
    132     @Override public boolean matches(char c) {
    133       return Character.isLetter(c);
    134     }
    135   };
    136 
    137   /**
    138    * Determines whether a character is a letter or digit according to {@link
    139    * Character#isLetterOrDigit(char) Java's definition}.
    140    */
    141   public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
    142     @Override public boolean matches(char c) {
    143       return Character.isLetterOrDigit(c);
    144     }
    145   };
    146 
    147   /**
    148    * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
    149    * Java's definition}.
    150    */
    151   public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
    152     @Override public boolean matches(char c) {
    153       return Character.isUpperCase(c);
    154     }
    155   };
    156 
    157   /**
    158    * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
    159    * Java's definition}.
    160    */
    161   public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
    162     @Override public boolean matches(char c) {
    163       return Character.isLowerCase(c);
    164     }
    165   };
    166 
    167   /**
    168    * Determines whether a character is an ISO control character as specified by {@link
    169    * Character#isISOControl(char)}.
    170    */
    171   public static final CharMatcher JAVA_ISO_CONTROL =
    172       inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
    173 
    174   /**
    175    * Determines whether a character is invisible; that is, if its Unicode category is any of
    176    * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
    177    * PRIVATE_USE according to ICU4J.
    178    */
    179   public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
    180       .or(inRange('\u007f', '\u00a0'))
    181       .or(is('\u00ad'))
    182       .or(inRange('\u0600', '\u0603'))
    183       .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
    184       .or(inRange('\u2000', '\u200f'))
    185       .or(inRange('\u2028', '\u202f'))
    186       .or(inRange('\u205f', '\u2064'))
    187       .or(inRange('\u206a', '\u206f'))
    188       .or(is('\u3000'))
    189       .or(inRange('\ud800', '\uf8ff'))
    190       .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
    191       .precomputed();
    192 
    193   /**
    194    * Determines whether a character is single-width (not double-width). When in doubt, this matcher
    195    * errs on the side of returning {@code false} (that is, it tends to assume a character is
    196    * double-width).
    197    *
    198    * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
    199    * date.
    200    */
    201   public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
    202       .or(is('\u05be'))
    203       .or(inRange('\u05d0', '\u05ea'))
    204       .or(is('\u05f3'))
    205       .or(is('\u05f4'))
    206       .or(inRange('\u0600', '\u06ff'))
    207       .or(inRange('\u0750', '\u077f'))
    208       .or(inRange('\u0e00', '\u0e7f'))
    209       .or(inRange('\u1e00', '\u20af'))
    210       .or(inRange('\u2100', '\u213a'))
    211       .or(inRange('\ufb50', '\ufdff'))
    212       .or(inRange('\ufe70', '\ufeff'))
    213       .or(inRange('\uff61', '\uffdc'))
    214       .precomputed();
    215 
    216   /** Matches any character. */
    217   public static final CharMatcher ANY =
    218       new CharMatcher() {
    219         @Override public boolean matches(char c) {
    220           return true;
    221         }
    222 
    223         @Override public int indexIn(CharSequence sequence) {
    224           return (sequence.length() == 0) ? -1 : 0;
    225         }
    226 
    227         @Override public int indexIn(CharSequence sequence, int start) {
    228           int length = sequence.length();
    229           Preconditions.checkPositionIndex(start, length);
    230           return (start == length) ? -1 : start;
    231         }
    232 
    233         @Override public int lastIndexIn(CharSequence sequence) {
    234           return sequence.length() - 1;
    235         }
    236 
    237         @Override public boolean matchesAllOf(CharSequence sequence) {
    238           checkNotNull(sequence);
    239           return true;
    240         }
    241 
    242         @Override public boolean matchesNoneOf(CharSequence sequence) {
    243           return sequence.length() == 0;
    244         }
    245 
    246         @Override public String removeFrom(CharSequence sequence) {
    247           checkNotNull(sequence);
    248           return "";
    249         }
    250 
    251         @Override public String replaceFrom(CharSequence sequence, char replacement) {
    252           char[] array = new char[sequence.length()];
    253           Arrays.fill(array, replacement);
    254           return new String(array);
    255         }
    256 
    257         @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
    258           StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
    259           for (int i = 0; i < sequence.length(); i++) {
    260             retval.append(replacement);
    261           }
    262           return retval.toString();
    263         }
    264 
    265         @Override public String collapseFrom(CharSequence sequence, char replacement) {
    266           return (sequence.length() == 0) ? "" : String.valueOf(replacement);
    267         }
    268 
    269         @Override public String trimFrom(CharSequence sequence) {
    270           checkNotNull(sequence);
    271           return "";
    272         }
    273 
    274         @Override public int countIn(CharSequence sequence) {
    275           return sequence.length();
    276         }
    277 
    278         @Override public CharMatcher and(CharMatcher other) {
    279           return checkNotNull(other);
    280         }
    281 
    282         @Override public CharMatcher or(CharMatcher other) {
    283           checkNotNull(other);
    284           return this;
    285         }
    286 
    287         @Override public CharMatcher negate() {
    288           return NONE;
    289         }
    290 
    291         @Override public CharMatcher precomputed() {
    292           return this;
    293         }
    294       };
    295 
    296   /** Matches no characters. */
    297   public static final CharMatcher NONE =
    298       new CharMatcher() {
    299         @Override public boolean matches(char c) {
    300           return false;
    301         }
    302 
    303         @Override public int indexIn(CharSequence sequence) {
    304           checkNotNull(sequence);
    305           return -1;
    306         }
    307 
    308         @Override public int indexIn(CharSequence sequence, int start) {
    309           int length = sequence.length();
    310           Preconditions.checkPositionIndex(start, length);
    311           return -1;
    312         }
    313 
    314         @Override public int lastIndexIn(CharSequence sequence) {
    315           checkNotNull(sequence);
    316           return -1;
    317         }
    318 
    319         @Override public boolean matchesAllOf(CharSequence sequence) {
    320           return sequence.length() == 0;
    321         }
    322 
    323         @Override public boolean matchesNoneOf(CharSequence sequence) {
    324           checkNotNull(sequence);
    325           return true;
    326         }
    327 
    328         @Override public String removeFrom(CharSequence sequence) {
    329           return sequence.toString();
    330         }
    331 
    332         @Override public String replaceFrom(CharSequence sequence, char replacement) {
    333           return sequence.toString();
    334         }
    335 
    336         @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
    337           checkNotNull(replacement);
    338           return sequence.toString();
    339         }
    340 
    341         @Override public String collapseFrom(CharSequence sequence, char replacement) {
    342           return sequence.toString();
    343         }
    344 
    345         @Override public String trimFrom(CharSequence sequence) {
    346           return sequence.toString();
    347         }
    348 
    349         @Override public int countIn(CharSequence sequence) {
    350           checkNotNull(sequence);
    351           return 0;
    352         }
    353 
    354         @Override public CharMatcher and(CharMatcher other) {
    355           checkNotNull(other);
    356           return this;
    357         }
    358 
    359         @Override public CharMatcher or(CharMatcher other) {
    360           return checkNotNull(other);
    361         }
    362 
    363         @Override public CharMatcher negate() {
    364           return ANY;
    365         }
    366 
    367         @Override void setBits(LookupTable table) {}
    368 
    369         @Override public CharMatcher precomputed() {
    370           return this;
    371         }
    372       };
    373 
    374   // Static factories
    375 
    376   /**
    377    * Returns a {@code char} matcher that matches only one specified character.
    378    */
    379   public static CharMatcher is(final char match) {
    380     return new CharMatcher() {
    381       @Override public boolean matches(char c) {
    382         return c == match;
    383       }
    384 
    385       @Override public String replaceFrom(CharSequence sequence, char replacement) {
    386         return sequence.toString().replace(match, replacement);
    387       }
    388 
    389       @Override public CharMatcher and(CharMatcher other) {
    390         return other.matches(match) ? this : NONE;
    391       }
    392 
    393       @Override public CharMatcher or(CharMatcher other) {
    394         return other.matches(match) ? other : super.or(other);
    395       }
    396 
    397       @Override public CharMatcher negate() {
    398         return isNot(match);
    399       }
    400 
    401       @Override void setBits(LookupTable table) {
    402         table.set(match);
    403       }
    404 
    405       @Override public CharMatcher precomputed() {
    406         return this;
    407       }
    408     };
    409   }
    410 
    411   /**
    412    * Returns a {@code char} matcher that matches any character except the one specified.
    413    *
    414    * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
    415    */
    416   public static CharMatcher isNot(final char match) {
    417     return new CharMatcher() {
    418       @Override public boolean matches(char c) {
    419         return c != match;
    420       }
    421 
    422       @Override public CharMatcher and(CharMatcher other) {
    423         return other.matches(match) ? super.and(other) : other;
    424       }
    425 
    426       @Override public CharMatcher or(CharMatcher other) {
    427         return other.matches(match) ? ANY : this;
    428       }
    429 
    430       @Override public CharMatcher negate() {
    431         return is(match);
    432       }
    433     };
    434   }
    435 
    436   /**
    437    * Returns a {@code char} matcher that matches any character present in the given character
    438    * sequence.
    439    */
    440   public static CharMatcher anyOf(final CharSequence sequence) {
    441     switch (sequence.length()) {
    442       case 0:
    443         return NONE;
    444       case 1:
    445         return is(sequence.charAt(0));
    446       case 2:
    447         final char match1 = sequence.charAt(0);
    448         final char match2 = sequence.charAt(1);
    449         return new CharMatcher() {
    450           @Override public boolean matches(char c) {
    451             return c == match1 || c == match2;
    452           }
    453 
    454           @Override void setBits(LookupTable table) {
    455             table.set(match1);
    456             table.set(match2);
    457           }
    458 
    459           @Override public CharMatcher precomputed() {
    460             return this;
    461           }
    462         };
    463     }
    464 
    465     final char[] chars = sequence.toString().toCharArray();
    466     Arrays.sort(chars); // not worth collapsing duplicates
    467 
    468     return new CharMatcher() {
    469       @Override public boolean matches(char c) {
    470         return Arrays.binarySearch(chars, c) >= 0;
    471       }
    472 
    473       @Override void setBits(LookupTable table) {
    474         for (char c : chars) {
    475           table.set(c);
    476         }
    477       }
    478     };
    479   }
    480 
    481   /**
    482    * Returns a {@code char} matcher that matches any character not present in the given character
    483    * sequence.
    484    */
    485   public static CharMatcher noneOf(CharSequence sequence) {
    486     return anyOf(sequence).negate();
    487   }
    488 
    489   /**
    490    * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
    491    * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
    492    * CharMatcher.inRange('a', 'z')}.
    493    *
    494    * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
    495    */
    496   public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
    497     checkArgument(endInclusive >= startInclusive);
    498     return new CharMatcher() {
    499       @Override public boolean matches(char c) {
    500         return startInclusive <= c && c <= endInclusive;
    501       }
    502 
    503       @Override void setBits(LookupTable table) {
    504         char c = startInclusive;
    505         while (true) {
    506           table.set(c);
    507           if (c++ == endInclusive) {
    508             break;
    509           }
    510         }
    511       }
    512 
    513       @Override public CharMatcher precomputed() {
    514         return this;
    515       }
    516     };
    517   }
    518 
    519   /**
    520    * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
    521    * which operates on primitive {@code char} instances instead.
    522    */
    523   public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
    524     checkNotNull(predicate);
    525     if (predicate instanceof CharMatcher) {
    526       return (CharMatcher) predicate;
    527     }
    528     return new CharMatcher() {
    529       @Override public boolean matches(char c) {
    530         return predicate.apply(c);
    531       }
    532 
    533       @Override public boolean apply(Character character) {
    534         return predicate.apply(checkNotNull(character));
    535       }
    536     };
    537   }
    538 
    539   // Constructors
    540 
    541   /**
    542    * Constructor for use by subclasses.
    543    */
    544   protected CharMatcher() {}
    545 
    546   // Abstract methods
    547 
    548   /** Determines a true or false value for the given character. */
    549   public abstract boolean matches(char c);
    550 
    551   // Non-static factories
    552 
    553   /**
    554    * Returns a matcher that matches any character not matched by this matcher.
    555    */
    556   public CharMatcher negate() {
    557     final CharMatcher original = this;
    558     return new CharMatcher() {
    559       @Override public boolean matches(char c) {
    560         return !original.matches(c);
    561       }
    562 
    563       @Override public boolean matchesAllOf(CharSequence sequence) {
    564         return original.matchesNoneOf(sequence);
    565       }
    566 
    567       @Override public boolean matchesNoneOf(CharSequence sequence) {
    568         return original.matchesAllOf(sequence);
    569       }
    570 
    571       @Override public int countIn(CharSequence sequence) {
    572         return sequence.length() - original.countIn(sequence);
    573       }
    574 
    575       @Override public CharMatcher negate() {
    576         return original;
    577       }
    578     };
    579   }
    580 
    581   /**
    582    * Returns a matcher that matches any character matched by both this matcher and {@code other}.
    583    */
    584   public CharMatcher and(CharMatcher other) {
    585     return new And(Arrays.asList(this, checkNotNull(other)));
    586   }
    587 
    588   private static class And extends CharMatcher {
    589     List<CharMatcher> components;
    590 
    591     And(List<CharMatcher> components) {
    592       this.components = components; // Skip defensive copy (private)
    593     }
    594 
    595     @Override public boolean matches(char c) {
    596       for (CharMatcher matcher : components) {
    597         if (!matcher.matches(c)) {
    598           return false;
    599         }
    600       }
    601       return true;
    602     }
    603 
    604     @Override public CharMatcher and(CharMatcher other) {
    605       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
    606       newComponents.add(checkNotNull(other));
    607       return new And(newComponents);
    608     }
    609   }
    610 
    611   /**
    612    * Returns a matcher that matches any character matched by either this matcher or {@code other}.
    613    */
    614   public CharMatcher or(CharMatcher other) {
    615     return new Or(Arrays.asList(this, checkNotNull(other)));
    616   }
    617 
    618   private static class Or extends CharMatcher {
    619     List<CharMatcher> components;
    620 
    621     Or(List<CharMatcher> components) {
    622       this.components = components; // Skip defensive copy (private)
    623     }
    624 
    625     @Override public boolean matches(char c) {
    626       for (CharMatcher matcher : components) {
    627         if (matcher.matches(c)) {
    628           return true;
    629         }
    630       }
    631       return false;
    632     }
    633 
    634     @Override public CharMatcher or(CharMatcher other) {
    635       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
    636       newComponents.add(checkNotNull(other));
    637       return new Or(newComponents);
    638     }
    639 
    640     @Override void setBits(LookupTable table) {
    641       for (CharMatcher matcher : components) {
    642         matcher.setBits(table);
    643       }
    644     }
    645   }
    646 
    647   /**
    648    * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
    649    * query than the original; your mileage may vary. Precomputation takes time and is likely to be
    650    * worthwhile only if the precomputed matcher is queried many thousands of times.
    651    *
    652    * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
    653    * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
    654    * worthwhile tradeoff in a browser.
    655    */
    656   public CharMatcher precomputed() {
    657     return Platform.precomputeCharMatcher(this);
    658   }
    659 
    660   /**
    661    * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
    662    * on {@link Platform} so that we can have different behavior in GWT.
    663    *
    664    * <p>The default precomputation is to cache the configuration of the original matcher in an
    665    * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
    666    * than the original.
    667    *
    668    * <p>The default implementation creates a new bit array and passes it to {@link
    669    * #setBits(LookupTable)}.
    670    */
    671   CharMatcher precomputedInternal() {
    672     final LookupTable table = new LookupTable();
    673     setBits(table);
    674 
    675     return new CharMatcher() {
    676       @Override public boolean matches(char c) {
    677         return table.get(c);
    678       }
    679 
    680       // TODO(kevinb): make methods like negate() smart?
    681 
    682       @Override public CharMatcher precomputed() {
    683         return this;
    684       }
    685     };
    686   }
    687 
    688   /**
    689    * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
    690    * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
    691    *
    692    * <p>The default implementation loops over every possible character value, invoking {@link
    693    * #matches} for each one.
    694    */
    695   void setBits(LookupTable table) {
    696     char c = Character.MIN_VALUE;
    697     while (true) {
    698       if (matches(c)) {
    699         table.set(c);
    700       }
    701       if (c++ == Character.MAX_VALUE) {
    702         break;
    703       }
    704     }
    705   }
    706 
    707   /**
    708    * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
    709    *
    710    * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
    711    * simpler java.util.BitSet.
    712    */
    713   private static final class LookupTable {
    714     int[] data = new int[2048];
    715 
    716     void set(char index) {
    717       data[index >> 5] |= (1 << index);
    718     }
    719 
    720     boolean get(char index) {
    721       return (data[index >> 5] & (1 << index)) != 0;
    722     }
    723   }
    724 
    725   // Text processing routines
    726 
    727   /**
    728    * Returns {@code true} if a character sequence contains at least one matching character.
    729    * Equivalent to {@code !matchesNoneOf(sequence)}.
    730    *
    731    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
    732    * character, until this returns {@code true} or the end is reached.
    733    *
    734    * @param sequence the character sequence to examine, possibly empty
    735    * @return {@code true} if this matcher matches at least one character in the sequence
    736    * @since 8.0
    737    */
    738   public boolean matchesAnyOf(CharSequence sequence) {
    739     return !matchesNoneOf(sequence);
    740   }
    741 
    742   /**
    743    * Returns {@code true} if a character sequence contains only matching characters.
    744    *
    745    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
    746    * character, until this returns {@code false} or the end is reached.
    747    *
    748    * @param sequence the character sequence to examine, possibly empty
    749    * @return {@code true} if this matcher matches every character in the sequence, including when
    750    *         the sequence is empty
    751    */
    752   public boolean matchesAllOf(CharSequence sequence) {
    753     for (int i = sequence.length() - 1; i >= 0; i--) {
    754       if (!matches(sequence.charAt(i))) {
    755         return false;
    756       }
    757     }
    758     return true;
    759   }
    760 
    761   /**
    762    * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
    763    * {@code !matchesAnyOf(sequence)}.
    764    *
    765    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
    766    * character, until this returns {@code false} or the end is reached.
    767    *
    768    * @param sequence the character sequence to examine, possibly empty
    769    * @return {@code true} if this matcher matches every character in the sequence, including when
    770    *         the sequence is empty
    771    */
    772   public boolean matchesNoneOf(CharSequence sequence) {
    773     return indexIn(sequence) == -1;
    774   }
    775 
    776   // TODO(kevinb): add matchesAnyOf()
    777 
    778   /**
    779    * Returns the index of the first matching character in a character sequence, or {@code -1} if no
    780    * matching character is present.
    781    *
    782    * <p>The default implementation iterates over the sequence in forward order calling {@link
    783    * #matches} for each character.
    784    *
    785    * @param sequence the character sequence to examine from the beginning
    786    * @return an index, or {@code -1} if no character matches
    787    */
    788   public int indexIn(CharSequence sequence) {
    789     int length = sequence.length();
    790     for (int i = 0; i < length; i++) {
    791       if (matches(sequence.charAt(i))) {
    792         return i;
    793       }
    794     }
    795     return -1;
    796   }
    797 
    798   /**
    799    * Returns the index of the first matching character in a character sequence, starting from a
    800    * given position, or {@code -1} if no character matches after that position.
    801    *
    802    * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
    803    * start}, calling {@link #matches} for each character.
    804    *
    805    * @param sequence the character sequence to examine
    806    * @param start the first index to examine; must be nonnegative and no greater than {@code
    807    *        sequence.length()}
    808    * @return the index of the first matching character, guaranteed to be no less than {@code start},
    809    *         or {@code -1} if no character matches
    810    * @throws IndexOutOfBoundsException if start is negative or greater than {@code
    811    *         sequence.length()}
    812    */
    813   public int indexIn(CharSequence sequence, int start) {
    814     int length = sequence.length();
    815     Preconditions.checkPositionIndex(start, length);
    816     for (int i = start; i < length; i++) {
    817       if (matches(sequence.charAt(i))) {
    818         return i;
    819       }
    820     }
    821     return -1;
    822   }
    823 
    824   /**
    825    * Returns the index of the last matching character in a character sequence, or {@code -1} if no
    826    * matching character is present.
    827    *
    828    * <p>The default implementation iterates over the sequence in reverse order calling {@link
    829    * #matches} for each character.
    830    *
    831    * @param sequence the character sequence to examine from the end
    832    * @return an index, or {@code -1} if no character matches
    833    */
    834   public int lastIndexIn(CharSequence sequence) {
    835     for (int i = sequence.length() - 1; i >= 0; i--) {
    836       if (matches(sequence.charAt(i))) {
    837         return i;
    838       }
    839     }
    840     return -1;
    841   }
    842 
    843   /**
    844    * Returns the number of matching characters found in a character sequence.
    845    */
    846   public int countIn(CharSequence sequence) {
    847     int count = 0;
    848     for (int i = 0; i < sequence.length(); i++) {
    849       if (matches(sequence.charAt(i))) {
    850         count++;
    851       }
    852     }
    853     return count;
    854   }
    855 
    856   /**
    857    * Returns a string containing all non-matching characters of a character sequence, in order. For
    858    * example: <pre>   {@code
    859    *
    860    *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
    861    *
    862    * ... returns {@code "bzr"}.
    863    */
    864   @CheckReturnValue
    865   public String removeFrom(CharSequence sequence) {
    866     String string = sequence.toString();
    867     int pos = indexIn(string);
    868     if (pos == -1) {
    869       return string;
    870     }
    871 
    872     char[] chars = string.toCharArray();
    873     int spread = 1;
    874 
    875     // This unusual loop comes from extensive benchmarking
    876     OUT: while (true) {
    877       pos++;
    878       while (true) {
    879         if (pos == chars.length) {
    880           break OUT;
    881         }
    882         if (matches(chars[pos])) {
    883           break;
    884         }
    885         chars[pos - spread] = chars[pos];
    886         pos++;
    887       }
    888       spread++;
    889     }
    890     return new String(chars, 0, pos - spread);
    891   }
    892 
    893   /**
    894    * Returns a string containing all matching characters of a character sequence, in order. For
    895    * example: <pre>   {@code
    896    *
    897    *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
    898    *
    899    * ... returns {@code "aaa"}.
    900    */
    901   @CheckReturnValue
    902   public String retainFrom(CharSequence sequence) {
    903     return negate().removeFrom(sequence);
    904   }
    905 
    906   /**
    907    * Returns a string copy of the input character sequence, with each character that matches this
    908    * matcher replaced by a given replacement character. For example: <pre>   {@code
    909    *
    910    *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
    911    *
    912    * ... returns {@code "rodor"}.
    913    *
    914    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
    915    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
    916    * character.
    917    *
    918    * @param sequence the character sequence to replace matching characters in
    919    * @param replacement the character to append to the result string in place of each matching
    920    *        character in {@code sequence}
    921    * @return the new string
    922    */
    923   @CheckReturnValue
    924   public String replaceFrom(CharSequence sequence, char replacement) {
    925     String string = sequence.toString();
    926     int pos = indexIn(string);
    927     if (pos == -1) {
    928       return string;
    929     }
    930     char[] chars = string.toCharArray();
    931     chars[pos] = replacement;
    932     for (int i = pos + 1; i < chars.length; i++) {
    933       if (matches(chars[i])) {
    934         chars[i] = replacement;
    935       }
    936     }
    937     return new String(chars);
    938   }
    939 
    940   /**
    941    * Returns a string copy of the input character sequence, with each character that matches this
    942    * matcher replaced by a given replacement sequence. For example: <pre>   {@code
    943    *
    944    *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
    945    *
    946    * ... returns {@code "yoohoo"}.
    947    *
    948    * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
    949    * off calling {@link #replaceFrom(CharSequence, char)} directly.
    950    *
    951    * @param sequence the character sequence to replace matching characters in
    952    * @param replacement the characters to append to the result string in place of each matching
    953    *        character in {@code sequence}
    954    * @return the new string
    955    */
    956   @CheckReturnValue
    957   public String replaceFrom(CharSequence sequence, CharSequence replacement) {
    958     int replacementLen = replacement.length();
    959     if (replacementLen == 0) {
    960       return removeFrom(sequence);
    961     }
    962     if (replacementLen == 1) {
    963       return replaceFrom(sequence, replacement.charAt(0));
    964     }
    965 
    966     String string = sequence.toString();
    967     int pos = indexIn(string);
    968     if (pos == -1) {
    969       return string;
    970     }
    971 
    972     int len = string.length();
    973     StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
    974 
    975     int oldpos = 0;
    976     do {
    977       buf.append(string, oldpos, pos);
    978       buf.append(replacement);
    979       oldpos = pos + 1;
    980       pos = indexIn(string, oldpos);
    981     } while (pos != -1);
    982 
    983     buf.append(string, oldpos, len);
    984     return buf.toString();
    985   }
    986 
    987   /**
    988    * Returns a substring of the input character sequence that omits all characters this matcher
    989    * matches from the beginning and from the end of the string. For example: <pre>   {@code
    990    *
    991    *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
    992    *
    993    * ... returns {@code "cat"}.
    994    *
    995    * <p>Note that: <pre>   {@code
    996    *
    997    *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
    998    *
    999    * ... is equivalent to {@link String#trim()}.
   1000    */
   1001   @CheckReturnValue
   1002   public String trimFrom(CharSequence sequence) {
   1003     int len = sequence.length();
   1004     int first;
   1005     int last;
   1006 
   1007     for (first = 0; first < len; first++) {
   1008       if (!matches(sequence.charAt(first))) {
   1009         break;
   1010       }
   1011     }
   1012     for (last = len - 1; last > first; last--) {
   1013       if (!matches(sequence.charAt(last))) {
   1014         break;
   1015       }
   1016     }
   1017 
   1018     return sequence.subSequence(first, last + 1).toString();
   1019   }
   1020 
   1021   /**
   1022    * Returns a substring of the input character sequence that omits all characters this matcher
   1023    * matches from the beginning of the string. For example: <pre> {@code
   1024    *
   1025    *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
   1026    *
   1027    * ... returns {@code "catbab"}.
   1028    */
   1029   @CheckReturnValue
   1030   public String trimLeadingFrom(CharSequence sequence) {
   1031     int len = sequence.length();
   1032     int first;
   1033 
   1034     for (first = 0; first < len; first++) {
   1035       if (!matches(sequence.charAt(first))) {
   1036         break;
   1037       }
   1038     }
   1039 
   1040     return sequence.subSequence(first, len).toString();
   1041   }
   1042 
   1043   /**
   1044    * Returns a substring of the input character sequence that omits all characters this matcher
   1045    * matches from the end of the string. For example: <pre> {@code
   1046    *
   1047    *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
   1048    *
   1049    * ... returns {@code "abacat"}.
   1050    */
   1051   @CheckReturnValue
   1052   public String trimTrailingFrom(CharSequence sequence) {
   1053     int len = sequence.length();
   1054     int last;
   1055 
   1056     for (last = len - 1; last >= 0; last--) {
   1057       if (!matches(sequence.charAt(last))) {
   1058         break;
   1059       }
   1060     }
   1061 
   1062     return sequence.subSequence(0, last + 1).toString();
   1063   }
   1064 
   1065   /**
   1066    * Returns a string copy of the input character sequence, with each group of consecutive
   1067    * characters that match this matcher replaced by a single replacement character. For example:
   1068    * <pre>   {@code
   1069    *
   1070    *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
   1071    *
   1072    * ... returns {@code "b-p-r"}.
   1073    *
   1074    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
   1075    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
   1076    * character.
   1077    *
   1078    * @param sequence the character sequence to replace matching groups of characters in
   1079    * @param replacement the character to append to the result string in place of each group of
   1080    *        matching characters in {@code sequence}
   1081    * @return the new string
   1082    */
   1083   @CheckReturnValue
   1084   public String collapseFrom(CharSequence sequence, char replacement) {
   1085     int first = indexIn(sequence);
   1086     if (first == -1) {
   1087       return sequence.toString();
   1088     }
   1089 
   1090     // TODO(kevinb): see if this implementation can be made faster
   1091     StringBuilder builder = new StringBuilder(sequence.length())
   1092         .append(sequence.subSequence(0, first))
   1093         .append(replacement);
   1094     boolean in = true;
   1095     for (int i = first + 1; i < sequence.length(); i++) {
   1096       char c = sequence.charAt(i);
   1097       if (apply(c)) {
   1098         if (!in) {
   1099           builder.append(replacement);
   1100           in = true;
   1101         }
   1102       } else {
   1103         builder.append(c);
   1104         in = false;
   1105       }
   1106     }
   1107     return builder.toString();
   1108   }
   1109 
   1110   /**
   1111    * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
   1112    * groups of matching characters at the start or end of the sequence are removed without
   1113    * replacement.
   1114    */
   1115   @CheckReturnValue
   1116   public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
   1117     int first = negate().indexIn(sequence);
   1118     if (first == -1) {
   1119       return ""; // everything matches. nothing's left.
   1120     }
   1121     StringBuilder builder = new StringBuilder(sequence.length());
   1122     boolean inMatchingGroup = false;
   1123     for (int i = first; i < sequence.length(); i++) {
   1124       char c = sequence.charAt(i);
   1125       if (apply(c)) {
   1126         inMatchingGroup = true;
   1127       } else {
   1128         if (inMatchingGroup) {
   1129           builder.append(replacement);
   1130           inMatchingGroup = false;
   1131         }
   1132         builder.append(c);
   1133       }
   1134     }
   1135     return builder.toString();
   1136   }
   1137 
   1138   // Predicate interface
   1139 
   1140   /**
   1141    * Returns {@code true} if this matcher matches the given character.
   1142    *
   1143    * @throws NullPointerException if {@code character} is null
   1144    */
   1145   @Override public boolean apply(Character character) {
   1146     return matches(character);
   1147   }
   1148 }
   1149