Home | History | Annotate | Download | only in os
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.os;
     18 
     19 import android.util.proto.ProtoOutputStream;
     20 
     21 import java.util.Arrays;
     22 
     23 /**
     24  * A simple pattern matcher, which is safe to use on untrusted data: it does
     25  * not provide full reg-exp support, only simple globbing that can not be
     26  * used maliciously.
     27  */
     28 public class PatternMatcher implements Parcelable {
     29     /**
     30      * Pattern type: the given pattern must exactly match the string it is
     31      * tested against.
     32      */
     33     public static final int PATTERN_LITERAL = 0;
     34 
     35     /**
     36      * Pattern type: the given pattern must match the
     37      * beginning of the string it is tested against.
     38      */
     39     public static final int PATTERN_PREFIX = 1;
     40 
     41     /**
     42      * Pattern type: the given pattern is interpreted with a
     43      * simple glob syntax for matching against the string it is tested against.
     44      * In this syntax, you can use the '*' character to match against zero or
     45      * more occurrences of the character immediately before.  If the
     46      * character before it is '.' it will match any character.  The character
     47      * '\' can be used as an escape.  This essentially provides only the '*'
     48      * wildcard part of a normal regexp.
     49      */
     50     public static final int PATTERN_SIMPLE_GLOB = 2;
     51 
     52     /**
     53      * Pattern type: the given pattern is interpreted with a regular
     54      * expression-like syntax for matching against the string it is tested
     55      * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
     56      * with full support for character ranges and the not ({@code ^}) modifier.
     57      * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
     58      * for one-or-more and full range ({@code {...}}) support. This is a simple
     59      * evaluation implementation in which matching is done against the pattern in
     60      * real time with no backtracking support.
     61      */
     62     public static final int PATTERN_ADVANCED_GLOB = 3;
     63 
     64     // token types for advanced matching
     65     private static final int TOKEN_TYPE_LITERAL = 0;
     66     private static final int TOKEN_TYPE_ANY = 1;
     67     private static final int TOKEN_TYPE_SET = 2;
     68     private static final int TOKEN_TYPE_INVERSE_SET = 3;
     69 
     70     // Return for no match
     71     private static final int NO_MATCH = -1;
     72 
     73     private static final String TAG = "PatternMatcher";
     74 
     75     // Parsed placeholders for advanced patterns
     76     private static final int PARSED_TOKEN_CHAR_SET_START = -1;
     77     private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
     78     private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
     79     private static final int PARSED_TOKEN_CHAR_ANY = -4;
     80     private static final int PARSED_MODIFIER_RANGE_START = -5;
     81     private static final int PARSED_MODIFIER_RANGE_STOP = -6;
     82     private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
     83     private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
     84 
     85     private final String mPattern;
     86     private final int mType;
     87     private final int[] mParsedPattern;
     88 
     89 
     90     private static final int MAX_PATTERN_STORAGE = 2048;
     91     // workspace to use for building a parsed advanced pattern;
     92     private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
     93 
     94     public PatternMatcher(String pattern, int type) {
     95         mPattern = pattern;
     96         mType = type;
     97         if (mType == PATTERN_ADVANCED_GLOB) {
     98             mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
     99         } else {
    100             mParsedPattern = null;
    101         }
    102     }
    103 
    104     public final String getPath() {
    105         return mPattern;
    106     }
    107 
    108     public final int getType() {
    109         return mType;
    110     }
    111 
    112     public boolean match(String str) {
    113         return matchPattern(str, mPattern, mParsedPattern, mType);
    114     }
    115 
    116     public String toString() {
    117         String type = "? ";
    118         switch (mType) {
    119             case PATTERN_LITERAL:
    120                 type = "LITERAL: ";
    121                 break;
    122             case PATTERN_PREFIX:
    123                 type = "PREFIX: ";
    124                 break;
    125             case PATTERN_SIMPLE_GLOB:
    126                 type = "GLOB: ";
    127                 break;
    128             case PATTERN_ADVANCED_GLOB:
    129                 type = "ADVANCED: ";
    130                 break;
    131         }
    132         return "PatternMatcher{" + type + mPattern + "}";
    133     }
    134 
    135     /** @hide */
    136     public void writeToProto(ProtoOutputStream proto, long fieldId) {
    137         long token = proto.start(fieldId);
    138         proto.write(PatternMatcherProto.PATTERN, mPattern);
    139         proto.write(PatternMatcherProto.TYPE, mType);
    140         // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
    141         // match the current data structure.
    142         proto.end(token);
    143     }
    144 
    145     public int describeContents() {
    146         return 0;
    147     }
    148 
    149     public void writeToParcel(Parcel dest, int flags) {
    150         dest.writeString(mPattern);
    151         dest.writeInt(mType);
    152         dest.writeIntArray(mParsedPattern);
    153     }
    154 
    155     public PatternMatcher(Parcel src) {
    156         mPattern = src.readString();
    157         mType = src.readInt();
    158         mParsedPattern = src.createIntArray();
    159     }
    160 
    161     public static final Parcelable.Creator<PatternMatcher> CREATOR
    162             = new Parcelable.Creator<PatternMatcher>() {
    163         public PatternMatcher createFromParcel(Parcel source) {
    164             return new PatternMatcher(source);
    165         }
    166 
    167         public PatternMatcher[] newArray(int size) {
    168             return new PatternMatcher[size];
    169         }
    170     };
    171 
    172     static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
    173         if (match == null) return false;
    174         if (type == PATTERN_LITERAL) {
    175             return pattern.equals(match);
    176         } if (type == PATTERN_PREFIX) {
    177             return match.startsWith(pattern);
    178         } else if (type == PATTERN_SIMPLE_GLOB) {
    179             return matchGlobPattern(pattern, match);
    180         } else if (type == PATTERN_ADVANCED_GLOB) {
    181             return matchAdvancedPattern(parsedPattern, match);
    182         }
    183         return false;
    184     }
    185 
    186     static boolean matchGlobPattern(String pattern, String match) {
    187         final int NP = pattern.length();
    188         if (NP <= 0) {
    189             return match.length() <= 0;
    190         }
    191         final int NM = match.length();
    192         int ip = 0, im = 0;
    193         char nextChar = pattern.charAt(0);
    194         while ((ip<NP) && (im<NM)) {
    195             char c = nextChar;
    196             ip++;
    197             nextChar = ip < NP ? pattern.charAt(ip) : 0;
    198             final boolean escaped = (c == '\\');
    199             if (escaped) {
    200                 c = nextChar;
    201                 ip++;
    202                 nextChar = ip < NP ? pattern.charAt(ip) : 0;
    203             }
    204             if (nextChar == '*') {
    205                 if (!escaped && c == '.') {
    206                     if (ip >= (NP-1)) {
    207                         // at the end with a pattern match, so
    208                         // all is good without checking!
    209                         return true;
    210                     }
    211                     ip++;
    212                     nextChar = pattern.charAt(ip);
    213                     // Consume everything until the next character in the
    214                     // pattern is found.
    215                     if (nextChar == '\\') {
    216                         ip++;
    217                         nextChar = ip < NP ? pattern.charAt(ip) : 0;
    218                     }
    219                     do {
    220                         if (match.charAt(im) == nextChar) {
    221                             break;
    222                         }
    223                         im++;
    224                     } while (im < NM);
    225                     if (im == NM) {
    226                         // Whoops, the next character in the pattern didn't
    227                         // exist in the match.
    228                         return false;
    229                     }
    230                     ip++;
    231                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
    232                     im++;
    233                 } else {
    234                     // Consume only characters matching the one before '*'.
    235                     do {
    236                         if (match.charAt(im) != c) {
    237                             break;
    238                         }
    239                         im++;
    240                     } while (im < NM);
    241                     ip++;
    242                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
    243                 }
    244             } else {
    245                 if (c != '.' && match.charAt(im) != c) return false;
    246                 im++;
    247             }
    248         }
    249 
    250         if (ip >= NP && im >= NM) {
    251             // Reached the end of both strings, all is good!
    252             return true;
    253         }
    254 
    255         // One last check: we may have finished the match string, but still
    256         // have a '.*' at the end of the pattern, which should still count
    257         // as a match.
    258         if (ip == NP-2 && pattern.charAt(ip) == '.'
    259             && pattern.charAt(ip+1) == '*') {
    260             return true;
    261         }
    262 
    263         return false;
    264     }
    265 
    266     /**
    267      * Parses the advanced pattern and returns an integer array representation of it. The integer
    268      * array treats each field as a character if positive and a unique token placeholder if
    269      * negative. This method will throw on any pattern structure violations.
    270      */
    271     synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
    272         int ip = 0;
    273         final int LP = pattern.length();
    274 
    275         int it = 0;
    276 
    277         boolean inSet = false;
    278         boolean inRange = false;
    279         boolean inCharClass = false;
    280 
    281         boolean addToParsedPattern;
    282 
    283         while (ip < LP) {
    284             if (it > MAX_PATTERN_STORAGE - 3) {
    285                 throw new IllegalArgumentException("Pattern is too large!");
    286             }
    287 
    288             char c = pattern.charAt(ip);
    289             addToParsedPattern = false;
    290 
    291             switch (c) {
    292                 case '[':
    293                     if (inSet) {
    294                         addToParsedPattern = true; // treat as literal or char class in set
    295                     } else {
    296                         if (pattern.charAt(ip + 1) == '^') {
    297                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
    298                             ip++; // skip over the '^'
    299                         } else {
    300                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
    301                         }
    302                         ip++; // move to the next pattern char
    303                         inSet = true;
    304                         continue;
    305                     }
    306                     break;
    307                 case ']':
    308                     if (!inSet) {
    309                         addToParsedPattern = true; // treat as literal outside of set
    310                     } else {
    311                         int parsedToken = sParsedPatternScratch[it - 1];
    312                         if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
    313                             parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
    314                             throw new IllegalArgumentException(
    315                                     "You must define characters in a set.");
    316                         }
    317                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
    318                         inSet = false;
    319                         inCharClass = false;
    320                     }
    321                     break;
    322                 case '{':
    323                     if (!inSet) {
    324                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
    325                             throw new IllegalArgumentException("Modifier must follow a token.");
    326                         }
    327                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
    328                         ip++;
    329                         inRange = true;
    330                     }
    331                     break;
    332                 case '}':
    333                     if (inRange) { // only terminate the range if we're currently in one
    334                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
    335                         inRange = false;
    336                     }
    337                     break;
    338                 case '*':
    339                     if (!inSet) {
    340                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
    341                             throw new IllegalArgumentException("Modifier must follow a token.");
    342                         }
    343                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
    344                     }
    345                     break;
    346                 case '+':
    347                     if (!inSet) {
    348                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
    349                             throw new IllegalArgumentException("Modifier must follow a token.");
    350                         }
    351                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
    352                     }
    353                     break;
    354                 case '.':
    355                     if (!inSet) {
    356                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
    357                     }
    358                     break;
    359                 case '\\': // escape
    360                     if (ip + 1 >= LP) {
    361                         throw new IllegalArgumentException("Escape found at end of pattern!");
    362                     }
    363                     c = pattern.charAt(++ip);
    364                     addToParsedPattern = true;
    365                     break;
    366                 default:
    367                     addToParsedPattern = true;
    368                     break;
    369             }
    370             if (inSet) {
    371                 if (inCharClass) {
    372                     sParsedPatternScratch[it++] = c;
    373                     inCharClass = false;
    374                 } else {
    375                     // look forward for character class
    376                     if (ip + 2 < LP
    377                             && pattern.charAt(ip + 1) == '-'
    378                             && pattern.charAt(ip + 2) != ']') {
    379                         inCharClass = true;
    380                         sParsedPatternScratch[it++] = c; // set first token as lower end of range
    381                         ip++; // advance past dash
    382                     } else { // literal
    383                         sParsedPatternScratch[it++] = c; // set first token as literal
    384                         sParsedPatternScratch[it++] = c; // set second set as literal
    385                     }
    386                 }
    387             } else if (inRange) {
    388                 int endOfSet = pattern.indexOf('}', ip);
    389                 if (endOfSet < 0) {
    390                     throw new IllegalArgumentException("Range not ended with '}'");
    391                 }
    392                 String rangeString = pattern.substring(ip, endOfSet);
    393                 int commaIndex = rangeString.indexOf(',');
    394                 try {
    395                     final int rangeMin;
    396                     final int rangeMax;
    397                     if (commaIndex < 0) {
    398                         int parsedRange = Integer.parseInt(rangeString);
    399                         rangeMin = rangeMax = parsedRange;
    400                     } else {
    401                         rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
    402                         if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
    403                             rangeMax = Integer.MAX_VALUE;
    404                         } else {
    405                             rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
    406                         }
    407                     }
    408                     if (rangeMin > rangeMax) {
    409                         throw new IllegalArgumentException(
    410                             "Range quantifier minimum is greater than maximum");
    411                     }
    412                     sParsedPatternScratch[it++] = rangeMin;
    413                     sParsedPatternScratch[it++] = rangeMax;
    414                 } catch (NumberFormatException e) {
    415                     throw new IllegalArgumentException("Range number format incorrect", e);
    416                 }
    417                 ip = endOfSet;
    418                 continue; // don't increment ip
    419             } else if (addToParsedPattern) {
    420                 sParsedPatternScratch[it++] = c;
    421             }
    422             ip++;
    423         }
    424         if (inSet) {
    425             throw new IllegalArgumentException("Set was not terminated!");
    426         }
    427         return Arrays.copyOf(sParsedPatternScratch, it);
    428     }
    429 
    430     private static boolean isParsedModifier(int parsedChar) {
    431         return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
    432                 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
    433                 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
    434                 parsedChar == PARSED_MODIFIER_RANGE_START;
    435     }
    436 
    437     static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
    438 
    439         // create indexes
    440         int ip = 0, im = 0;
    441 
    442         // one-time length check
    443         final int LP = parsedPattern.length, LM = match.length();
    444 
    445         // The current character being analyzed in the pattern
    446         int patternChar;
    447 
    448         int tokenType;
    449 
    450         int charSetStart = 0, charSetEnd = 0;
    451 
    452         while (ip < LP) { // we still have content in the pattern
    453 
    454             patternChar = parsedPattern[ip];
    455             // get the match type of the next verb
    456 
    457             switch (patternChar) {
    458                 case PARSED_TOKEN_CHAR_ANY:
    459                     tokenType = TOKEN_TYPE_ANY;
    460                     ip++;
    461                     break;
    462                 case PARSED_TOKEN_CHAR_SET_START:
    463                 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
    464                     tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
    465                             ? TOKEN_TYPE_SET
    466                             : TOKEN_TYPE_INVERSE_SET;
    467                     charSetStart = ip + 1; // start from the char after the set start
    468                     while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
    469                     charSetEnd = ip - 1; // we're on the set stop, end is the previous
    470                     ip++; // move the pointer to the next pattern entry
    471                     break;
    472                 default:
    473                     charSetStart = ip;
    474                     tokenType = TOKEN_TYPE_LITERAL;
    475                     ip++;
    476                     break;
    477             }
    478 
    479             final int minRepetition;
    480             final int maxRepetition;
    481 
    482             // look for a match length modifier
    483             if (ip >= LP) {
    484                 minRepetition = maxRepetition = 1;
    485             } else {
    486                 patternChar = parsedPattern[ip];
    487                 switch (patternChar) {
    488                     case PARSED_MODIFIER_ZERO_OR_MORE:
    489                         minRepetition = 0;
    490                         maxRepetition = Integer.MAX_VALUE;
    491                         ip++;
    492                         break;
    493                     case PARSED_MODIFIER_ONE_OR_MORE:
    494                         minRepetition = 1;
    495                         maxRepetition = Integer.MAX_VALUE;
    496                         ip++;
    497                         break;
    498                     case PARSED_MODIFIER_RANGE_START:
    499                         minRepetition = parsedPattern[++ip];
    500                         maxRepetition = parsedPattern[++ip];
    501                         ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
    502                         break;
    503                     default:
    504                         minRepetition = maxRepetition = 1; // implied literal
    505                         break;
    506                 }
    507             }
    508             if (minRepetition > maxRepetition) {
    509                 return false;
    510             }
    511 
    512             // attempt to match as many characters as possible
    513             int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
    514                     parsedPattern, charSetStart, charSetEnd);
    515 
    516             // if we found a conflict, return false immediately
    517             if (matched == NO_MATCH) {
    518                 return false;
    519             }
    520 
    521             // move the match pointer the number of characters matched
    522             im += matched;
    523         }
    524         return ip >= LP && im >= LM; // have parsed entire string and regex
    525     }
    526 
    527     private static int matchChars(String match, int im, final int lm, int tokenType,
    528             int minRepetition, int maxRepetition, int[] parsedPattern,
    529             int tokenStart, int tokenEnd) {
    530         int matched = 0;
    531 
    532         while(matched < maxRepetition
    533                 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
    534                     tokenEnd)) {
    535             matched++;
    536         }
    537 
    538         return matched < minRepetition ? NO_MATCH : matched;
    539     }
    540 
    541     private static boolean matchChar(String match, int im, final int lm, int tokenType,
    542             int[] parsedPattern, int tokenStart, int tokenEnd) {
    543         if (im >= lm) { // we've overrun the string, no match
    544             return false;
    545         }
    546         switch (tokenType) {
    547             case TOKEN_TYPE_ANY:
    548                 return true;
    549             case TOKEN_TYPE_SET:
    550                 for (int i = tokenStart; i < tokenEnd; i += 2) {
    551                     char matchChar = match.charAt(im);
    552                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
    553                         return true;
    554                     }
    555                 }
    556                 return false;
    557             case TOKEN_TYPE_INVERSE_SET:
    558                 for (int i = tokenStart; i < tokenEnd; i += 2) {
    559                     char matchChar = match.charAt(im);
    560                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
    561                         return false;
    562                     }
    563                 }
    564                 return true;
    565             case TOKEN_TYPE_LITERAL:
    566                 return match.charAt(im) == parsedPattern[tokenStart];
    567             default:
    568                 return false;
    569         }
    570     }
    571 }