Home | History | Annotate | Download | only in strings
      1 // Copyright 2015 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/pattern.h"
      6 
      7 #include "base/third_party/icu/icu_utf.h"
      8 
      9 namespace base {
     10 
     11 namespace {
     12 
     13 static bool IsWildcard(base_icu::UChar32 character) {
     14   return character == '*' || character == '?';
     15 }
     16 
     17 // Move the strings pointers to the point where they start to differ.
     18 template <typename CHAR, typename NEXT>
     19 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
     20                          const CHAR** string, const CHAR* string_end,
     21                          NEXT next) {
     22   const CHAR* escape = NULL;
     23   while (*pattern != pattern_end && *string != string_end) {
     24     if (!escape && IsWildcard(**pattern)) {
     25       // We don't want to match wildcard here, except if it's escaped.
     26       return;
     27     }
     28 
     29     // Check if the escapement char is found. If so, skip it and move to the
     30     // next character.
     31     if (!escape && **pattern == '\\') {
     32       escape = *pattern;
     33       next(pattern, pattern_end);
     34       continue;
     35     }
     36 
     37     // Check if the chars match, if so, increment the ptrs.
     38     const CHAR* pattern_next = *pattern;
     39     const CHAR* string_next = *string;
     40     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
     41     if (pattern_char == next(&string_next, string_end) &&
     42         pattern_char != CBU_SENTINEL) {
     43       *pattern = pattern_next;
     44       *string = string_next;
     45     } else {
     46       // Uh oh, it did not match, we are done. If the last char was an
     47       // escapement, that means that it was an error to advance the ptr here,
     48       // let's put it back where it was. This also mean that the MatchPattern
     49       // function will return false because if we can't match an escape char
     50       // here, then no one will.
     51       if (escape) {
     52         *pattern = escape;
     53       }
     54       return;
     55     }
     56 
     57     escape = NULL;
     58   }
     59 }
     60 
     61 template <typename CHAR, typename NEXT>
     62 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
     63   while (*pattern != end) {
     64     if (!IsWildcard(**pattern))
     65       return;
     66     next(pattern, end);
     67   }
     68 }
     69 
     70 template <typename CHAR, typename NEXT>
     71 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
     72                           const CHAR* pattern, const CHAR* pattern_end,
     73                           int depth,
     74                           NEXT next) {
     75   const int kMaxDepth = 16;
     76   if (depth > kMaxDepth)
     77     return false;
     78 
     79   // Eat all the matching chars.
     80   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
     81 
     82   // If the string is empty, then the pattern must be empty too, or contains
     83   // only wildcards.
     84   if (eval == eval_end) {
     85     EatWildcard(&pattern, pattern_end, next);
     86     return pattern == pattern_end;
     87   }
     88 
     89   // Pattern is empty but not string, this is not a match.
     90   if (pattern == pattern_end)
     91     return false;
     92 
     93   // If this is a question mark, then we need to compare the rest with
     94   // the current string or the string with one character eaten.
     95   const CHAR* next_pattern = pattern;
     96   next(&next_pattern, pattern_end);
     97   if (pattern[0] == '?') {
     98     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
     99                       depth + 1, next))
    100       return true;
    101     const CHAR* next_eval = eval;
    102     next(&next_eval, eval_end);
    103     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
    104                       depth + 1, next))
    105       return true;
    106   }
    107 
    108   // This is a *, try to match all the possible substrings with the remainder
    109   // of the pattern.
    110   if (pattern[0] == '*') {
    111     // Collapse duplicate wild cards (********** into *) so that the
    112     // method does not recurse unnecessarily. http://crbug.com/52839
    113     EatWildcard(&next_pattern, pattern_end, next);
    114 
    115     while (eval != eval_end) {
    116       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
    117                         depth + 1, next))
    118         return true;
    119       eval++;
    120     }
    121 
    122     // We reached the end of the string, let see if the pattern contains only
    123     // wildcards.
    124     if (eval == eval_end) {
    125       EatWildcard(&pattern, pattern_end, next);
    126       if (pattern != pattern_end)
    127         return false;
    128       return true;
    129     }
    130   }
    131 
    132   return false;
    133 }
    134 
    135 struct NextCharUTF8 {
    136   base_icu::UChar32 operator()(const char** p, const char* end) {
    137     base_icu::UChar32 c;
    138     int offset = 0;
    139     CBU8_NEXT(*p, offset, end - *p, c);
    140     *p += offset;
    141     return c;
    142   }
    143 };
    144 
    145 struct NextCharUTF16 {
    146   base_icu::UChar32 operator()(const char16** p, const char16* end) {
    147     base_icu::UChar32 c;
    148     int offset = 0;
    149     CBU16_NEXT(*p, offset, end - *p, c);
    150     *p += offset;
    151     return c;
    152   }
    153 };
    154 
    155 }  // namespace
    156 
    157 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
    158   return MatchPatternT(eval.data(), eval.data() + eval.size(),
    159                        pattern.data(), pattern.data() + pattern.size(),
    160                        0, NextCharUTF8());
    161 }
    162 
    163 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
    164   return MatchPatternT(eval.data(), eval.data() + eval.size(),
    165                        pattern.data(), pattern.data() + pattern.size(),
    166                        0, NextCharUTF16());
    167 }
    168 
    169 }  // namespace base
    170