1 // Copyright 2015 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/pattern.h" 6 7 #include "base/third_party/icu/icu_utf.h" 8 9 namespace base { 10 11 namespace { 12 13 constexpr bool IsWildcard(base_icu::UChar32 character) { 14 return character == '*' || character == '?'; 15 } 16 17 // Searches for the next subpattern of |pattern| in |string|, up to the given 18 // |maximum_distance|. The subpattern extends from the start of |pattern| up to 19 // the first wildcard character (or the end of the string). If the value of 20 // |maximum_distance| is negative, the maximum distance is considered infinite. 21 template <typename CHAR, typename NEXT> 22 constexpr bool SearchForChars(const CHAR** pattern, 23 const CHAR* pattern_end, 24 const CHAR** string, 25 const CHAR* string_end, 26 int maximum_distance, 27 NEXT next) { 28 const CHAR* pattern_start = *pattern; 29 const CHAR* string_start = *string; 30 bool escape = false; 31 while (true) { 32 if (*pattern == pattern_end) { 33 // If this is the end of the pattern, only accept the end of the string; 34 // anything else falls through to the mismatch case. 35 if (*string == string_end) 36 return true; 37 } else { 38 // If we have found a wildcard, we're done. 39 if (!escape && IsWildcard(**pattern)) 40 return true; 41 42 // Check if the escape character is found. If so, skip it and move to the 43 // next character. 44 if (!escape && **pattern == '\\') { 45 escape = true; 46 next(pattern, pattern_end); 47 continue; 48 } 49 50 escape = false; 51 52 if (*string == string_end) 53 return false; 54 55 // Check if the chars match, if so, increment the ptrs. 56 const CHAR* pattern_next = *pattern; 57 const CHAR* string_next = *string; 58 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 59 if (pattern_char == next(&string_next, string_end) && 60 pattern_char != CBU_SENTINEL) { 61 *pattern = pattern_next; 62 *string = string_next; 63 continue; 64 } 65 } 66 67 // Mismatch. If we have reached the maximum distance, return false, 68 // otherwise restart at the beginning of the pattern with the next character 69 // in the string. 70 // TODO(bauerb): This is a naive implementation of substring search, which 71 // could be implemented with a more efficient algorithm, e.g. 72 // Knuth-Morris-Pratt (at the expense of requiring preprocessing). 73 if (maximum_distance == 0) 74 return false; 75 76 // Because unlimited distance is represented as -1, this will never reach 0 77 // and therefore fail the match above. 78 maximum_distance--; 79 *pattern = pattern_start; 80 next(&string_start, string_end); 81 *string = string_start; 82 } 83 } 84 85 // Consumes consecutive wildcard characters (? or *). Returns the maximum number 86 // of characters matched by the sequence of wildcards, or -1 if the wildcards 87 // match an arbitrary number of characters (which is the case if it contains at 88 // least one *). 89 template <typename CHAR, typename NEXT> 90 constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) { 91 int num_question_marks = 0; 92 bool has_asterisk = false; 93 while (*pattern != end) { 94 if (**pattern == '?') { 95 num_question_marks++; 96 } else if (**pattern == '*') { 97 has_asterisk = true; 98 } else { 99 break; 100 } 101 102 next(pattern, end); 103 } 104 return has_asterisk ? -1 : num_question_marks; 105 } 106 107 template <typename CHAR, typename NEXT> 108 constexpr bool MatchPatternT(const CHAR* eval, 109 const CHAR* eval_end, 110 const CHAR* pattern, 111 const CHAR* pattern_end, 112 NEXT next) { 113 do { 114 int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next); 115 if (!SearchForChars(&pattern, pattern_end, &eval, eval_end, 116 maximum_wildcard_length, next)) { 117 return false; 118 } 119 } while (pattern != pattern_end); 120 return true; 121 } 122 123 struct NextCharUTF8 { 124 base_icu::UChar32 operator()(const char** p, const char* end) { 125 base_icu::UChar32 c; 126 int offset = 0; 127 CBU8_NEXT(*p, offset, end - *p, c); 128 *p += offset; 129 return c; 130 } 131 }; 132 133 struct NextCharUTF16 { 134 base_icu::UChar32 operator()(const char16** p, const char16* end) { 135 base_icu::UChar32 c; 136 int offset = 0; 137 CBU16_NEXT(*p, offset, end - *p, c); 138 *p += offset; 139 return c; 140 } 141 }; 142 143 } // namespace 144 145 bool MatchPattern(StringPiece eval, StringPiece pattern) { 146 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), 147 pattern.data() + pattern.size(), NextCharUTF8()); 148 } 149 150 bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) { 151 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), 152 pattern.data() + pattern.size(), NextCharUTF16()); 153 } 154 155 } // namespace base 156