1 // Copyright 2015 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/pattern.h" 6 7 #include "base/third_party/icu/icu_utf.h" 8 9 namespace base { 10 11 namespace { 12 13 static bool IsWildcard(base_icu::UChar32 character) { 14 return character == '*' || character == '?'; 15 } 16 17 // Move the strings pointers to the point where they start to differ. 18 template <typename CHAR, typename NEXT> 19 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 20 const CHAR** string, const CHAR* string_end, 21 NEXT next) { 22 const CHAR* escape = NULL; 23 while (*pattern != pattern_end && *string != string_end) { 24 if (!escape && IsWildcard(**pattern)) { 25 // We don't want to match wildcard here, except if it's escaped. 26 return; 27 } 28 29 // Check if the escapement char is found. If so, skip it and move to the 30 // next character. 31 if (!escape && **pattern == '\\') { 32 escape = *pattern; 33 next(pattern, pattern_end); 34 continue; 35 } 36 37 // Check if the chars match, if so, increment the ptrs. 38 const CHAR* pattern_next = *pattern; 39 const CHAR* string_next = *string; 40 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 41 if (pattern_char == next(&string_next, string_end) && 42 pattern_char != CBU_SENTINEL) { 43 *pattern = pattern_next; 44 *string = string_next; 45 } else { 46 // Uh oh, it did not match, we are done. If the last char was an 47 // escapement, that means that it was an error to advance the ptr here, 48 // let's put it back where it was. This also mean that the MatchPattern 49 // function will return false because if we can't match an escape char 50 // here, then no one will. 51 if (escape) { 52 *pattern = escape; 53 } 54 return; 55 } 56 57 escape = NULL; 58 } 59 } 60 61 template <typename CHAR, typename NEXT> 62 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 63 while (*pattern != end) { 64 if (!IsWildcard(**pattern)) 65 return; 66 next(pattern, end); 67 } 68 } 69 70 template <typename CHAR, typename NEXT> 71 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 72 const CHAR* pattern, const CHAR* pattern_end, 73 int depth, 74 NEXT next) { 75 const int kMaxDepth = 16; 76 if (depth > kMaxDepth) 77 return false; 78 79 // Eat all the matching chars. 80 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 81 82 // If the string is empty, then the pattern must be empty too, or contains 83 // only wildcards. 84 if (eval == eval_end) { 85 EatWildcard(&pattern, pattern_end, next); 86 return pattern == pattern_end; 87 } 88 89 // Pattern is empty but not string, this is not a match. 90 if (pattern == pattern_end) 91 return false; 92 93 // If this is a question mark, then we need to compare the rest with 94 // the current string or the string with one character eaten. 95 const CHAR* next_pattern = pattern; 96 next(&next_pattern, pattern_end); 97 if (pattern[0] == '?') { 98 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 99 depth + 1, next)) 100 return true; 101 const CHAR* next_eval = eval; 102 next(&next_eval, eval_end); 103 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 104 depth + 1, next)) 105 return true; 106 } 107 108 // This is a *, try to match all the possible substrings with the remainder 109 // of the pattern. 110 if (pattern[0] == '*') { 111 // Collapse duplicate wild cards (********** into *) so that the 112 // method does not recurse unnecessarily. http://crbug.com/52839 113 EatWildcard(&next_pattern, pattern_end, next); 114 115 while (eval != eval_end) { 116 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 117 depth + 1, next)) 118 return true; 119 eval++; 120 } 121 122 // We reached the end of the string, let see if the pattern contains only 123 // wildcards. 124 if (eval == eval_end) { 125 EatWildcard(&pattern, pattern_end, next); 126 if (pattern != pattern_end) 127 return false; 128 return true; 129 } 130 } 131 132 return false; 133 } 134 135 struct NextCharUTF8 { 136 base_icu::UChar32 operator()(const char** p, const char* end) { 137 base_icu::UChar32 c; 138 int offset = 0; 139 CBU8_NEXT(*p, offset, end - *p, c); 140 *p += offset; 141 return c; 142 } 143 }; 144 145 struct NextCharUTF16 { 146 base_icu::UChar32 operator()(const char16** p, const char16* end) { 147 base_icu::UChar32 c; 148 int offset = 0; 149 CBU16_NEXT(*p, offset, end - *p, c); 150 *p += offset; 151 return c; 152 } 153 }; 154 155 } // namespace 156 157 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { 158 return MatchPatternT(eval.data(), eval.data() + eval.size(), 159 pattern.data(), pattern.data() + pattern.size(), 160 0, NextCharUTF8()); 161 } 162 163 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { 164 return MatchPatternT(eval.data(), eval.data() + eval.size(), 165 pattern.data(), pattern.data() + pattern.size(), 166 0, NextCharUTF16()); 167 } 168 169 } // namespace base 170