1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "tools/gn/pattern.h" 6 7 #include "tools/gn/value.h" 8 9 const char kPattern_Help[] = 10 "Patterns\n" 11 " Patterns are VERY limited regular expressions that are used in\n" 12 " several places.\n" 13 "\n" 14 " Patterns must match the entire input string to be counted as a match.\n" 15 " In regular expression parlance, there is an implicit \"^...$\"\n" 16 " surrounding your input. If you want to match a substring, you need to\n" 17 " use wildcards at the beginning and end.\n" 18 "\n" 19 " There are only two special tokens understood by the pattern matcher.\n" 20 " Everything else is a literal.\n" 21 "\n" 22 " * Matches zero or more of any character. It does not depend on the\n" 23 " preceeding character (in regular expression parlance it is\n" 24 " equivalent to \".*\").\n" 25 "\n" 26 " \\b Matches a path boundary. This will match the beginning or end of\n" 27 " a string, or a slash.\n" 28 "\n" 29 "Examples\n" 30 "\n" 31 " \"*asdf*\"\n" 32 " Matches a string containing \"asdf\" anywhere.\n" 33 "\n" 34 " \"asdf\"\n" 35 " Matches only the exact string \"asdf\".\n" 36 "\n" 37 " \"*.cc\"\n" 38 " Matches strings ending in the literal \".cc\".\n" 39 "\n" 40 " \"\\bwin/*\"\n" 41 " Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n"; 42 43 namespace { 44 45 void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) { 46 // Set when the last subrange is a literal so we can just append when we 47 // find another literal. 48 Pattern::Subrange* last_literal = NULL; 49 50 for (size_t i = 0; i < s.size(); i++) { 51 if (s[i] == '*') { 52 // Don't allow two **. 53 if (out->size() == 0 || 54 (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING) 55 out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING)); 56 last_literal = NULL; 57 } else if (s[i] == '\\') { 58 if (i < s.size() - 1 && s[i + 1] == 'b') { 59 // "\b" means path boundary. 60 i++; 61 out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY)); 62 last_literal = NULL; 63 } else { 64 // Backslash + anything else means that literal char. 65 if (!last_literal) { 66 out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL)); 67 last_literal = &(*out)[out->size() - 1]; 68 } 69 if (i < s.size() - 1) { 70 i++; 71 last_literal->literal.push_back(s[i]); 72 } else { 73 // Single backslash at end, use literal backslash. 74 last_literal->literal.push_back('\\'); 75 } 76 } 77 } else { 78 if (!last_literal) { 79 out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL)); 80 last_literal = &(*out)[out->size() - 1]; 81 } 82 last_literal->literal.push_back(s[i]); 83 } 84 } 85 } 86 87 } // namespace 88 89 Pattern::Pattern(const std::string& s) { 90 ParsePattern(s, &subranges_); 91 is_suffix_ = 92 (subranges_.size() == 2 && 93 subranges_[0].type == Subrange::ANYTHING && 94 subranges_[1].type == Subrange::LITERAL); 95 } 96 97 Pattern::~Pattern() { 98 } 99 100 bool Pattern::MatchesString(const std::string& s) const { 101 // Empty pattern matches only empty string. 102 if (subranges_.empty()) 103 return s.empty(); 104 105 if (is_suffix_) { 106 const std::string& suffix = subranges_[1].literal; 107 if (suffix.size() > s.size()) 108 return false; // Too short. 109 return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; 110 } 111 112 return RecursiveMatch(s, 0, 0, true); 113 } 114 115 // We assume the number of ranges is small so recursive is always reasonable. 116 // Could be optimized to only be recursive for *. 117 bool Pattern::RecursiveMatch(const std::string& s, 118 size_t begin_char, 119 size_t subrange_index, 120 bool allow_implicit_path_boundary) const { 121 if (subrange_index >= subranges_.size()) { 122 // Hit the end of our subranges, the text should also be at the end for a 123 // match. 124 return begin_char == s.size(); 125 } 126 127 const Subrange& sr = subranges_[subrange_index]; 128 switch (sr.type) { 129 case Subrange::LITERAL: { 130 if (s.size() - begin_char < sr.literal.size()) 131 return false; // Not enough room. 132 if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0) 133 return false; // Literal doesn't match. 134 135 // Recursively check the next one. 136 return RecursiveMatch(s, begin_char + sr.literal.size(), 137 subrange_index + 1, true); 138 } 139 140 case Subrange::PATH_BOUNDARY: { 141 // When we can accept an implicit path boundary, we have to check both 142 // a match of the literal and the implicit one. 143 if (allow_implicit_path_boundary && 144 (begin_char == 0 || begin_char == s.size())) { 145 // At implicit path boundary, see if the rest of the pattern matches. 146 if (RecursiveMatch(s, begin_char, subrange_index + 1, false)) 147 return true; 148 } 149 150 // Check for a literal "/". 151 if (begin_char < s.size() && s[begin_char] == '/') { 152 // At explicit boundary, see if the rest of the pattern matches. 153 if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true)) 154 return true; 155 } 156 return false; 157 } 158 159 case Subrange::ANYTHING: { 160 if (subrange_index == subranges_.size() - 1) 161 return true; // * at the end, consider it matching. 162 163 size_t min_next_size = sr.MinSize(); 164 165 // We don't care about exactly what matched as long as there was a match, 166 // so we can do this front-to-back. If we needed the match, we would 167 // normally want "*" to be greedy so would work backwards. 168 for (size_t i = begin_char; i < s.size() - min_next_size; i++) { 169 // Note: this could probably be faster by detecting the type of the 170 // next match in advance and checking for a match in this loop rather 171 // than doing a full recursive call for each character. 172 if (RecursiveMatch(s, i, subrange_index + 1, true)) 173 return true; 174 } 175 return false; 176 } 177 178 default: 179 NOTREACHED(); 180 } 181 182 return false; 183 } 184 185 PatternList::PatternList() { 186 } 187 188 PatternList::~PatternList() { 189 } 190 191 void PatternList::SetFromValue(const Value& v, Err* err) { 192 patterns_.clear(); 193 194 if (v.type() != Value::LIST) { 195 *err = Err(v.origin(), "This value must be a list."); 196 return; 197 } 198 199 const std::vector<Value>& list = v.list_value(); 200 for (size_t i = 0; i < list.size(); i++) { 201 if (!list[i].VerifyTypeIs(Value::STRING, err)) 202 return; 203 patterns_.push_back(Pattern(list[i].string_value())); 204 } 205 } 206 207 bool PatternList::MatchesString(const std::string& s) const { 208 for (size_t i = 0; i < patterns_.size(); i++) { 209 if (patterns_[i].MatchesString(s)) 210 return true; 211 } 212 return false; 213 } 214 215 bool PatternList::MatchesValue(const Value& v) const { 216 if (v.type() == Value::STRING) 217 return MatchesString(v.string_value()); 218 return false; 219 } 220