1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_CMDLINE_TOKEN_RANGE_H_ 18 #define ART_CMDLINE_TOKEN_RANGE_H_ 19 20 #include <assert.h> 21 #include <vector> 22 #include <string> 23 #include <algorithm> 24 #include <memory> 25 26 namespace art { 27 // A range of tokens to make token matching algorithms easier. 28 // 29 // We try really hard to avoid copying and store only a pointer and iterators to the 30 // interiors of the vector, so a typical copy constructor never ends up doing a deep copy. 31 // It is up to the user to play nice and not to mutate the strings in-place. 32 // 33 // Tokens are only copied if a mutating operation is performed (and even then only 34 // if it *actually* mutates the token). 35 struct TokenRange { 36 // Short-hand for a vector of strings. A single string and a token is synonymous. 37 using TokenList = std::vector<std::string>; 38 39 // Copying-from-vector constructor. 40 explicit TokenRange(const TokenList& token_list) 41 : token_list_(new TokenList(token_list)), 42 begin_(token_list_->begin()), 43 end_(token_list_->end()) 44 {} 45 46 // Copying-from-iterator constructor 47 template <typename ForwardIterator> 48 TokenRange(ForwardIterator it_begin, ForwardIterator it_end) 49 : token_list_(new TokenList(it_begin, it_end)), 50 begin_(token_list_->begin()), 51 end_(token_list_->end()) 52 {} 53 54 #if 0 55 // Copying-from-vector constructor. 56 TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED, 57 TokenList::const_iterator it_begin, 58 TokenList::const_iterator it_end) 59 : token_list_(new TokenList(it_begin, it_end)), 60 begin_(token_list_->begin()), 61 end_(token_list_->end()) { 62 assert(it_begin >= token_list.begin()); 63 assert(it_end <= token_list.end()); 64 } 65 #endif 66 67 // Copying from char array constructor, convertings into tokens (strings) along the way. 68 TokenRange(const char* token_list[], size_t length) 69 : token_list_(new TokenList(&token_list[0], &token_list[length])), 70 begin_(token_list_->begin()), 71 end_(token_list_->end()) 72 {} 73 74 // Non-copying move-from-vector constructor. Takes over the token vector. 75 explicit TokenRange(TokenList&& token_list) 76 : token_list_(new TokenList(std::forward<TokenList>(token_list))), 77 begin_(token_list_->begin()), 78 end_(token_list_->end()) 79 {} 80 81 // Non-copying constructor. Retain reference to existing list of tokens. 82 TokenRange(std::shared_ptr<TokenList> token_list, 83 TokenList::const_iterator it_begin, 84 TokenList::const_iterator it_end) 85 : token_list_(token_list), 86 begin_(it_begin), 87 end_(it_end) { 88 assert(it_begin >= token_list->begin()); 89 assert(it_end <= token_list->end()); 90 } 91 92 // Non-copying copy constructor. 93 TokenRange(const TokenRange&) = default; 94 95 // Non-copying move constructor. 96 TokenRange(TokenRange&&) = default; 97 98 // Non-copying constructor. Retains reference to an existing list of tokens, with offset. 99 explicit TokenRange(std::shared_ptr<TokenList> token_list) 100 : token_list_(token_list), 101 begin_(token_list_->begin()), 102 end_(token_list_->end()) 103 {} 104 105 // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator. 106 using iterator = TokenList::const_iterator; 107 108 // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator. 109 using const_iterator = iterator; 110 111 // Create a token range by splitting a string. Each separator gets their own token. 112 // Since the separator are retained as tokens, it might be useful to call 113 // RemoveToken afterwards. 114 static TokenRange Split(const std::string& string, std::initializer_list<char> separators) { 115 TokenList new_token_list; 116 117 std::string tok; 118 for (auto&& c : string) { 119 for (char sep : separators) { 120 if (c == sep) { 121 // We spotted a separator character. 122 // Push back everything before the last separator as a new token. 123 // Push back the separator as a token. 124 if (!tok.empty()) { 125 new_token_list.push_back(tok); 126 tok = ""; 127 } 128 new_token_list.push_back(std::string() + sep); 129 } else { 130 // Build up the token with another character. 131 tok += c; 132 } 133 } 134 } 135 136 if (!tok.empty()) { 137 new_token_list.push_back(tok); 138 } 139 140 return TokenRange(std::move(new_token_list)); 141 } 142 143 // A RandomAccessIterator to the first element in this range. 144 iterator begin() const { 145 return begin_; 146 } 147 148 // A RandomAccessIterator to one past the last element in this range. 149 iterator end() const { 150 return end_; 151 } 152 153 // The size of the range, i.e. how many tokens are in it. 154 size_t Size() const { 155 return std::distance(begin_, end_); 156 } 157 158 // Are there 0 tokens in this range? 159 bool IsEmpty() const { 160 return Size() > 0; 161 } 162 163 // Look up a token by it's offset. 164 const std::string& GetToken(size_t offset) const { 165 assert(offset < Size()); 166 return *(begin_ + offset); 167 } 168 169 // Does this token range equal the other range? 170 // Equality is defined as having both the same size, and 171 // each corresponding token being equal. 172 bool operator==(const TokenRange& other) const { 173 if (this == &other) { 174 return true; 175 } 176 177 if (Size() != other.Size()) { 178 return false; 179 } 180 181 return std::equal(begin(), end(), other.begin()); 182 } 183 184 // Look up the token at the requested index. 185 const std::string& operator[](int index) const { 186 assert(index >= 0 && static_cast<size_t>(index) < Size()); 187 return *(begin() + index); 188 } 189 190 // Does this current range start with the other range? 191 bool StartsWith(const TokenRange& other) const { 192 if (this == &other) { 193 return true; 194 } 195 196 if (Size() < other.Size()) { 197 return false; 198 } 199 200 auto& smaller = Size() < other.Size() ? *this : other; 201 auto& greater = Size() < other.Size() ? other : *this; 202 203 return std::equal(smaller.begin(), smaller.end(), greater.begin()); 204 } 205 206 // Remove all characters 'c' from each token, potentially copying the underlying tokens. 207 TokenRange RemoveCharacter(char c) const { 208 TokenList new_token_list(begin(), end()); 209 210 bool changed = false; 211 for (auto&& token : new_token_list) { 212 auto it = std::remove_if(token.begin(), token.end(), [&](char ch) { 213 if (ch == c) { 214 changed = true; 215 return true; 216 } 217 return false; 218 }); 219 token.erase(it, token.end()); 220 } 221 222 if (!changed) { 223 return *this; 224 } 225 226 return TokenRange(std::move(new_token_list)); 227 } 228 229 // Remove all tokens matching this one, potentially copying the underlying tokens. 230 TokenRange RemoveToken(const std::string& token) { 231 return RemoveIf([&](const std::string& tok) { return tok == token; }); 232 } 233 234 // Discard all empty tokens, potentially copying the underlying tokens. 235 TokenRange DiscardEmpty() const { 236 return RemoveIf([](const std::string& token) { return token.empty(); }); 237 } 238 239 // Create a non-copying subset of this range. 240 // Length is trimmed so that the Slice does not go out of range. 241 TokenRange Slice(size_t offset, size_t length = std::string::npos) const { 242 assert(offset < Size()); 243 244 if (length != std::string::npos && offset + length > Size()) { 245 length = Size() - offset; 246 } 247 248 iterator it_end; 249 if (length == std::string::npos) { 250 it_end = end(); 251 } else { 252 it_end = begin() + offset + length; 253 } 254 255 return TokenRange(token_list_, begin() + offset, it_end); 256 } 257 258 // Try to match the string with tokens from this range. 259 // Each token is used to match exactly once (after which the next token is used, and so on). 260 // The matching happens from left-to-right in a non-greedy fashion. 261 // If the currently-matched token is the wildcard, then the new outputted token will 262 // contain as much as possible until the next token is matched. 263 // 264 // For example, if this == ["a:", "_", "b:] and "_" is the match string, then 265 // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"] 266 // 267 // Since the string matching can fail (e.g. ["foo"] against "bar"), then this 268 // function can fail, in which cause it will return null. 269 std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string, 270 const std::string& wildcard) const { 271 TokenList new_token_list; 272 273 size_t wildcard_idx = std::string::npos; 274 size_t string_idx = 0; 275 276 // Function to push all the characters matched as a wildcard so far 277 // as a brand new token. It resets the wildcard matching. 278 // Empty wildcards are possible and ok, but only if wildcard matching was on. 279 auto maybe_push_wildcard_token = [&]() { 280 if (wildcard_idx != std::string::npos) { 281 size_t wildcard_length = string_idx - wildcard_idx; 282 std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length); 283 new_token_list.push_back(std::move(wildcard_substr)); 284 285 wildcard_idx = std::string::npos; 286 } 287 }; 288 289 for (iterator it = begin(); it != end(); ++it) { 290 const std::string& tok = *it; 291 292 if (tok == wildcard) { 293 maybe_push_wildcard_token(); 294 wildcard_idx = string_idx; 295 continue; 296 } 297 298 size_t next_token_idx = string.find(tok); 299 if (next_token_idx == std::string::npos) { 300 // Could not find token at all 301 return nullptr; 302 } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) { 303 // Found the token at a non-starting location, and we weren't 304 // trying to parse the wildcard. 305 return nullptr; 306 } 307 308 new_token_list.push_back(string.substr(next_token_idx, tok.size())); 309 maybe_push_wildcard_token(); 310 string_idx += tok.size(); 311 } 312 313 size_t remaining = string.size() - string_idx; 314 if (remaining > 0) { 315 if (wildcard_idx == std::string::npos) { 316 // Some characters were still remaining in the string, 317 // but it wasn't trying to match a wildcard. 318 return nullptr; 319 } 320 } 321 322 // If some characters are remaining, the rest must be a wildcard. 323 string_idx += remaining; 324 maybe_push_wildcard_token(); 325 326 return std::unique_ptr<TokenRange>(new TokenRange(std::move(new_token_list))); 327 } 328 329 // Do a quick match token-by-token, and see if they match. 330 // Any tokens with a wildcard in them are only matched up until the wildcard. 331 // If this is true, then the wildcard matching later on can still fail, so this is not 332 // a guarantee that the argument is correct, it's more of a strong hint that the 333 // user-provided input *probably* was trying to match this argument. 334 // 335 // Returns how many tokens were either matched (or ignored because there was a 336 // wildcard present). 0 means no match. If the size() tokens are returned. 337 size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const { 338 auto token_it = token_list.begin(); 339 auto token_end = token_list.end(); 340 auto name_it = begin(); 341 auto name_end = end(); 342 343 size_t matched_tokens = 0; 344 345 while (token_it != token_end && name_it != name_end) { 346 // Skip token matching when the corresponding name has a wildcard in it. 347 const std::string& name = *name_it; 348 349 size_t wildcard_idx = name.find(wildcard); 350 if (wildcard_idx == std::string::npos) { // No wildcard present 351 // Did the definition token match the user token? 352 if (name != *token_it) { 353 return matched_tokens; 354 } 355 } else { 356 std::string name_prefix = name.substr(0, wildcard_idx); 357 358 // Did the user token start with the up-to-the-wildcard prefix? 359 if (!StartsWith(*token_it, name_prefix)) { 360 return matched_tokens; 361 } 362 } 363 364 ++token_it; 365 ++name_it; 366 ++matched_tokens; 367 } 368 369 // If we got this far, it's either a full match or the token list was too short. 370 return matched_tokens; 371 } 372 373 // Flatten the token range by joining every adjacent token with the separator character. 374 // e.g. ["hello", "world"].join('$') == "hello$world" 375 std::string Join(char separator) const { 376 TokenList tmp(begin(), end()); 377 return art::Join(tmp, separator); 378 // TODO: Join should probably take an offset or iterators 379 } 380 381 private: 382 static bool StartsWith(const std::string& larger, const std::string& smaller) { 383 if (larger.size() >= smaller.size()) { 384 return std::equal(smaller.begin(), smaller.end(), larger.begin()); 385 } 386 387 return false; 388 } 389 390 template <typename TPredicate> 391 TokenRange RemoveIf(const TPredicate& predicate) const { 392 // If any of the tokens in the token lists are empty, then 393 // we need to remove them and compress the token list into a smaller one. 394 bool remove = false; 395 for (auto it = begin_; it != end_; ++it) { 396 auto&& token = *it; 397 398 if (predicate(token)) { 399 remove = true; 400 break; 401 } 402 } 403 404 // Actually copy the token list and remove the tokens that don't match our predicate. 405 if (remove) { 406 auto token_list = std::make_shared<TokenList>(begin(), end()); 407 TokenList::iterator new_end = 408 std::remove_if(token_list->begin(), token_list->end(), predicate); 409 token_list->erase(new_end, token_list->end()); 410 411 assert(token_list_->size() > token_list->size() && "Nothing was actually removed!"); 412 413 return TokenRange(token_list); 414 } 415 416 return *this; 417 } 418 419 const std::shared_ptr<std::vector<std::string>> token_list_; 420 const iterator begin_; 421 const iterator end_; 422 }; 423 } // namespace art 424 425 #endif // ART_CMDLINE_TOKEN_RANGE_H_ 426