Home | History | Annotate | Download | only in cmdline
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_CMDLINE_TOKEN_RANGE_H_
     18 #define ART_CMDLINE_TOKEN_RANGE_H_
     19 
     20 #include <assert.h>
     21 #include <vector>
     22 #include <string>
     23 #include <algorithm>
     24 #include <memory>
     25 
     26 namespace art {
     27 // A range of tokens to make token matching algorithms easier.
     28 //
     29 // We try really hard to avoid copying and store only a pointer and iterators to the
     30 // interiors of the vector, so a typical copy constructor never ends up doing a deep copy.
     31 // It is up to the user to play nice and not to mutate the strings in-place.
     32 //
     33 // Tokens are only copied if a mutating operation is performed (and even then only
     34 // if it *actually* mutates the token).
     35 struct TokenRange {
     36   // Short-hand for a vector of strings. A single string and a token is synonymous.
     37   using TokenList = std::vector<std::string>;
     38 
     39   // Copying-from-vector constructor.
     40   explicit TokenRange(const TokenList& token_list)
     41     : token_list_(new TokenList(token_list)),
     42       begin_(token_list_->begin()),
     43       end_(token_list_->end())
     44   {}
     45 
     46   // Copying-from-iterator constructor
     47   template <typename ForwardIterator>
     48   TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
     49     : token_list_(new TokenList(it_begin, it_end)),
     50       begin_(token_list_->begin()),
     51       end_(token_list_->end())
     52   {}
     53 
     54 #if 0
     55   // Copying-from-vector constructor.
     56   TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED,
     57              TokenList::const_iterator it_begin,
     58              TokenList::const_iterator it_end)
     59     : token_list_(new TokenList(it_begin, it_end)),
     60       begin_(token_list_->begin()),
     61       end_(token_list_->end()) {
     62     assert(it_begin >= token_list.begin());
     63     assert(it_end <= token_list.end());
     64   }
     65 #endif
     66 
     67   // Copying from char array constructor, convertings into tokens (strings) along the way.
     68   TokenRange(const char* token_list[], size_t length)
     69     : token_list_(new TokenList(&token_list[0], &token_list[length])),
     70       begin_(token_list_->begin()),
     71       end_(token_list_->end())
     72   {}
     73 
     74   // Non-copying move-from-vector constructor. Takes over the token vector.
     75   explicit TokenRange(TokenList&& token_list)
     76     : token_list_(new TokenList(std::forward<TokenList>(token_list))),
     77       begin_(token_list_->begin()),
     78       end_(token_list_->end())
     79   {}
     80 
     81   // Non-copying constructor. Retain reference to existing list of tokens.
     82   TokenRange(std::shared_ptr<TokenList> token_list,
     83              TokenList::const_iterator it_begin,
     84              TokenList::const_iterator it_end)
     85     : token_list_(token_list),
     86       begin_(it_begin),
     87       end_(it_end) {
     88     assert(it_begin >= token_list->begin());
     89     assert(it_end <= token_list->end());
     90   }
     91 
     92   // Non-copying copy constructor.
     93   TokenRange(const TokenRange&) = default;
     94 
     95   // Non-copying move constructor.
     96   TokenRange(TokenRange&&) = default;
     97 
     98   // Non-copying constructor. Retains reference to an existing list of tokens, with offset.
     99   explicit TokenRange(std::shared_ptr<TokenList> token_list)
    100     : token_list_(token_list),
    101       begin_(token_list_->begin()),
    102       end_(token_list_->end())
    103   {}
    104 
    105   // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
    106   using iterator = TokenList::const_iterator;
    107 
    108   // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
    109   using const_iterator = iterator;
    110 
    111   // Create a token range by splitting a string. Each separator gets their own token.
    112   // Since the separator are retained as tokens, it might be useful to call
    113   // RemoveToken afterwards.
    114   static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
    115     TokenList new_token_list;
    116 
    117     std::string tok;
    118     for (auto&& c : string) {
    119       for (char sep : separators) {
    120         if (c == sep) {
    121           // We spotted a separator character.
    122           // Push back everything before the last separator as a new token.
    123           // Push back the separator as a token.
    124           if (!tok.empty()) {
    125             new_token_list.push_back(tok);
    126             tok = "";
    127           }
    128           new_token_list.push_back(std::string() + sep);
    129         } else {
    130           // Build up the token with another character.
    131           tok += c;
    132         }
    133       }
    134     }
    135 
    136     if (!tok.empty()) {
    137       new_token_list.push_back(tok);
    138     }
    139 
    140     return TokenRange(std::move(new_token_list));
    141   }
    142 
    143   // A RandomAccessIterator to the first element in this range.
    144   iterator begin() const {
    145     return begin_;
    146   }
    147 
    148   // A RandomAccessIterator to one past the last element in this range.
    149   iterator end() const {
    150     return end_;
    151   }
    152 
    153   // The size of the range, i.e. how many tokens are in it.
    154   size_t Size() const {
    155     return std::distance(begin_, end_);
    156   }
    157 
    158   // Are there 0 tokens in this range?
    159   bool IsEmpty() const {
    160     return Size() > 0;
    161   }
    162 
    163   // Look up a token by it's offset.
    164   const std::string& GetToken(size_t offset) const {
    165     assert(offset < Size());
    166     return *(begin_ + offset);
    167   }
    168 
    169   // Does this token range equal the other range?
    170   // Equality is defined as having both the same size, and
    171   // each corresponding token being equal.
    172   bool operator==(const TokenRange& other) const {
    173     if (this == &other) {
    174       return true;
    175     }
    176 
    177     if (Size() != other.Size()) {
    178       return false;
    179     }
    180 
    181     return std::equal(begin(), end(), other.begin());
    182   }
    183 
    184   // Look up the token at the requested index.
    185   const std::string& operator[](int index) const {
    186     assert(index >= 0 && static_cast<size_t>(index) < Size());
    187     return *(begin() + index);
    188   }
    189 
    190   // Does this current range start with the other range?
    191   bool StartsWith(const TokenRange& other) const {
    192     if (this == &other) {
    193       return true;
    194     }
    195 
    196     if (Size() < other.Size()) {
    197       return false;
    198     }
    199 
    200     auto& smaller = Size() < other.Size() ? *this : other;
    201     auto& greater = Size() < other.Size() ? other : *this;
    202 
    203     return std::equal(smaller.begin(), smaller.end(), greater.begin());
    204   }
    205 
    206   // Remove all characters 'c' from each token, potentially copying the underlying tokens.
    207   TokenRange RemoveCharacter(char c) const {
    208     TokenList new_token_list(begin(), end());
    209 
    210     bool changed = false;
    211     for (auto&& token : new_token_list) {
    212       auto it = std::remove_if(token.begin(), token.end(), [&](char ch) {
    213         if (ch == c) {
    214           changed = true;
    215           return true;
    216         }
    217         return false;
    218       });
    219       token.erase(it, token.end());
    220     }
    221 
    222     if (!changed) {
    223       return *this;
    224     }
    225 
    226     return TokenRange(std::move(new_token_list));
    227   }
    228 
    229   // Remove all tokens matching this one, potentially copying the underlying tokens.
    230   TokenRange RemoveToken(const std::string& token) {
    231     return RemoveIf([&](const std::string& tok) { return tok == token; });
    232   }
    233 
    234   // Discard all empty tokens, potentially copying the underlying tokens.
    235   TokenRange DiscardEmpty() const {
    236     return RemoveIf([](const std::string& token) { return token.empty(); });
    237   }
    238 
    239   // Create a non-copying subset of this range.
    240   // Length is trimmed so that the Slice does not go out of range.
    241   TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
    242     assert(offset < Size());
    243 
    244     if (length != std::string::npos && offset + length > Size()) {
    245       length = Size() - offset;
    246     }
    247 
    248     iterator it_end;
    249     if (length == std::string::npos) {
    250       it_end = end();
    251     } else {
    252       it_end = begin() + offset + length;
    253     }
    254 
    255     return TokenRange(token_list_, begin() + offset, it_end);
    256   }
    257 
    258   // Try to match the string with tokens from this range.
    259   // Each token is used to match exactly once (after which the next token is used, and so on).
    260   // The matching happens from left-to-right in a non-greedy fashion.
    261   // If the currently-matched token is the wildcard, then the new outputted token will
    262   // contain as much as possible until the next token is matched.
    263   //
    264   // For example, if this == ["a:", "_", "b:] and "_" is the match string, then
    265   // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
    266   //
    267   // Since the string matching can fail (e.g. ["foo"] against "bar"), then this
    268   // function can fail, in which cause it will return null.
    269   std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
    270                                               const std::string& wildcard) const {
    271     TokenList new_token_list;
    272 
    273     size_t wildcard_idx = std::string::npos;
    274     size_t string_idx = 0;
    275 
    276     // Function to push all the characters matched as a wildcard so far
    277     // as a brand new token. It resets the wildcard matching.
    278     // Empty wildcards are possible and ok, but only if wildcard matching was on.
    279     auto maybe_push_wildcard_token = [&]() {
    280       if (wildcard_idx != std::string::npos) {
    281         size_t wildcard_length = string_idx - wildcard_idx;
    282         std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length);
    283         new_token_list.push_back(std::move(wildcard_substr));
    284 
    285         wildcard_idx = std::string::npos;
    286       }
    287     };
    288 
    289     for (iterator it = begin(); it != end(); ++it) {
    290       const std::string& tok = *it;
    291 
    292       if (tok == wildcard) {
    293         maybe_push_wildcard_token();
    294         wildcard_idx = string_idx;
    295         continue;
    296       }
    297 
    298       size_t next_token_idx = string.find(tok);
    299       if (next_token_idx == std::string::npos) {
    300         // Could not find token at all
    301         return nullptr;
    302       } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
    303         // Found the token at a non-starting location, and we weren't
    304         // trying to parse the wildcard.
    305         return nullptr;
    306       }
    307 
    308       new_token_list.push_back(string.substr(next_token_idx, tok.size()));
    309       maybe_push_wildcard_token();
    310       string_idx += tok.size();
    311     }
    312 
    313     size_t remaining = string.size() - string_idx;
    314     if (remaining > 0) {
    315       if (wildcard_idx == std::string::npos) {
    316         // Some characters were still remaining in the string,
    317         // but it wasn't trying to match a wildcard.
    318         return nullptr;
    319       }
    320     }
    321 
    322     // If some characters are remaining, the rest must be a wildcard.
    323     string_idx += remaining;
    324     maybe_push_wildcard_token();
    325 
    326     return std::unique_ptr<TokenRange>(new TokenRange(std::move(new_token_list)));
    327   }
    328 
    329   // Do a quick match token-by-token, and see if they match.
    330   // Any tokens with a wildcard in them are only matched up until the wildcard.
    331   // If this is true, then the wildcard matching later on can still fail, so this is not
    332   // a guarantee that the argument is correct, it's more of a strong hint that the
    333   // user-provided input *probably* was trying to match this argument.
    334   //
    335   // Returns how many tokens were either matched (or ignored because there was a
    336   // wildcard present). 0 means no match. If the size() tokens are returned.
    337   size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
    338     auto token_it = token_list.begin();
    339     auto token_end = token_list.end();
    340     auto name_it = begin();
    341     auto name_end = end();
    342 
    343     size_t matched_tokens = 0;
    344 
    345     while (token_it != token_end && name_it != name_end) {
    346       // Skip token matching when the corresponding name has a wildcard in it.
    347       const std::string& name = *name_it;
    348 
    349       size_t wildcard_idx = name.find(wildcard);
    350       if (wildcard_idx == std::string::npos) {  // No wildcard present
    351         // Did the definition token match the user token?
    352         if (name != *token_it) {
    353           return matched_tokens;
    354         }
    355       } else {
    356         std::string name_prefix = name.substr(0, wildcard_idx);
    357 
    358         // Did the user token start with the up-to-the-wildcard prefix?
    359         if (!StartsWith(*token_it, name_prefix)) {
    360           return matched_tokens;
    361         }
    362       }
    363 
    364       ++token_it;
    365       ++name_it;
    366       ++matched_tokens;
    367     }
    368 
    369     // If we got this far, it's either a full match or the token list was too short.
    370     return matched_tokens;
    371   }
    372 
    373   // Flatten the token range by joining every adjacent token with the separator character.
    374   // e.g. ["hello", "world"].join('$') == "hello$world"
    375   std::string Join(char separator) const {
    376     TokenList tmp(begin(), end());
    377     return art::Join(tmp, separator);
    378     // TODO: Join should probably take an offset or iterators
    379   }
    380 
    381  private:
    382   static bool StartsWith(const std::string& larger, const std::string& smaller) {
    383     if (larger.size() >= smaller.size()) {
    384       return std::equal(smaller.begin(), smaller.end(), larger.begin());
    385     }
    386 
    387     return false;
    388   }
    389 
    390   template <typename TPredicate>
    391   TokenRange RemoveIf(const TPredicate& predicate) const {
    392     // If any of the tokens in the token lists are empty, then
    393     // we need to remove them and compress the token list into a smaller one.
    394     bool remove = false;
    395     for (auto it = begin_; it != end_; ++it) {
    396       auto&& token = *it;
    397 
    398       if (predicate(token)) {
    399         remove = true;
    400         break;
    401       }
    402     }
    403 
    404     // Actually copy the token list and remove the tokens that don't match our predicate.
    405     if (remove) {
    406       auto token_list = std::make_shared<TokenList>(begin(), end());
    407       TokenList::iterator new_end =
    408           std::remove_if(token_list->begin(), token_list->end(), predicate);
    409       token_list->erase(new_end, token_list->end());
    410 
    411       assert(token_list_->size() > token_list->size() && "Nothing was actually removed!");
    412 
    413       return TokenRange(token_list);
    414     }
    415 
    416     return *this;
    417   }
    418 
    419   const std::shared_ptr<std::vector<std::string>> token_list_;
    420   const iterator begin_;
    421   const iterator end_;
    422 };
    423 }  // namespace art
    424 
    425 #endif  // ART_CMDLINE_TOKEN_RANGE_H_
    426