Home | History | Annotate | Download | only in cmdline
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_CMDLINE_TOKEN_RANGE_H_
     18 #define ART_CMDLINE_TOKEN_RANGE_H_
     19 
     20 #include <assert.h>
     21 #include <vector>
     22 #include <string>
     23 #include <algorithm>
     24 #include <memory>
     25 
     26 #include "android-base/strings.h"
     27 
     28 namespace art {
     29 // A range of tokens to make token matching algorithms easier.
     30 //
     31 // We try really hard to avoid copying and store only a pointer and iterators to the
     32 // interiors of the vector, so a typical copy constructor never ends up doing a deep copy.
     33 // It is up to the user to play nice and not to mutate the strings in-place.
     34 //
     35 // Tokens are only copied if a mutating operation is performed (and even then only
     36 // if it *actually* mutates the token).
     37 struct TokenRange {
     38   // Short-hand for a vector of strings. A single string and a token is synonymous.
     39   using TokenList = std::vector<std::string>;
     40 
     41   // Copying-from-vector constructor.
     42   explicit TokenRange(const TokenList& token_list)
     43     : token_list_(new TokenList(token_list)),
     44       begin_(token_list_->begin()),
     45       end_(token_list_->end())
     46   {}
     47 
     48   // Copying-from-iterator constructor
     49   template <typename ForwardIterator>
     50   TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
     51     : token_list_(new TokenList(it_begin, it_end)),
     52       begin_(token_list_->begin()),
     53       end_(token_list_->end())
     54   {}
     55 
     56 #if 0
     57   // Copying-from-vector constructor.
     58   TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED,
     59              TokenList::const_iterator it_begin,
     60              TokenList::const_iterator it_end)
     61     : token_list_(new TokenList(it_begin, it_end)),
     62       begin_(token_list_->begin()),
     63       end_(token_list_->end()) {
     64     assert(it_begin >= token_list.begin());
     65     assert(it_end <= token_list.end());
     66   }
     67 #endif
     68 
     69   // Copying from char array constructor, convertings into tokens (strings) along the way.
     70   TokenRange(const char* token_list[], size_t length)
     71     : token_list_(new TokenList(&token_list[0], &token_list[length])),
     72       begin_(token_list_->begin()),
     73       end_(token_list_->end())
     74   {}
     75 
     76   // Non-copying move-from-vector constructor. Takes over the token vector.
     77   explicit TokenRange(TokenList&& token_list)
     78     : token_list_(new TokenList(std::forward<TokenList>(token_list))),
     79       begin_(token_list_->begin()),
     80       end_(token_list_->end())
     81   {}
     82 
     83   // Non-copying constructor. Retain reference to existing list of tokens.
     84   TokenRange(std::shared_ptr<TokenList> token_list,
     85              TokenList::const_iterator it_begin,
     86              TokenList::const_iterator it_end)
     87     : token_list_(token_list),
     88       begin_(it_begin),
     89       end_(it_end) {
     90     assert(it_begin >= token_list->begin());
     91     assert(it_end <= token_list->end());
     92   }
     93 
     94   // Non-copying copy constructor.
     95   TokenRange(const TokenRange&) = default;
     96 
     97   // Non-copying move constructor.
     98   TokenRange(TokenRange&&) = default;
     99 
    100   // Non-copying constructor. Retains reference to an existing list of tokens, with offset.
    101   explicit TokenRange(std::shared_ptr<TokenList> token_list)
    102     : token_list_(token_list),
    103       begin_(token_list_->begin()),
    104       end_(token_list_->end())
    105   {}
    106 
    107   // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
    108   using iterator = TokenList::const_iterator;
    109 
    110   // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
    111   using const_iterator = iterator;
    112 
    113   // Create a token range by splitting a string. Each separator gets their own token.
    114   // Since the separator are retained as tokens, it might be useful to call
    115   // RemoveToken afterwards.
    116   static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
    117     TokenList new_token_list;
    118 
    119     std::string tok;
    120     for (auto&& c : string) {
    121       for (char sep : separators) {
    122         if (c == sep) {
    123           // We spotted a separator character.
    124           // Push back everything before the last separator as a new token.
    125           // Push back the separator as a token.
    126           if (!tok.empty()) {
    127             new_token_list.push_back(tok);
    128             tok = "";
    129           }
    130           new_token_list.push_back(std::string() + sep);
    131         } else {
    132           // Build up the token with another character.
    133           tok += c;
    134         }
    135       }
    136     }
    137 
    138     if (!tok.empty()) {
    139       new_token_list.push_back(tok);
    140     }
    141 
    142     return TokenRange(std::move(new_token_list));
    143   }
    144 
    145   // A RandomAccessIterator to the first element in this range.
    146   iterator begin() const {
    147     return begin_;
    148   }
    149 
    150   // A RandomAccessIterator to one past the last element in this range.
    151   iterator end() const {
    152     return end_;
    153   }
    154 
    155   // The size of the range, i.e. how many tokens are in it.
    156   size_t Size() const {
    157     return std::distance(begin_, end_);
    158   }
    159 
    160   // Are there 0 tokens in this range?
    161   bool IsEmpty() const {
    162     return Size() > 0;
    163   }
    164 
    165   // Look up a token by it's offset.
    166   const std::string& GetToken(size_t offset) const {
    167     assert(offset < Size());
    168     return *(begin_ + offset);
    169   }
    170 
    171   // Does this token range equal the other range?
    172   // Equality is defined as having both the same size, and
    173   // each corresponding token being equal.
    174   bool operator==(const TokenRange& other) const {
    175     if (this == &other) {
    176       return true;
    177     }
    178 
    179     if (Size() != other.Size()) {
    180       return false;
    181     }
    182 
    183     return std::equal(begin(), end(), other.begin());
    184   }
    185 
    186   // Look up the token at the requested index.
    187   const std::string& operator[](int index) const {
    188     assert(index >= 0 && static_cast<size_t>(index) < Size());
    189     return *(begin() + index);
    190   }
    191 
    192   // Does this current range start with the other range?
    193   bool StartsWith(const TokenRange& other) const {
    194     if (this == &other) {
    195       return true;
    196     }
    197 
    198     if (Size() < other.Size()) {
    199       return false;
    200     }
    201 
    202     auto& smaller = Size() < other.Size() ? *this : other;
    203     auto& greater = Size() < other.Size() ? other : *this;
    204 
    205     return std::equal(smaller.begin(), smaller.end(), greater.begin());
    206   }
    207 
    208   // Remove all characters 'c' from each token, potentially copying the underlying tokens.
    209   TokenRange RemoveCharacter(char c) const {
    210     TokenList new_token_list(begin(), end());
    211 
    212     bool changed = false;
    213     for (auto&& token : new_token_list) {
    214       auto it = std::remove_if(token.begin(), token.end(), [&](char ch) {
    215         if (ch == c) {
    216           changed = true;
    217           return true;
    218         }
    219         return false;
    220       });
    221       token.erase(it, token.end());
    222     }
    223 
    224     if (!changed) {
    225       return *this;
    226     }
    227 
    228     return TokenRange(std::move(new_token_list));
    229   }
    230 
    231   // Remove all tokens matching this one, potentially copying the underlying tokens.
    232   TokenRange RemoveToken(const std::string& token) {
    233     return RemoveIf([&](const std::string& tok) { return tok == token; });
    234   }
    235 
    236   // Discard all empty tokens, potentially copying the underlying tokens.
    237   TokenRange DiscardEmpty() const {
    238     return RemoveIf([](const std::string& token) { return token.empty(); });
    239   }
    240 
    241   // Create a non-copying subset of this range.
    242   // Length is trimmed so that the Slice does not go out of range.
    243   TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
    244     assert(offset < Size());
    245 
    246     if (length != std::string::npos && offset + length > Size()) {
    247       length = Size() - offset;
    248     }
    249 
    250     iterator it_end;
    251     if (length == std::string::npos) {
    252       it_end = end();
    253     } else {
    254       it_end = begin() + offset + length;
    255     }
    256 
    257     return TokenRange(token_list_, begin() + offset, it_end);
    258   }
    259 
    260   // Try to match the string with tokens from this range.
    261   // Each token is used to match exactly once (after which the next token is used, and so on).
    262   // The matching happens from left-to-right in a non-greedy fashion.
    263   // If the currently-matched token is the wildcard, then the new outputted token will
    264   // contain as much as possible until the next token is matched.
    265   //
    266   // For example, if this == ["a:", "_", "b:] and "_" is the match string, then
    267   // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
    268   //
    269   // Since the string matching can fail (e.g. ["foo"] against "bar"), then this
    270   // function can fail, in which cause it will return null.
    271   std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
    272                                               const std::string& wildcard) const {
    273     TokenList new_token_list;
    274 
    275     size_t wildcard_idx = std::string::npos;
    276     size_t string_idx = 0;
    277 
    278     // Function to push all the characters matched as a wildcard so far
    279     // as a brand new token. It resets the wildcard matching.
    280     // Empty wildcards are possible and ok, but only if wildcard matching was on.
    281     auto maybe_push_wildcard_token = [&]() {
    282       if (wildcard_idx != std::string::npos) {
    283         size_t wildcard_length = string_idx - wildcard_idx;
    284         std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length);
    285         new_token_list.push_back(std::move(wildcard_substr));
    286 
    287         wildcard_idx = std::string::npos;
    288       }
    289     };
    290 
    291     for (iterator it = begin(); it != end(); ++it) {
    292       const std::string& tok = *it;
    293 
    294       if (tok == wildcard) {
    295         maybe_push_wildcard_token();
    296         wildcard_idx = string_idx;
    297         continue;
    298       }
    299 
    300       size_t next_token_idx = string.find(tok);
    301       if (next_token_idx == std::string::npos) {
    302         // Could not find token at all
    303         return nullptr;
    304       } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
    305         // Found the token at a non-starting location, and we weren't
    306         // trying to parse the wildcard.
    307         return nullptr;
    308       }
    309 
    310       new_token_list.push_back(string.substr(next_token_idx, tok.size()));
    311       maybe_push_wildcard_token();
    312       string_idx += tok.size();
    313     }
    314 
    315     size_t remaining = string.size() - string_idx;
    316     if (remaining > 0) {
    317       if (wildcard_idx == std::string::npos) {
    318         // Some characters were still remaining in the string,
    319         // but it wasn't trying to match a wildcard.
    320         return nullptr;
    321       }
    322     }
    323 
    324     // If some characters are remaining, the rest must be a wildcard.
    325     string_idx += remaining;
    326     maybe_push_wildcard_token();
    327 
    328     return std::unique_ptr<TokenRange>(new TokenRange(std::move(new_token_list)));
    329   }
    330 
    331   // Do a quick match token-by-token, and see if they match.
    332   // Any tokens with a wildcard in them are only matched up until the wildcard.
    333   // If this is true, then the wildcard matching later on can still fail, so this is not
    334   // a guarantee that the argument is correct, it's more of a strong hint that the
    335   // user-provided input *probably* was trying to match this argument.
    336   //
    337   // Returns how many tokens were either matched (or ignored because there was a
    338   // wildcard present). 0 means no match. If the size() tokens are returned.
    339   size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
    340     auto token_it = token_list.begin();
    341     auto token_end = token_list.end();
    342     auto name_it = begin();
    343     auto name_end = end();
    344 
    345     size_t matched_tokens = 0;
    346 
    347     while (token_it != token_end && name_it != name_end) {
    348       // Skip token matching when the corresponding name has a wildcard in it.
    349       const std::string& name = *name_it;
    350 
    351       size_t wildcard_idx = name.find(wildcard);
    352       if (wildcard_idx == std::string::npos) {  // No wildcard present
    353         // Did the definition token match the user token?
    354         if (name != *token_it) {
    355           return matched_tokens;
    356         }
    357       } else {
    358         std::string name_prefix = name.substr(0, wildcard_idx);
    359 
    360         // Did the user token start with the up-to-the-wildcard prefix?
    361         if (!StartsWith(*token_it, name_prefix)) {
    362           return matched_tokens;
    363         }
    364       }
    365 
    366       ++token_it;
    367       ++name_it;
    368       ++matched_tokens;
    369     }
    370 
    371     // If we got this far, it's either a full match or the token list was too short.
    372     return matched_tokens;
    373   }
    374 
    375   // Flatten the token range by joining every adjacent token with the separator character.
    376   // e.g. ["hello", "world"].join('$') == "hello$world"
    377   std::string Join(char separator) const {
    378     TokenList tmp(begin(), end());
    379     return android::base::Join(tmp, separator);
    380     // TODO: Join should probably take an offset or iterators
    381   }
    382 
    383  private:
    384   static bool StartsWith(const std::string& larger, const std::string& smaller) {
    385     if (larger.size() >= smaller.size()) {
    386       return std::equal(smaller.begin(), smaller.end(), larger.begin());
    387     }
    388 
    389     return false;
    390   }
    391 
    392   template <typename TPredicate>
    393   TokenRange RemoveIf(const TPredicate& predicate) const {
    394     // If any of the tokens in the token lists are empty, then
    395     // we need to remove them and compress the token list into a smaller one.
    396     bool remove = false;
    397     for (auto it = begin_; it != end_; ++it) {
    398       auto&& token = *it;
    399 
    400       if (predicate(token)) {
    401         remove = true;
    402         break;
    403       }
    404     }
    405 
    406     // Actually copy the token list and remove the tokens that don't match our predicate.
    407     if (remove) {
    408       auto token_list = std::make_shared<TokenList>(begin(), end());
    409       TokenList::iterator new_end =
    410           std::remove_if(token_list->begin(), token_list->end(), predicate);
    411       token_list->erase(new_end, token_list->end());
    412 
    413       assert(token_list_->size() > token_list->size() && "Nothing was actually removed!");
    414 
    415       return TokenRange(token_list);
    416     }
    417 
    418     return *this;
    419   }
    420 
    421   const std::shared_ptr<std::vector<std::string>> token_list_;
    422   const iterator begin_;
    423   const iterator end_;
    424 };
    425 }  // namespace art
    426 
    427 #endif  // ART_CMDLINE_TOKEN_RANGE_H_
    428