Home | History | Annotate | Download | only in detail
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
     18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
     19 
     20 #include <assert.h>
     21 #include <algorithm>
     22 #include <functional>
     23 #include <memory>
     24 #include <numeric>
     25 #include <type_traits>
     26 #include <vector>
     27 
     28 #include "android-base/strings.h"
     29 
     30 #include "cmdline_parse_result.h"
     31 #include "cmdline_types.h"
     32 #include "token_range.h"
     33 #include "unit.h"
     34 
     35 namespace art {
     36 // Implementation details for the parser. Do not look inside if you hate templates.
     37 namespace detail {
     38 // A non-templated base class for argument parsers. Used by the general parser
     39 // to parse arguments, without needing to know the argument type at compile time.
     40 //
     41 // This is an application of the type erasure idiom.
     42 struct CmdlineParseArgumentAny {
     43   virtual ~CmdlineParseArgumentAny() {}
     44 
     45   // Attempt to parse this argument starting at arguments[position].
     46   // If the parsing succeeds, the parsed value will be saved as a side-effect.
     47   //
     48   // In most situations, the parsing will not match by returning kUnknown. In this case,
     49   // no tokens were consumed and the position variable will not be updated.
     50   //
     51   // At other times, parsing may fail due to validation but the initial token was still matched
     52   // (for example an out of range value, or passing in a string where an int was expected).
     53   // In this case the tokens are still consumed, and the position variable will get incremented
     54   // by all the consumed tokens.
     55   //
     56   // The # of tokens consumed by the parse attempt will be set as an out-parameter into
     57   // consumed_tokens. The parser should skip this many tokens before parsing the next
     58   // argument.
     59   virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;
     60   // How many tokens should be taken off argv for parsing this argument.
     61   // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
     62   //
     63   // A [min,max] range is returned to represent argument definitions with multiple
     64   // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
     65   virtual std::pair<size_t, size_t> GetNumTokens() const = 0;
     66   // Get the run-time typename of the argument type.
     67   virtual const char* GetTypeName() const = 0;
     68   // Try to do a close match, returning how many tokens were matched against this argument
     69   // definition. More tokens is better.
     70   //
     71   // Do a quick match token-by-token, and see if they match.
     72   // Any tokens with a wildcard in them are only matched up until the wildcard.
     73   // If this is true, then the wildcard matching later on can still fail, so this is not
     74   // a guarantee that the argument is correct, it's more of a strong hint that the
     75   // user-provided input *probably* was trying to match this argument.
     76   //
     77   // Returns how many tokens were either matched (or ignored because there was a
     78   // wildcard present). 0 means no match. If the Size() tokens are returned.
     79   virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
     80 };
     81 
     // SFINAE helper: names std::enable_if<true> when T is an arithmetic type (so the nested
     // ::type exists and is void) and std::enable_if<false> otherwise (no nested ::type).
     template <typename T>
     using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value, void>;
     84 
     // SFINAE helper: names std::enable_if<true> when T is NOT an arithmetic type (so the
     // nested ::type exists and is void) and std::enable_if<false> when it is arithmetic.
     template <typename T>
     using DisableIfNumeric = std::enable_if<!(std::is_arithmetic<T>::value), void>;
     87 
     88 // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
     89 template <typename TArg>
     90 struct CmdlineParserArgumentInfo {
     91   // This version will only be used if TArg is arithmetic and thus has the <= operators.
     92   template <typename T = TArg>  // Necessary to get SFINAE to kick in.
     93   bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) {
     94     if (has_range_) {
     95       return min_ <= value && value <= max_;
     96     }
     97     return true;
     98   }
     99 
    100   // This version will be used at other times when TArg is not arithmetic.
    101   template <typename T = TArg>
    102   bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) {
    103     assert(!has_range_);
    104     return true;
    105   }
    106 
    107   // Do a quick match token-by-token, and see if they match.
    108   // Any tokens with a wildcard in them only match the prefix up until the wildcard.
    109   //
    110   // If this is true, then the wildcard matching later on can still fail, so this is not
    111   // a guarantee that the argument is correct, it's more of a strong hint that the
    112   // user-provided input *probably* was trying to match this argument.
    113   size_t MaybeMatches(const TokenRange& token_list) const {
    114     auto best_match = FindClosestMatch(token_list);
    115 
    116     return best_match.second;
    117   }
    118 
    119   // Attempt to find the closest match (see MaybeMatches).
    120   //
    121   // Returns the token range that was the closest match and the # of tokens that
    122   // this range was matched up until.
    123   std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const {
    124     const TokenRange* best_match_ptr = nullptr;
    125 
    126     size_t best_match = 0;
    127     for (auto&& token_range : tokenized_names_) {
    128       size_t this_match = token_range.MaybeMatches(token_list, std::string("_"));
    129 
    130       if (this_match > best_match) {
    131         best_match_ptr = &token_range;
    132         best_match = this_match;
    133       }
    134     }
    135 
    136     return std::make_pair(best_match_ptr, best_match);
    137   }
    138 
    139   // Mark the argument definition as completed, do not mutate the object anymore after this
    140   // call is done.
    141   //
    142   // Performs several sanity checks and token calculations.
    143   void CompleteArgument() {
    144     assert(names_.size() >= 1);
    145     assert(!is_completed_);
    146 
    147     is_completed_ = true;
    148 
    149     size_t blank_count = 0;
    150     size_t token_count = 0;
    151 
    152     size_t global_blank_count = 0;
    153     size_t global_token_count = 0;
    154     for (auto&& name : names_) {
    155       std::string s(name);
    156 
    157       size_t local_blank_count = std::count(s.begin(), s.end(), '_');
    158       size_t local_token_count = std::count(s.begin(), s.end(), ' ');
    159 
    160       if (global_blank_count != 0) {
    161         assert(local_blank_count == global_blank_count
    162                && "Every argument descriptor string must have same amount of blanks (_)");
    163       }
    164 
    165       if (local_blank_count != 0) {
    166         global_blank_count = local_blank_count;
    167         blank_count++;
    168 
    169         assert(local_blank_count == 1 && "More than one blank is not supported");
    170         assert(s.back() == '_' && "The blank character must only be at the end of the string");
    171       }
    172 
    173       if (global_token_count != 0) {
    174         assert(local_token_count == global_token_count
    175                && "Every argument descriptor string must have same amount of tokens (spaces)");
    176       }
    177 
    178       if (local_token_count != 0) {
    179         global_token_count = local_token_count;
    180         token_count++;
    181       }
    182 
    183       // Tokenize every name, turning it from a string to a token list.
    184       tokenized_names_.clear();
    185       for (auto&& name1 : names_) {
    186         // Split along ' ' only, removing any duplicated spaces.
    187         tokenized_names_.push_back(
    188             TokenRange::Split(name1, {' '}).RemoveToken(" "));
    189       }
    190 
    191       // remove the _ character from each of the token ranges
    192       // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
    193       // and this is OK because we still need an empty token to simplify
    194       // range comparisons
    195       simple_names_.clear();
    196 
    197       for (auto&& tokenized_name : tokenized_names_) {
    198         simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
    199       }
    200     }
    201 
    202     if (token_count != 0) {
    203       assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
    204           token_count == names_.size()));
    205     }
    206 
    207     if (blank_count != 0) {
    208       assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
    209           blank_count == names_.size()));
    210     }
    211 
    212     using_blanks_ = blank_count > 0;
    213     {
    214       size_t smallest_name_token_range_size =
    215           std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u),
    216                           [](size_t min, const TokenRange& cur) {
    217                             return std::min(min, cur.Size());
    218                           });
    219       size_t largest_name_token_range_size =
    220           std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u,
    221                           [](size_t max, const TokenRange& cur) {
    222                             return std::max(max, cur.Size());
    223                           });
    224 
    225       token_range_size_ = std::make_pair(smallest_name_token_range_size,
    226                                          largest_name_token_range_size);
    227     }
    228 
    229     if (has_value_list_) {
    230       assert(names_.size() == value_list_.size()
    231              && "Number of arg descriptors must match number of values");
    232       assert(!has_value_map_);
    233     }
    234     if (has_value_map_) {
    235       if (!using_blanks_) {
    236         assert(names_.size() == value_map_.size() &&
    237                "Since no blanks were specified, each arg is mapped directly into a mapped "
    238                "value without parsing; sizes must match");
    239       }
    240 
    241       assert(!has_value_list_);
    242     }
    243 
    244     if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
    245       assert((has_value_map_ || has_value_list_) &&
    246              "Arguments without a blank (_) must provide either a value map or a value list");
    247     }
    248 
    249     TypedCheck();
    250   }
    251 
    252   // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
    253   std::vector<const char*> names_;
    254   // Is there at least 1 wildcard '_' in the argument definition?
    255   bool using_blanks_ = false;
    256   // [min, max] token counts in each arg def
    257   std::pair<size_t, size_t> token_range_size_;
    258 
    259   // contains all the names in a tokenized form, i.e. as a space-delimited list
    260   std::vector<TokenRange> tokenized_names_;
    261 
    262   // contains the tokenized names, but with the _ character stripped
    263   std::vector<TokenRange> simple_names_;
    264 
    265   // For argument definitions created with '.AppendValues()'
    266   // Meaning that parsing should mutate the existing value in-place if possible.
    267   bool appending_values_ = false;
    268 
    269   // For argument definitions created with '.WithRange(min, max)'
    270   bool has_range_ = false;
    271   TArg min_;
    272   TArg max_;
    273 
    274   // For argument definitions created with '.WithValueMap'
    275   bool has_value_map_ = false;
    276   std::vector<std::pair<const char*, TArg>> value_map_;
    277 
    278   // For argument definitions created with '.WithValues'
    279   bool has_value_list_ = false;
    280   std::vector<TArg> value_list_;
    281 
    282   // Make sure there's a default constructor.
    283   CmdlineParserArgumentInfo() = default;
    284 
    285   // Ensure there's a default move constructor.
    286   CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;
    287 
    288  private:
    289   // Perform type-specific checks at runtime.
    290   template <typename T = TArg>
    291   void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
    292     assert(!using_blanks_ &&
    293            "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
    294   }
    295 
    296   void TypedCheck() {}
    297 
    298   bool is_completed_ = false;
    299 };
    300 
    301 // A virtual-implementation of the necessary argument information in order to
    302 // be able to parse arguments.
    303 template <typename TArg>
    304 struct CmdlineParseArgument : CmdlineParseArgumentAny {
    305   CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
    306                        std::function<void(TArg&)>&& save_argument,
    307                        std::function<TArg&(void)>&& load_argument)
    308       : argument_info_(std::forward<decltype(argument_info)>(argument_info)),
    309         save_argument_(std::forward<decltype(save_argument)>(save_argument)),
    310         load_argument_(std::forward<decltype(load_argument)>(load_argument)) {
    311   }
    312 
    313   using UserTypeInfo = CmdlineType<TArg>;
    314 
    315   virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
    316     assert(arguments.Size() > 0);
    317     assert(consumed_tokens != nullptr);
    318 
    319     auto closest_match_res = argument_info_.FindClosestMatch(arguments);
    320     size_t best_match_size = closest_match_res.second;
    321     const TokenRange* best_match_arg_def = closest_match_res.first;
    322 
    323     if (best_match_size > arguments.Size()) {
    324       // The best match has more tokens than were provided.
    325       // Shouldn't happen in practice since the outer parser does this check.
    326       return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
    327     }
    328 
    329     assert(best_match_arg_def != nullptr);
    330     *consumed_tokens = best_match_arg_def->Size();
    331 
    332     if (!argument_info_.using_blanks_) {
    333       return ParseArgumentSingle(arguments.Join(' '));
    334     }
    335 
    336     // Extract out the blank value from arguments
    337     // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
    338     std::string blank_value = "";
    339     size_t idx = 0;
    340     for (auto&& def_token : *best_match_arg_def) {
    341       auto&& arg_token = arguments[idx];
    342 
    343       // Does this definition-token have a wildcard in it?
    344       if (def_token.find('_') == std::string::npos) {
    345         // No, regular token. Match 1:1 against the argument token.
    346         bool token_match = def_token == arg_token;
    347 
    348         if (!token_match) {
    349           return CmdlineResult(CmdlineResult::kFailure,
    350                                std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
    351                                + " at token " + std::to_string(idx));
    352         }
    353       } else {
    354         // This is a wild-carded token.
    355         TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});
    356 
    357         // Extract the wildcard contents out of the user-provided arg_token.
    358         std::unique_ptr<TokenRange> arg_matches =
    359             def_split_wildcards.MatchSubstrings(arg_token, "_");
    360         if (arg_matches == nullptr) {
    361           return CmdlineResult(CmdlineResult::kFailure,
    362                                std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
    363                                + ", with a wildcard pattern " + def_token
    364                                + " at token " + std::to_string(idx));
    365         }
    366 
    367         // Get the corresponding wildcard tokens from arg_matches,
    368         // and concatenate it to blank_value.
    369         for (size_t sub_idx = 0;
    370             sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
    371           if (def_split_wildcards[sub_idx] == "_") {
    372             blank_value += arg_matches->GetToken(sub_idx);
    373           }
    374         }
    375       }
    376 
    377       ++idx;
    378     }
    379 
    380     return ParseArgumentSingle(blank_value);
    381   }
    382 
    383  private:
    384   virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
    385     // TODO: refactor to use LookupValue for the value lists/maps
    386 
    387     // Handle the 'WithValueMap(...)' argument definition
    388     if (argument_info_.has_value_map_) {
    389       for (auto&& value_pair : argument_info_.value_map_) {
    390         const char* name = value_pair.first;
    391 
    392         if (argument == name) {
    393           return SaveArgument(value_pair.second);
    394         }
    395       }
    396 
    397       // Error case: Fail, telling the user what the allowed values were.
    398       std::vector<std::string> allowed_values;
    399       for (auto&& value_pair : argument_info_.value_map_) {
    400         const char* name = value_pair.first;
    401         allowed_values.push_back(name);
    402       }
    403 
    404       std::string allowed_values_flat = android::base::Join(allowed_values, ',');
    405       return CmdlineResult(CmdlineResult::kFailure,
    406                            "Argument value '" + argument + "' does not match any of known valid"
    407                             "values: {" + allowed_values_flat + "}");
    408     }
    409 
    410     // Handle the 'WithValues(...)' argument definition
    411     if (argument_info_.has_value_list_) {
    412       size_t arg_def_idx = 0;
    413       for (auto&& value : argument_info_.value_list_) {
    414         auto&& arg_def_token = argument_info_.names_[arg_def_idx];
    415 
    416         if (arg_def_token == argument) {
    417           return SaveArgument(value);
    418         }
    419         ++arg_def_idx;
    420       }
    421 
    422       assert(arg_def_idx + 1 == argument_info_.value_list_.size() &&
    423              "Number of named argument definitions must match number of values defined");
    424 
    425       // Error case: Fail, telling the user what the allowed values were.
    426       std::vector<std::string> allowed_values;
    427       for (auto&& arg_name : argument_info_.names_) {
    428         allowed_values.push_back(arg_name);
    429       }
    430 
    431       std::string allowed_values_flat = android::base::Join(allowed_values, ',');
    432       return CmdlineResult(CmdlineResult::kFailure,
    433                            "Argument value '" + argument + "' does not match any of known valid"
    434                             "values: {" + allowed_values_flat + "}");
    435     }
    436 
    437     // Handle the regular case where we parsed an unknown value from a blank.
    438     UserTypeInfo type_parser;
    439 
    440     if (argument_info_.appending_values_) {
    441       TArg& existing = load_argument_();
    442       CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);
    443 
    444       assert(!argument_info_.has_range_);
    445 
    446       return result;
    447     }
    448 
    449     CmdlineParseResult<TArg> result = type_parser.Parse(argument);
    450 
    451     if (result.IsSuccess()) {
    452       TArg& value = result.GetValue();
    453 
    454       // Do a range check for 'WithRange(min,max)' argument definition.
    455       if (!argument_info_.CheckRange(value)) {
    456         return CmdlineParseResult<TArg>::OutOfRange(
    457             value, argument_info_.min_, argument_info_.max_);
    458       }
    459 
    460       return SaveArgument(value);
    461     }
    462 
    463     // Some kind of type-specific parse error. Pass the result as-is.
    464     CmdlineResult raw_result = std::move(result);
    465     return raw_result;
    466   }
    467 
    468  public:
    469   virtual const char* GetTypeName() const {
    470     // TODO: Obviate the need for each type specialization to hardcode the type name
    471     return UserTypeInfo::Name();
    472   }
    473 
    474   // How many tokens should be taken off argv for parsing this argument.
    475   // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
    476   //
    477   // A [min,max] range is returned to represent argument definitions with multiple
    478   // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
    479   virtual std::pair<size_t, size_t> GetNumTokens() const {
    480     return argument_info_.token_range_size_;
    481   }
    482 
    483   // See if this token range might begin the same as the argument definition.
    484   virtual size_t MaybeMatches(const TokenRange& tokens) {
    485     return argument_info_.MaybeMatches(tokens);
    486   }
    487 
    488  private:
    489   CmdlineResult SaveArgument(const TArg& value) {
    490     assert(!argument_info_.appending_values_
    491            && "If the values are being appended, then the updated parse value is "
    492                "updated by-ref as a side effect and shouldn't be stored directly");
    493     TArg val = value;
    494     save_argument_(val);
    495     return CmdlineResult(CmdlineResult::kSuccess);
    496   }
    497 
    498   CmdlineParserArgumentInfo<TArg> argument_info_;
    499   std::function<void(TArg&)> save_argument_;
    500   std::function<TArg&(void)> load_argument_;
    501 };
    502 }  // namespace detail  // NOLINT [readability/namespace] [5]
    503 }  // namespace art
    504 
    505 #endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
    506