Home | History | Annotate | Download | only in gn
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "tools/gn/pattern.h"
      6 
      7 #include "tools/gn/value.h"
      8 
      9 const char kPattern_Help[] =
     10     "Patterns\n"
     11     "  Patterns are VERY limited regular expressions that are used in\n"
     12     "  several places.\n"
     13     "\n"
     14     "  Patterns must match the entire input string to be counted as a match.\n"
     15     "  In regular expression parlance, there is an implicit \"^...$\"\n"
     16     "  surrounding your input. If you want to match a substring, you need to\n"
     17     "  use wildcards at the beginning and end.\n"
     18     "\n"
     19     "  There are only two special tokens understood by the pattern matcher.\n"
     20     "  Everything else is a literal.\n"
     21     "\n"
     22     "   * Matches zero or more of any character. It does not depend on the\n"
     23     "     preceeding character (in regular expression parlance it is\n"
     24     "     equivalent to \".*\").\n"
     25     "\n"
     26     "  \\b Matches a path boundary. This will match the beginning or end of\n"
     27     "     a string, or a slash.\n"
     28     "\n"
     29     "Examples\n"
     30     "\n"
     31     "  \"*asdf*\"\n"
     32     "      Matches a string containing \"asdf\" anywhere.\n"
     33     "\n"
     34     "  \"asdf\"\n"
     35     "      Matches only the exact string \"asdf\".\n"
     36     "\n"
     37     "  \"*.cc\"\n"
     38     "      Matches strings ending in the literal \".cc\".\n"
     39     "\n"
     40     "  \"\\bwin/*\"\n"
     41     "      Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n";
     42 
     43 namespace {
     44 
     45 void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
     46   // Set when the last subrange is a literal so we can just append when we
     47   // find another literal.
     48   Pattern::Subrange* last_literal = NULL;
     49 
     50   for (size_t i = 0; i < s.size(); i++) {
     51     if (s[i] == '*') {
     52       // Don't allow two **.
     53       if (out->size() == 0 ||
     54           (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
     55         out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
     56       last_literal = NULL;
     57     } else if (s[i] == '\\') {
     58       if (i < s.size() - 1 && s[i + 1] == 'b') {
     59         // "\b" means path boundary.
     60         i++;
     61         out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
     62         last_literal = NULL;
     63       } else {
     64         // Backslash + anything else means that literal char.
     65         if (!last_literal) {
     66           out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
     67           last_literal = &(*out)[out->size() - 1];
     68         }
     69         if (i < s.size() - 1) {
     70           i++;
     71           last_literal->literal.push_back(s[i]);
     72         } else {
     73           // Single backslash at end, use literal backslash.
     74           last_literal->literal.push_back('\\');
     75         }
     76       }
     77     } else {
     78       if (!last_literal) {
     79         out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
     80         last_literal = &(*out)[out->size() - 1];
     81       }
     82       last_literal->literal.push_back(s[i]);
     83     }
     84   }
     85 }
     86 
     87 }  // namespace
     88 
     89 Pattern::Pattern(const std::string& s) {
     90   ParsePattern(s, &subranges_);
     91   is_suffix_ =
     92       (subranges_.size() == 2 &&
     93        subranges_[0].type == Subrange::ANYTHING &&
     94        subranges_[1].type == Subrange::LITERAL);
     95 }
     96 
     97 Pattern::~Pattern() {
     98 }
     99 
    100 bool Pattern::MatchesString(const std::string& s) const {
    101   // Empty pattern matches only empty string.
    102   if (subranges_.empty())
    103     return s.empty();
    104 
    105   if (is_suffix_) {
    106     const std::string& suffix = subranges_[1].literal;
    107     if (suffix.size() > s.size())
    108       return false;  // Too short.
    109     return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
    110   }
    111 
    112   return RecursiveMatch(s, 0, 0, true);
    113 }
    114 
    115 // We assume the number of ranges is small so recursive is always reasonable.
    116 // Could be optimized to only be recursive for *.
    117 bool Pattern::RecursiveMatch(const std::string& s,
    118                              size_t begin_char,
    119                              size_t subrange_index,
    120                              bool allow_implicit_path_boundary) const {
    121   if (subrange_index >= subranges_.size()) {
    122     // Hit the end of our subranges, the text should also be at the end for a
    123     // match.
    124     return begin_char == s.size();
    125   }
    126 
    127   const Subrange& sr = subranges_[subrange_index];
    128   switch (sr.type) {
    129     case Subrange::LITERAL: {
    130       if (s.size() - begin_char < sr.literal.size())
    131         return false;  // Not enough room.
    132       if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
    133         return false;  // Literal doesn't match.
    134 
    135       // Recursively check the next one.
    136       return RecursiveMatch(s, begin_char + sr.literal.size(),
    137                             subrange_index + 1, true);
    138     }
    139 
    140     case Subrange::PATH_BOUNDARY: {
    141       // When we can accept an implicit path boundary, we have to check both
    142       // a match of the literal and the implicit one.
    143       if (allow_implicit_path_boundary &&
    144           (begin_char == 0 || begin_char == s.size())) {
    145         // At implicit path boundary, see if the rest of the pattern matches.
    146         if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
    147           return true;
    148       }
    149 
    150       // Check for a literal "/".
    151       if (begin_char < s.size() && s[begin_char] == '/') {
    152         // At explicit boundary, see if the rest of the pattern matches.
    153         if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
    154           return true;
    155       }
    156       return false;
    157     }
    158 
    159     case Subrange::ANYTHING: {
    160       if (subrange_index == subranges_.size() - 1)
    161         return true;  // * at the end, consider it matching.
    162 
    163       size_t min_next_size = sr.MinSize();
    164 
    165       // We don't care about exactly what matched as long as there was a match,
    166       // so we can do this front-to-back. If we needed the match, we would
    167       // normally want "*" to be greedy so would work backwards.
    168       for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
    169         // Note: this could probably be faster by detecting the type of the
    170         // next match in advance and checking for a match in this loop rather
    171         // than doing a full recursive call for each character.
    172         if (RecursiveMatch(s, i, subrange_index + 1, true))
    173           return true;
    174       }
    175       return false;
    176     }
    177 
    178     default:
    179       NOTREACHED();
    180   }
    181 
    182   return false;
    183 }
    184 
    185 PatternList::PatternList() {
    186 }
    187 
    188 PatternList::~PatternList() {
    189 }
    190 
    191 void PatternList::SetFromValue(const Value& v, Err* err) {
    192   patterns_.clear();
    193 
    194   if (v.type() != Value::LIST) {
    195     *err = Err(v.origin(), "This value must be a list.");
    196     return;
    197   }
    198 
    199   const std::vector<Value>& list = v.list_value();
    200   for (size_t i = 0; i < list.size(); i++) {
    201     if (!list[i].VerifyTypeIs(Value::STRING, err))
    202       return;
    203     patterns_.push_back(Pattern(list[i].string_value()));
    204   }
    205 }
    206 
    207 bool PatternList::MatchesString(const std::string& s) const {
    208   for (size_t i = 0; i < patterns_.size(); i++) {
    209     if (patterns_[i].MatchesString(s))
    210       return true;
    211   }
    212   return false;
    213 }
    214 
    215 bool PatternList::MatchesValue(const Value& v) const {
    216   if (v.type() == Value::STRING)
    217     return MatchesString(v.string_value());
    218   return false;
    219 }
    220