Home | History | Annotate | Download | only in phonenumbers
      1 // Copyright (C) 2011 The Libphonenumber Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Author: George Yakovlev
     16 //         Philippe Liard
     17 //
     18 // RegExp adapter to allow a pluggable regexp engine. It has been introduced
     19 // during the integration of the open-source version of this library into
     20 // Chromium to be able to use the ICU Regex engine instead of RE2, which is not
     21 // officially supported on Windows.
     22 // Since RE2 was initially used in this library, the interface of this adapter
     23 // is very close to the subset of the RE2 API used in phonenumberutil.cc.
     24 
     25 #ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
     26 #define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
     27 
     28 #include <cstddef>
     29 #include <string>
     30 
     31 namespace i18n {
     32 namespace phonenumbers {
     33 
     34 using std::string;
     35 
     36 // RegExpInput is the interface that abstracts the input that feeds the
     37 // Consume() method of RegExp which may differ depending on its various
     38 // implementations (StringPiece for RE2, UnicodeString for ICU Regex).
     39 class RegExpInput {
     40  public:
     41   virtual ~RegExpInput() {}
     42 
     43   // Converts to a C++ string.
     44   virtual string ToString() const = 0;
     45 };
     46 
     47 // The regular expression abstract class. It supports only functions used in
     48 // phonenumberutil.cc. Consume(), Match() and Replace() methods must be
     49 // implemented.
     50 class RegExp {
     51  public:
     52   virtual ~RegExp() {}
     53 
     54   // Matches string to regular expression, returns true if expression was
     55   // matched, false otherwise, advances position in the match.
     56   // input_string - string to be searched.
     57   // anchor_at_start - if true, match would be successful only if it appears at
     58   // the beginning of the tested region of the string.
     59   // matched_string1 - the first string extracted from the match. Can be NULL.
     60   // matched_string2 - the second string extracted from the match. Can be NULL.
     61   // matched_string3 - the third string extracted from the match. Can be NULL.
     62   virtual bool Consume(RegExpInput* input_string,
     63                        bool anchor_at_start,
     64                        string* matched_string1,
     65                        string* matched_string2,
     66                        string* matched_string3) const = 0;
     67 
     68   // Helper methods calling the Consume method that assume the match must start
     69   // at the beginning.
     70   inline bool Consume(RegExpInput* input_string,
     71                       string* matched_string1,
     72                       string* matched_string2,
     73                       string* matched_string3) const {
     74     return Consume(input_string, true, matched_string1, matched_string2,
     75                    matched_string3);
     76   }
     77 
     78   inline bool Consume(RegExpInput* input_string,
     79                       string* matched_string1,
     80                       string* matched_string2) const {
     81     return Consume(input_string, true, matched_string1, matched_string2, NULL);
     82   }
     83 
     84   inline bool Consume(RegExpInput* input_string, string* matched_string) const {
     85     return Consume(input_string, true, matched_string, NULL, NULL);
     86   }
     87 
     88   inline bool Consume(RegExpInput* input_string) const {
     89     return Consume(input_string, true, NULL, NULL, NULL);
     90   }
     91 
     92   // Helper method calling the Consume method that assumes the match can start
     93   // at any place in the string.
     94   inline bool FindAndConsume(RegExpInput* input_string,
     95                              string* matched_string) const {
     96     return Consume(input_string, false, matched_string, NULL, NULL);
     97   }
     98 
     99   // Matches string to regular expression, returns true if the expression was
    100   // matched, false otherwise.
    101   // input_string - string to be searched.
    102   // full_match - if true, match would be successful only if it matches the
    103   // complete string.
    104   // matched_string - the string extracted from the match. Can be NULL.
    105   virtual bool Match(const string& input_string,
    106                      bool full_match,
    107                      string* matched_string) const = 0;
    108 
    109   // Helper methods calling the Match method with the right arguments.
    110   inline bool PartialMatch(const string& input_string,
    111                            string* matched_string) const {
    112     return Match(input_string, false, matched_string);
    113   }
    114 
    115   inline bool PartialMatch(const string& input_string) const {
    116     return Match(input_string, false, NULL);
    117   }
    118 
    119   inline bool FullMatch(const string& input_string,
    120                         string* matched_string) const {
    121     return Match(input_string, true, matched_string);
    122   }
    123 
    124   inline bool FullMatch(const string& input_string) const {
    125     return Match(input_string, true, NULL);
    126   }
    127 
    128   // Replaces match(es) in 'string_to_process'. If 'global' is true,
    129   // replaces all the matches, otherwise only the first match.
    130   // replacement_string - text the matches are replaced with. The groups in the
    131   // replacement string are referenced with the $[0-9] notation.
    132   // Returns true if the pattern matches and a replacement occurs, false
    133   // otherwise.
    134   virtual bool Replace(string* string_to_process,
    135                        bool global,
    136                        const string& replacement_string) const = 0;
    137 
    138   // Helper methods calling the Replace method with the right arguments.
    139   inline bool Replace(string* string_to_process,
    140                       const string& replacement_string) const {
    141     return Replace(string_to_process, false, replacement_string);
    142   }
    143 
    144   inline bool GlobalReplace(string* string_to_process,
    145                             const string& replacement_string) const {
    146     return Replace(string_to_process, true, replacement_string);
    147   }
    148 };
    149 
    150 // Abstract factory class that lets its subclasses instantiate the classes
    151 // implementing RegExp and RegExpInput.
    152 class AbstractRegExpFactory {
    153  public:
    154   virtual ~AbstractRegExpFactory() {}
    155 
    156   // Creates a new instance of RegExpInput. The deletion of the returned
    157   // instance is under the responsibility of the caller.
    158   virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;
    159 
    160   // Creates a new instance of RegExp. The deletion of the returned instance is
    161   // under the responsibility of the caller.
    162   virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
    163 };
    164 
    165 }  // namespace phonenumbers
    166 }  // namespace i18n
    167 
    168 #endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
    169