// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard
//
// Adapter layer that makes the regular expression engine pluggable. It was
// added while the open-source version of this library was being integrated
// into Chromium, so that the ICU Regex engine could be used in place of RE2,
// which is not officially supported on Windows.
// Because the library was first written against RE2, this interface stays
// very close to the subset of the RE2 API that phonenumberutil.cc relies on.

#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_

#include <cstddef>
#include <string>

namespace i18n {
namespace phonenumbers {

using std::string;

// Abstraction over the input handed to RegExp::Consume(). The concrete
// representation depends on the engine behind the adapter (StringPiece for
// RE2, UnicodeString for ICU Regex).
class RegExpInput {
 public:
  virtual ~RegExpInput() {}

  // Returns the input as a C++ string.
  virtual string ToString() const = 0;
};

// Abstract regular expression. Only the operations needed by
// phonenumberutil.cc are exposed. Implementations have to provide the three
// pure virtual methods: Consume(), Match() and Replace(). Every other method
// is a thin convenience wrapper that forwards to one of those three.
class RegExp {
 public:
  virtual ~RegExp() {}

  // Tries to match the regular expression against the input and, on success,
  // advances the position in the input. Returns true if the expression
  // matched, false otherwise.
  //   input_string    - the input to be searched.
  //   anchor_at_start - when true, the match only succeeds if it is found at
  //                     the beginning of the tested region of the input.
  //   matched_string1 - receives the first string extracted from the match.
  //                     Can be NULL.
  //   matched_string2 - receives the second string extracted from the match.
  //                     Can be NULL.
  //   matched_string3 - receives the third string extracted from the match.
  //                     Can be NULL.
  virtual bool Consume(RegExpInput* input_string,
                       bool anchor_at_start,
                       string* matched_string1,
                       string* matched_string2,
                       string* matched_string3) const = 0;

  // Convenience overloads of Consume() that require the match to start at the
  // beginning of the input. Output strings that an overload does not take are
  // passed on as NULL.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3) const {
    return Consume(input_string,
                   true,
                   matched_string1,
                   matched_string2,
                   matched_string3);
  }

  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2) const {
    return Consume(input_string,
                   true,
                   matched_string1,
                   matched_string2,
                   NULL);
  }

  bool Consume(RegExpInput* input_string, string* matched_string) const {
    return Consume(input_string,
                   true,
                   matched_string,
                   NULL,
                   NULL);
  }

  bool Consume(RegExpInput* input_string) const {
    return Consume(input_string,
                   true,
                   NULL,
                   NULL,
                   NULL);
  }

  // Convenience wrapper around Consume() for the case where the match is
  // allowed to start anywhere in the input.
  bool FindAndConsume(RegExpInput* input_string,
                      string* matched_string) const {
    return Consume(input_string,
                   false,
                   matched_string,
                   NULL,
                   NULL);
  }

  // Tests the regular expression against a string. Returns true if the
  // expression matched, false otherwise.
  //   input_string   - the string to be searched.
  //   full_match     - when true, the match only succeeds if it covers the
  //                    complete string.
  //   matched_string - receives the string extracted from the match. Can be
  //                    NULL.
  virtual bool Match(const string& input_string,
                     bool full_match,
                     string* matched_string) const = 0;

  // Convenience wrappers around Match(): the Partial* variants pass
  // full_match = false, the Full* variants pass full_match = true. The
  // single-argument forms pass NULL as the output string.
  bool PartialMatch(const string& input_string,
                    string* matched_string) const {
    return Match(input_string, false, matched_string);
  }

  bool PartialMatch(const string& input_string) const {
    return Match(input_string, false, NULL);
  }

  bool FullMatch(const string& input_string,
                 string* matched_string) const {
    return Match(input_string, true, matched_string);
  }

  bool FullMatch(const string& input_string) const {
    return Match(input_string, true, NULL);
  }

  // Replaces match(es) in 'string_to_process'. When 'global' is true every
  // match is replaced, otherwise only the first one.
  //   replacement_string - the text substituted for each match. Groups are
  //                        referenced in it with the $[0-9] notation.
  // Returns true if the pattern matches and a replacement occurs, false
  // otherwise.
  virtual bool Replace(string* string_to_process,
                       bool global,
                       const string& replacement_string) const = 0;

  // Convenience wrappers around Replace(): the two-argument Replace() passes
  // global = false, GlobalReplace() passes global = true.
  bool Replace(string* string_to_process,
               const string& replacement_string) const {
    return Replace(string_to_process, false, replacement_string);
  }

  bool GlobalReplace(string* string_to_process,
                     const string& replacement_string) const {
    return Replace(string_to_process, true, replacement_string);
  }
};

// Abstract factory whose subclasses instantiate the concrete classes
// implementing RegExp and RegExpInput.
class AbstractRegExpFactory {
 public:
  virtual ~AbstractRegExpFactory() {}

  // Creates a new RegExpInput instance. The caller is responsible for
  // deleting the returned instance.
  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;

  // Creates a new RegExp instance. The caller is responsible for deleting the
  // returned instance.
  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
};

}  // namespace phonenumbers
}  // namespace i18n

#endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_