1 // Copyright (C) 2011 The Libphonenumber Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Author: George Yakovlev 16 // Philippe Liard 17 18 #include "phonenumbers/regexp_adapter_re2.h" 19 20 #include <cstddef> 21 #include <string> 22 23 #include <re2/re2.h> 24 #include <re2/stringpiece.h> 25 26 #include "phonenumbers/base/basictypes.h" 27 #include "phonenumbers/base/logging.h" 28 #include "phonenumbers/stringutil.h" 29 30 namespace i18n { 31 namespace phonenumbers { 32 33 using re2::StringPiece; 34 35 // Implementation of RegExpInput abstract class. 36 class RE2RegExpInput : public RegExpInput { 37 public: 38 explicit RE2RegExpInput(const string& utf8_input) 39 : string_(utf8_input), 40 utf8_input_(string_) {} 41 42 virtual string ToString() const { 43 return utf8_input_.ToString(); 44 } 45 46 StringPiece* Data() { 47 return &utf8_input_; 48 } 49 50 private: 51 // string_ holds the string referenced by utf8_input_ as StringPiece doesn't 52 // copy the string passed in. 53 const string string_; 54 StringPiece utf8_input_; 55 }; 56 57 namespace { 58 59 template <typename Function, typename Input> 60 bool DispatchRE2Call(Function regex_function, 61 Input input, 62 const RE2& regexp, 63 string* out1, 64 string* out2, 65 string* out3) { 66 if (out3) { 67 return regex_function(input, regexp, out1, out2, out3); 68 } 69 if (out2) { 70 return regex_function(input, regexp, out1, out2); 71 } 72 if (out1) { 73 return regex_function(input, regexp, out1); 74 } 75 return regex_function(input, regexp); 76 } 77 78 // Replaces unescaped dollar-signs with backslashes. Backslashes are deleted 79 // when they escape dollar-signs. 80 string TransformRegularExpressionToRE2Syntax(const string& regex) { 81 string re2_regex(regex); 82 if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) { 83 return regex; 84 } 85 // If we replaced a dollar sign with a backslash and there are now two 86 // backslashes in the string, we assume that the dollar-sign was previously 87 // escaped and that we need to retain it. To do this, we replace pairs of 88 // backslashes with a dollar sign. 89 GlobalReplaceSubstring("\\\\", "$", &re2_regex); 90 return re2_regex; 91 } 92 93 } // namespace 94 95 // Implementation of RegExp abstract class. 96 class RE2RegExp : public RegExp { 97 public: 98 explicit RE2RegExp(const string& utf8_regexp) 99 : utf8_regexp_(utf8_regexp) {} 100 101 virtual bool Consume(RegExpInput* input_string, 102 bool anchor_at_start, 103 string* matched_string1, 104 string* matched_string2, 105 string* matched_string3) const { 106 DCHECK(input_string); 107 StringPiece* utf8_input = 108 static_cast<RE2RegExpInput*>(input_string)->Data(); 109 110 if (anchor_at_start) { 111 return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_, 112 matched_string1, matched_string2, 113 matched_string3); 114 } else { 115 return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_, 116 matched_string1, matched_string2, 117 matched_string3); 118 } 119 } 120 121 virtual bool Match(const string& input_string, 122 bool full_match, 123 string* matched_string) const { 124 if (full_match) { 125 return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_, 126 matched_string, NULL, NULL); 127 } else { 128 return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_, 129 matched_string, NULL, NULL); 130 } 131 } 132 133 virtual bool Replace(string* string_to_process, 134 bool global, 135 const string& replacement_string) const { 136 DCHECK(string_to_process); 137 const string re2_replacement_string = 138 TransformRegularExpressionToRE2Syntax(replacement_string); 139 if (global) { 140 return RE2::GlobalReplace(string_to_process, utf8_regexp_, 141 re2_replacement_string); 142 } else { 143 return RE2::Replace(string_to_process, utf8_regexp_, 144 re2_replacement_string); 145 } 146 } 147 148 private: 149 RE2 utf8_regexp_; 150 }; 151 152 RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const { 153 return new RE2RegExpInput(utf8_input); 154 } 155 156 RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const { 157 return new RE2RegExp(utf8_regexp); 158 } 159 160 } // namespace phonenumbers 161 } // namespace i18n 162