Home | History | Annotate | Download | only in phonenumbers
      1 // Copyright (C) 2011 The Libphonenumber Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Author: George Yakovlev
     16 //         Philippe Liard
     17 
     18 #include "phonenumbers/regexp_adapter_re2.h"
     19 
     20 #include <cstddef>
     21 #include <string>
     22 
     23 #include <re2/re2.h>
     24 #include <re2/stringpiece.h>
     25 
     26 #include "phonenumbers/base/basictypes.h"
     27 #include "phonenumbers/base/logging.h"
     28 #include "phonenumbers/stringutil.h"
     29 
     30 namespace i18n {
     31 namespace phonenumbers {
     32 
     33 using re2::StringPiece;
     34 
     35 // Implementation of RegExpInput abstract class.
     36 class RE2RegExpInput : public RegExpInput {
     37  public:
     38   explicit RE2RegExpInput(const string& utf8_input)
     39       : string_(utf8_input),
     40         utf8_input_(string_) {}
     41 
     42   virtual string ToString() const {
     43     return utf8_input_.ToString();
     44   }
     45 
     46   StringPiece* Data() {
     47     return &utf8_input_;
     48   }
     49 
     50  private:
     51   // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
     52   // copy the string passed in.
     53   const string string_;
     54   StringPiece utf8_input_;
     55 };
     56 
     57 namespace {
     58 
     59 template <typename Function, typename Input>
     60 bool DispatchRE2Call(Function regex_function,
     61                      Input input,
     62                      const RE2& regexp,
     63                      string* out1,
     64                      string* out2,
     65                      string* out3) {
     66   if (out3) {
     67     return regex_function(input, regexp, out1, out2, out3);
     68   }
     69   if (out2) {
     70     return regex_function(input, regexp, out1, out2);
     71   }
     72   if (out1) {
     73     return regex_function(input, regexp, out1);
     74   }
     75   return regex_function(input, regexp);
     76 }
     77 
     78 // Replaces unescaped dollar-signs with backslashes. Backslashes are deleted
     79 // when they escape dollar-signs.
     80 string TransformRegularExpressionToRE2Syntax(const string& regex) {
     81   string re2_regex(regex);
     82   if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) {
     83     return regex;
     84   }
     85   // If we replaced a dollar sign with a backslash and there are now two
     86   // backslashes in the string, we assume that the dollar-sign was previously
     87   // escaped and that we need to retain it. To do this, we replace pairs of
     88   // backslashes with a dollar sign.
     89   GlobalReplaceSubstring("\\\\", "$", &re2_regex);
     90   return re2_regex;
     91 }
     92 
     93 }  // namespace
     94 
     95 // Implementation of RegExp abstract class.
     96 class RE2RegExp : public RegExp {
     97  public:
     98   explicit RE2RegExp(const string& utf8_regexp)
     99       : utf8_regexp_(utf8_regexp) {}
    100 
    101   virtual bool Consume(RegExpInput* input_string,
    102                        bool anchor_at_start,
    103                        string* matched_string1,
    104                        string* matched_string2,
    105                        string* matched_string3) const {
    106     DCHECK(input_string);
    107     StringPiece* utf8_input =
    108         static_cast<RE2RegExpInput*>(input_string)->Data();
    109 
    110     if (anchor_at_start) {
    111       return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_,
    112                              matched_string1, matched_string2,
    113                              matched_string3);
    114     } else {
    115       return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_,
    116                              matched_string1, matched_string2,
    117                              matched_string3);
    118     }
    119   }
    120 
    121   virtual bool Match(const string& input_string,
    122                      bool full_match,
    123                      string* matched_string) const {
    124     if (full_match) {
    125       return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_,
    126                              matched_string, NULL, NULL);
    127     } else {
    128       return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_,
    129                              matched_string, NULL, NULL);
    130     }
    131   }
    132 
    133   virtual bool Replace(string* string_to_process,
    134                        bool global,
    135                        const string& replacement_string) const {
    136     DCHECK(string_to_process);
    137     const string re2_replacement_string =
    138         TransformRegularExpressionToRE2Syntax(replacement_string);
    139     if (global) {
    140       return RE2::GlobalReplace(string_to_process, utf8_regexp_,
    141                                 re2_replacement_string);
    142     } else {
    143       return RE2::Replace(string_to_process, utf8_regexp_,
    144                           re2_replacement_string);
    145     }
    146   }
    147 
    148  private:
    149   RE2 utf8_regexp_;
    150 };
    151 
    152 RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
    153   return new RE2RegExpInput(utf8_input);
    154 }
    155 
    156 RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
    157   return new RE2RegExp(utf8_regexp);
    158 }
    159 
    160 }  // namespace phonenumbers
    161 }  // namespace i18n
    162