Home | History | Annotate | Download | only in phonenumbers
      1 // Copyright (C) 2011 The Libphonenumber Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Author: Lara Rennie
     16 // Author: Tao Huang
     17 //
     18 // This is a direct port from PhoneNumberMatcher.java.
     19 // Changes to this class should also happen to the Java version, whenever it
     20 // makes sense.
     21 
     22 #ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
     23 #define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
     24 
     25 #include <string>
     26 #include <vector>
     27 
     28 #include "phonenumbers/base/basictypes.h"
     29 #include "phonenumbers/base/memory/scoped_ptr.h"
     30 #include "phonenumbers/callback.h"
     31 #include "phonenumbers/regexp_adapter.h"
     32 
     33 namespace i18n {
     34 namespace phonenumbers {
     35 
        // Forward declaration of the four-argument result callback template. In this
        // header it is only named through a pointer: the "checker" parameter of
        // PhoneNumberMatcher::CheckNumberGroupingIsValid().
     36 template <class R, class A1, class A2, class A3, class A4>
     37     class ResultCallback4;
     38 
        // Bring std::string and std::vector into i18n::phonenumbers so that the
        // declarations below can use the unqualified names.
     39 using std::string;
     40 using std::vector;
     41 
        // Forward declarations of types that this header only names through
        // pointers, references, or as the scoped_ptr<> template argument.
     42 class AlternateFormats;
     43 class NumberFormat;
     44 class PhoneNumber;
     45 class PhoneNumberMatch;
     46 class PhoneNumberMatcherRegExps;
     47 class PhoneNumberUtil;
     48 
        // Finds phone numbers inside a piece of text. Matches are consumed one at a
        // time through HasNext() and Next(). As stated in the file header, this class
        // is a direct port of PhoneNumberMatcher.java.
        // DISALLOW_COPY_AND_ASSIGN is applied at the end of the class.
     49 class PhoneNumberMatcher {
          // Lets PhoneNumberMatcherTest reach the private members declared below.
     50   friend class PhoneNumberMatcherTest;
     51  public:
     52   // Leniency when finding potential phone numbers in text segments. The levels
     53   // here are ordered in increasing strictness.
     54   enum Leniency {
     55     // Phone numbers accepted are possible, but not necessarily valid.
     56     POSSIBLE,
     57     // Phone numbers accepted are possible and valid.
     58     VALID,
     59     // Phone numbers accepted are valid and are grouped in a possible way for
     60     // this locale. For example, a US number written as "65 02 53 00 00" is not
     61     // accepted at this leniency level, whereas "650 253 0000" or "6502530000"
     62     // are. Numbers with more than one '/' symbol are also dropped at this
     63     // level.
     64     // Warning: This level and EXACT_GROUPING might result in lower coverage,
     65     // especially for regions outside of country code "+1". If you are not sure
     66     // about which level to use, you can send an e-mail to the discussion group
     67     // http://groups.google.com/group/libphonenumber-discuss/
     68     STRICT_GROUPING,
     69     // Phone numbers accepted are valid and are grouped in the same way that we
     70     // would have formatted it, or as a single block. For example, a US number
     71     // written as "650 2530000" is not accepted at this leniency level, whereas
     72     // "650 253 0000" or "6502530000" are.
     73     EXACT_GROUPING,
     74   };
     75 
     76   // Constructs a phone number matcher.
          //   util        - the phone number utility.
          //   text        - the text searched for phone numbers.
          //   region_code - the region (country) to assume for phone numbers without
          //                 an international prefix.
          //   leniency    - the degree of validation requested.
          //   max_tries   - the maximum number of retries after matching an invalid
          //                 number.
          // NOTE(review): these descriptions mirror the comments on the corresponding
          // private members; the definition is not in this header. phone_util_ is a
          // reference member, so "util" presumably has to outlive the matcher -
          // confirm against the constructor definition.
     77   PhoneNumberMatcher(const PhoneNumberUtil& util,
     78                      const string& text,
     79                      const string& region_code,
     80                      Leniency leniency,
     81                      int max_tries);
     82 
     83   // Wrapper to construct a phone number matcher, with no limitation on the
     84   // number of retries and VALID Leniency.
          // NOTE(review): which PhoneNumberUtil instance this overload uses is not
          // visible in this header.
     85   PhoneNumberMatcher(const string& text,
     86                      const string& region_code);
     87 
     88   ~PhoneNumberMatcher();
     89 
     90   // Returns true if the text sequence has another match.
          // Declared non-const. NOTE(review): presumably it searches ahead and caches
          // the result in state_/last_match_ - confirm in the implementation.
     91   bool HasNext();
     92 
     93   // Gets next match from text sequence.
          // NOTE(review): "match" looks like an output parameter and the bool result
          // presumably reports whether a match was produced - confirm in the
          // implementation.
     94   bool Next(PhoneNumberMatch* match);
     95 
     96  private:
     97   // The potential states of a PhoneNumberMatcher.
          //   READY - per the comment on last_match_, the only state in which
          //           last_match_ is non-NULL.
          //   DONE  - per the comment on search_index_, search_index_ is undefined.
          // NOTE(review): NOT_READY presumably means that no look-ahead match has been
          // computed yet - confirm in the implementation.
     98   enum State {
     99     NOT_READY,
    100     READY,
    101     DONE,
    102   };
    103 
    104   // Attempts to extract a match from a candidate string. Returns true if a
    105   // match is found, otherwise returns false. The value "offset" refers to the
    106   // start index of the candidate string within the overall text.
          // NOTE(review): the parameter here is named "index", not "offset", and no
          // candidate string is passed in; this wording looks like it belongs to
          // ExtractMatch() below. Confirm what "index" means in the implementation and
          // reword.
    107   bool Find(int index, PhoneNumberMatch* match);
    108 
    109   // Checks a number was formatted with a national prefix, if the number was
    110   // found in national format, and a national prefix is required for that
    111   // number. Returns false if the number needed to have a national prefix and
    112   // none was found.
    113   bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const;
    114 
    115   // Attempts to extract a match from candidate. Returns true if the match was
    116   // found, otherwise returns false.
          // NOTE(review): "offset" is presumably the start index of candidate within
          // the overall text, as described in the comment on Find() - confirm.
    117   bool ExtractMatch(const string& candidate, int offset,
    118                     PhoneNumberMatch* match);
    119 
    120   // Attempts to extract a match from a candidate string if the whole candidate
    121   // does not qualify as a match. Returns true if a match is found, otherwise
    122   // returns false.
    123   bool ExtractInnerMatch(const string& candidate, int offset,
    124                          PhoneNumberMatch* match);
    125 
    126   // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and
    127   // verifies it matches the requested leniency. If parsing and verification
    128   // succeed, returns true, otherwise this method returns false.
    129   bool ParseAndVerify(const string& candidate, int offset,
    130                       PhoneNumberMatch* match);
    131 
          // Grouping check driven by a caller-supplied callback. The callback receives
          // (const PhoneNumberUtil&, const PhoneNumber&, const string&,
          // const vector<string>&) and returns bool; AllNumberGroupsAreExactlyPresent()
          // below has the same parameter list.
          // NOTE(review): ownership of "checker" and how its result is combined into
          // the return value are not visible in this header.
    132   bool CheckNumberGroupingIsValid(
    133     const PhoneNumber& phone_number,
    134     const string& candidate,
    135     ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&,
    136                     const string&, const vector<string>&>* checker) const;
    137 
          // NOTE(review): presumably fills *digit_blocks with the groups of digits of
          // the national number of "number", using "formatting_pattern" when one is
          // supplied - confirm in the implementation, including whether
          // formatting_pattern may be NULL.
    138   void GetNationalNumberGroups(
    139       const PhoneNumber& number,
    140       const NumberFormat* formatting_pattern,
    141       vector<string>* digit_blocks) const;
    142 
          // NOTE(review): judging by the name and by the EXACT_GROUPING description
          // above, this presumably checks that every entry of formatted_number_groups
          // appears as-is in normalized_candidate - confirm in the implementation.
    143   bool AllNumberGroupsAreExactlyPresent(
    144       const PhoneNumberUtil& util,
    145       const PhoneNumber& phone_number,
    146       const string& normalized_candidate,
    147       const vector<string>& formatted_number_groups) const;
    148 
          // NOTE(review): presumably returns true when "number", as written in
          // "candidate", satisfies the rules of the given Leniency level - confirm in
          // the implementation.
    149   bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number,
    150                                  const string& candidate) const;
    151 
    152   // Declared in the class interface for testing purposes.
          // Relates to the STRICT_GROUPING rule above that numbers with more than one
          // '/' symbol are dropped.
    153   static bool ContainsMoreThanOneSlashInNationalNumber(
    154       const PhoneNumber& number,
    155       const string& candidate,
    156       const PhoneNumberUtil& util);
    157 
    158   // Helper method to determine if a character is a Latin-script letter or not.
    159   // For our purposes, combining marks should also return true since we assume
    160   // they have been added to a preceding Latin character.
    161   static bool IsLatinLetter(char32 letter);
    162 
    163   // Helper class holding useful regular expressions.
          // NOTE(review): raw pointer; ownership is not visible in this header.
    164   const PhoneNumberMatcherRegExps* reg_exps_;
    165 
    166   // Helper class holding loaded data containing alternate ways phone numbers
    167   // might be formatted for certain regions.
          // NOTE(review): raw pointer; ownership is not visible in this header.
    168   const AlternateFormats* alternate_formats_;
    169 
    170   // The phone number utility. Held by reference, not by value.
    171   const PhoneNumberUtil& phone_util_;
    172 
    173   // The text searched for phone numbers. Stored by value.
    174   const string text_;
    175 
    176   // The region (country) to assume for phone numbers without an international
    177   // prefix.
    178   const string preferred_region_;
    179 
    180   // The degree of validation requested.
    181   Leniency leniency_;
    182 
    183   // The maximum number of retries after matching an invalid number.
    184   int max_tries_;
    185 
    186   // The iteration tristate (see enum State above).
    187   State state_;
    188 
    189   // The last successful match, NULL unless state_ is READY.
    190   scoped_ptr<PhoneNumberMatch> last_match_;
    191 
    192   // The next index to start searching at. Undefined when state_ is DONE.
    193   int search_index_;
    194 
    195   DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
    196 };
    197 
    198 }  // namespace phonenumbers
    199 }  // namespace i18n
    200 
    201 #endif  // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
    202