Home | History | Annotate | Download | only in phonenumbers
      1 // Copyright (C) 2009 The Libphonenumber Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Utility for international phone numbers.
     16 //
     17 // Author: Shaopeng Jia
     18 // Open-sourced by: Philippe Liard
     19 
     20 #ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
     21 #define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
     22 
     23 #include <stddef.h>
     24 #include <list>
     25 #include <map>
     26 #include <set>
     27 #include <string>
     28 #include <utility>
     29 #include <vector>
     30 
     31 #include "phonenumbers/base/basictypes.h"
     32 #include "phonenumbers/base/memory/scoped_ptr.h"
     33 #include "phonenumbers/base/memory/singleton.h"
     34 #include "phonenumbers/phonenumber.pb.h"
     35 
     36 class TelephoneNumber;
     37 
     38 namespace i18n {
     39 namespace phonenumbers {
     40 
     41 using std::list;
     42 using std::map;
     43 using std::pair;
     44 using std::set;
     45 using std::string;
     46 using std::vector;
     47 
     48 using google::protobuf::RepeatedPtrField;
     49 
     50 class AsYouTypeFormatter;
     51 class Logger;
     52 class NumberFormat;
     53 class PhoneMetadata;
     54 class PhoneNumberRegExpsAndMappings;
     55 class RegExp;
     56 
     57 // NOTE: A lot of methods in this class require Region Code strings. These must
     58 // be provided using ISO 3166-1 two-letter country-code format. The list of the
     59 // codes can be found here:
     60 // http://www.iso.org/iso/english_country_names_and_code_elements
     61 
     62 class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
     63  private:
     64   friend class AsYouTypeFormatter;
     65   friend class PhoneNumberMatcher;
     66   friend class PhoneNumberMatcherRegExps;
     67   friend class PhoneNumberMatcherTest;
     68   friend class PhoneNumberRegExpsAndMappings;
     69   friend class PhoneNumberUtilTest;
     70   friend class ShortNumberUtil;
     71   friend class ShortNumberUtilTest;
     72   friend class Singleton<PhoneNumberUtil>;
     73 
     74  public:
     75   ~PhoneNumberUtil();
     76   static const char kRegionCodeForNonGeoEntity[];
     77 
     78   // INTERNATIONAL and NATIONAL formats are consistent with the definition
     79   // in ITU-T Recommendation E.123. For example, the number of the Google
     80   // Zurich office will be written as "+41 44 668 1800" in INTERNATIONAL
     81   // format, and as "044 668 1800" in NATIONAL format. E164 format is as per
     82   // INTERNATIONAL format but with no formatting applied e.g. "+41446681800".
     83   // RFC3966 is as per INTERNATIONAL format, but with all spaces and other
     84   // separating symbols replaced with a hyphen, and with any phone number
     85   // extension appended with ";ext=". It also will have a prefix of "tel:"
     86   // added, e.g. "tel:+41-44-668-1800".
     87   enum PhoneNumberFormat {
     88     E164,  // No formatting applied, e.g. "+41446681800".
     89     INTERNATIONAL,  // e.g. "+41 44 668 1800".
     90     NATIONAL,  // e.g. "044 668 1800".
     91     RFC3966  // "tel:" prefix, hyphen separators, e.g. "tel:+41-44-668-1800".
     92   };
     93 
     94   // Type of phone numbers.
     95   enum PhoneNumberType {  // Returned by GetNumberType().
     96     FIXED_LINE,
     97     MOBILE,
     98     // In some regions (e.g. the USA), it is impossible to distinguish between
     99     // fixed-line and mobile numbers by looking at the phone number itself.
    100     FIXED_LINE_OR_MOBILE,
    101     // Freephone (toll-free) lines.
    102     TOLL_FREE,
    103     PREMIUM_RATE,
    104     // The cost of this call is shared between the caller and the recipient, and
    105     // is hence typically less than PREMIUM_RATE calls. See
    106     // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
    107     SHARED_COST,
    108     // Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
    109     VOIP,
    110     // A personal number is associated with a particular person, and may be
    111     // routed to either a MOBILE or FIXED_LINE number. Some more information can
    112     // be found here: http://en.wikipedia.org/wiki/Personal_Numbers
    113     PERSONAL_NUMBER,
    114     PAGER,
    115     // Used for "Universal Access Numbers" or "Company Numbers". They may be
    116     // further routed to specific offices, but allow one number to be used for a
    117     // company.
    118     UAN,
    119     // Used for "Voice Mail Access Numbers".
    120     VOICEMAIL,
    121     // A phone number is of type UNKNOWN when it does not fit any of the known
    122     // patterns for a specific region.
    123     UNKNOWN
    124   };
    125 
    126   // Types of phone number matches. See detailed description beside the
    127   // IsNumberMatch() method.
    128   enum MatchType {  // Returned by IsNumberMatch() and its string wrappers.
    129     INVALID_NUMBER,  // NOT_A_NUMBER in the java version.
    130     NO_MATCH,  // None of the match levels below applies.
    131     SHORT_NSN_MATCH,  // One NSN could be a shorter version of the other.
    132     NSN_MATCH,  // NSNs and extensions agree; a country calling code is unset.
    133     EXACT_MATCH,  // Country calling code, NSN, leading zero and extension agree.
    134   };
    135 
    136   enum ErrorType {  // Returned by Parse() and ParseAndKeepRawInput().
    137     NO_PARSING_ERROR,  // The input was parsed correctly.
    138     INVALID_COUNTRY_CODE_ERROR,  // INVALID_COUNTRY_CODE in the java version.
    139     NOT_A_NUMBER,
    140     TOO_SHORT_AFTER_IDD,  // NOTE(review): IDD presumably = international prefix.
    141     TOO_SHORT_NSN,  // NSN = national significant number.
    142     TOO_LONG_NSN,  // TOO_LONG in the java version.
    143   };
    144 
    145   // Possible outcomes when testing if a PhoneNumber is possible.
    146   enum ValidationResult {  // Returned by IsPossibleNumberWithReason().
    147     IS_POSSIBLE,  // NOTE(review): presumably the only success value; confirm.
    148     INVALID_COUNTRY_CODE,
    149     TOO_SHORT,  // NOTE(review): presumably refers to number length; confirm.
    150     TOO_LONG,  // NOTE(review): presumably refers to number length; confirm.
    151   };
    152 
    153   // Convenience method to get a list of what regions the library has metadata
    154   // for.
    155   void GetSupportedRegions(set<string>* regions) const;
    156 
    157   // Populates a list with the region codes that match the specific country
    158   // calling code. For non-geographical country calling codes, the region code
    159   // 001 is returned. Also, in the case of no region code being found, the list
    160   // is left unchanged.
    161   void GetRegionCodesForCountryCallingCode(
    162       int country_calling_code,
    163       list<string>* region_codes) const;
    164 
    165   // Gets a PhoneNumberUtil instance to carry out international phone number
    166   // formatting, parsing, or validation. The instance is loaded with phone
    167   // number metadata for a number of most commonly used regions, as specified by
    168   // DEFAULT_REGIONS_.
    169   //
    170   // The PhoneNumberUtil is implemented as a singleton. Therefore, calling
    171   // GetInstance multiple times will only result in one instance being created.
    172   static PhoneNumberUtil* GetInstance();
    173 
    174   // Returns true if the number is a valid vanity (alpha) number such as 800
    175   // MICROSOFT. A valid vanity number will start with at least 3 digits and will
    176   // have three or more alpha characters. This does not do region-specific
    177   // checks - to work out if this number is actually valid for a region, it
    178   // should be parsed and methods such as IsPossibleNumberWithReason or
    179   // IsValidNumber should be used.
    180   bool IsAlphaNumber(const string& number) const;
    181 
    182   // Converts all alpha characters in a number to their respective digits on
    183   // a keypad, but retains existing formatting.
    184   void ConvertAlphaCharactersInNumber(string* number) const;
    185 
    186   // Normalizes a string of characters representing a phone number. This
    187   // converts wide-ascii and arabic-indic numerals to European numerals, and
    188   // strips punctuation and alpha characters.
    189   void NormalizeDigitsOnly(string* number) const;
    190 
    191   // Gets the national significant number of a phone number. Note a national
    192   // significant number doesn't contain a national prefix or any formatting.
    193   void GetNationalSignificantNumber(const PhoneNumber& number,
    194                                     string* national_significant_num) const;
    195 
    196   // Gets the length of the geographical area code from the PhoneNumber object
    197   // passed in, so that clients could use it to split a national significant
    198   // number into geographical area code and subscriber number. It works in such
    199   // a way that the resultant subscriber number should be diallable, at least on
    200   // some devices. An example of how this could be used:
    201   //
    202   // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
    203   // PhoneNumber number;
    204   // phone_util.Parse("16502530000", "US", &number);
    205   // string national_significant_number;
    206   // phone_util.GetNationalSignificantNumber(number,
    207   //                                         &national_significant_number);
    208   // string area_code;
    209   // string subscriber_number;
    210   //
    211   // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number);
    212   // if (area_code_length > 0) {
    213   //   area_code = national_significant_number.substr(0, area_code_length);
    214   //   subscriber_number = national_significant_number.substr(
    215   //       area_code_length, string::npos);
    216   // } else {
    217   //   area_code = "";
    218   //   subscriber_number = national_significant_number;
    219   // }
    220   //
    221   // N.B.: area code is a very ambiguous concept, so the authors generally
    222   // recommend against using it for most purposes, but recommend using the
    223   // more general national_number instead. Read the following carefully before
    224   // deciding to use this method:
    225   //
    226   //  - geographical area codes change over time, and this method honors those
    227   //    changes; therefore, it doesn't guarantee the stability of the result it
    228   //    produces.
    229   //  - subscriber numbers may not be diallable from all devices (notably mobile
    230   //    devices, which typically require the full national_number to be dialled
    231   //    in most regions).
    232   //  - most non-geographical numbers have no area codes, including numbers
    233   //    from non-geographical entities.
    234   //  - some geographical numbers have no area codes.
    235   int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const;
    236 
    237   // Gets the length of the national destination code (NDC) from the PhoneNumber
    238   // object passed in, so that clients could use it to split a national
    239   // significant number into NDC and subscriber number. The NDC of a phone
    240   // number is normally the first group of digit(s) right after the country
    241   // calling code when the number is formatted in the international format, if
    242   // there is a subscriber number part that follows. An example of how this
    243   // could be used:
    244   //
    245   // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
    246   // PhoneNumber number;
    247   // phone_util.Parse("16502530000", "US", &number);
    248   // string national_significant_number;
    249   // phone_util.GetNationalSignificantNumber(number,
    250   //                                         &national_significant_number);
    251   // string national_destination_code;
    252   // string subscriber_number;
    253   //
    254   // int national_destination_code_length =
    255   //     phone_util.GetLengthOfNationalDestinationCode(number);
    256   // if (national_destination_code_length > 0) {
    257   //   national_destination_code = national_significant_number.substr(
    258   //       0, national_destination_code_length);
    259   //   subscriber_number = national_significant_number.substr(
    260   //       national_destination_code_length, string::npos);
    261   // } else {
    262   //   national_destination_code = "";
    263   //   subscriber_number = national_significant_number;
    264   // }
    265   //
    266   // Refer to the unittests to see the difference between this function and
    267   // GetLengthOfGeographicalAreaCode().
    268   int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const;
    269 
    270   // Formats a phone number in the specified format using default rules. Note
    271   // that this does not promise to produce a phone number that the user can
    272   // dial from where they are - although we do format in either NATIONAL or
    273   // INTERNATIONAL format depending on what the client asks for, we do not
    274   // currently support a more abbreviated format, such as for users in the
    275   // same area who could potentially dial the number without area code.
    276   void Format(const PhoneNumber& number,
    277               PhoneNumberFormat number_format,
    278               string* formatted_number) const;
    279 
    280   // Formats a phone number in the specified format using client-defined
    281   // formatting rules.
    282   void FormatByPattern(
    283       const PhoneNumber& number,
    284       PhoneNumberFormat number_format,
    285       const RepeatedPtrField<NumberFormat>& user_defined_formats,
    286       string* formatted_number) const;
    287 
    288   // Formats a phone number in national format for dialing using the carrier as
    289   // specified in the carrier_code. The carrier_code will always be used
    290   // regardless of whether the phone number already has a preferred domestic
    291   // carrier code stored. If carrier_code contains an empty string, return the
    292   // number in national format without any carrier code.
    293   void FormatNationalNumberWithCarrierCode(const PhoneNumber& number,
    294                                            const string& carrier_code,
    295                                            string* formatted_number) const;
    296 
    297   // Formats a phone number in national format for dialing using the carrier as
    298   // specified in the preferred_domestic_carrier_code field of the PhoneNumber
    299   // object passed in. If that is missing, use the fallback_carrier_code passed
    300   // in instead. If there is no preferred_domestic_carrier_code, and the
    301   // fallback_carrier_code contains an empty string, return the number in
    302   // national format without any carrier code.
    303   //
    304   // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed
    305   // in should take precedence over the number's preferred_domestic_carrier_code
    306   // when formatting.
    307   void FormatNationalNumberWithPreferredCarrierCode(
    308       const PhoneNumber& number,
    309       const string& fallback_carrier_code,
    310       string* formatted_number) const;
    311 
    312   // Returns a number formatted in such a way that it can be dialed from a
    313   // mobile phone in a specific region. If the number cannot be reached from
    314   // the region (e.g. some countries block toll-free numbers from being called
    315   // outside of the country), the method returns an empty string.
    316   void FormatNumberForMobileDialing(
    317       const PhoneNumber& number,
    318       const string& region_calling_from,
    319       bool with_formatting,
    320       string* formatted_number) const;
    321 
    322   // Formats a phone number for out-of-country dialing purposes.
    323   //
    324   // Note this function takes care of the case for calling inside of NANPA
    325   // and between Russia and Kazakhstan (who share the same country calling
    326   // code). In those cases, no international prefix is used. For regions which
    327   // have multiple international prefixes, the number in its INTERNATIONAL
    328   // format will be returned instead.
    329   void FormatOutOfCountryCallingNumber(
    330       const PhoneNumber& number,
    331       const string& calling_from,
    332       string* formatted_number) const;
    333 
    334   // Formats a phone number using the original phone number format that the
    335   // number is parsed from. The original format is embedded in the
    336   // country_code_source field of the PhoneNumber object passed in. If such
    337   // information is missing, the number will be formatted into the NATIONAL
    338   // format by default. When the number is an invalid number, the method returns
    339   // the raw input when it is available.
    340   void FormatInOriginalFormat(const PhoneNumber& number,
    341                               const string& region_calling_from,
    342                               string* formatted_number) const;
    343 
    344   // Formats a phone number for out-of-country dialing purposes.
    345   //
    346   // Note that in this version, if the number was entered originally using alpha
    347   // characters and this version of the number is stored in raw_input, this
    348   // representation of the number will be used rather than the digit
    349   // representation. Grouping information, as specified by characters such as
    350   // "-" and " ", will be retained.
    351   //
    352   // Caveats:
    353   // 1) This will not produce good results if the country calling code is both
    354   // present in the raw input _and_ is the start of the national number. This
    355   // is not a problem in the regions which typically use alpha numbers.
    356   // 2) This will also not produce good results if the raw input has any
    357   // grouping information within the first three digits of the national number,
    358   // and if the function needs to strip preceding digits/words in the raw input
    359   // before these digits. Normally people group the first three digits together
    360   // so this is not a huge problem - and will be fixed if it proves to be so.
    361   void FormatOutOfCountryKeepingAlphaChars(
    362       const PhoneNumber& number,
    363       const string& calling_from,
    364       string* formatted_number) const;
    365 
    366   // Attempts to extract a valid number from a phone number that is too long to
    367   // be valid, and resets the PhoneNumber object passed in to that valid
    368   // version. If no valid number could be extracted, the PhoneNumber object
    369   // passed in will not be modified. It returns true if a valid phone number can
    370   // be successfully extracted.
    371   bool TruncateTooLongNumber(PhoneNumber* number) const;
    372 
    373   // Gets the type of a phone number.
    374   PhoneNumberType GetNumberType(const PhoneNumber& number) const;
    375 
    376   // Tests whether a phone number matches a valid pattern. Note this doesn't
    377   // verify the number is actually in use, which is impossible to tell by just
    378   // looking at a number itself.
    379   bool IsValidNumber(const PhoneNumber& number) const;
    380 
    381   // Tests whether a phone number is valid for a certain region. Note this
    382   // doesn't verify the number is actually in use, which is impossible to tell
    383   // by just looking at a number itself. If the country calling code is not the
    384   // same as the country calling code for the region, this immediately exits
    385   // with false.  After this, the specific number pattern rules for the region
    386   // are examined.
    387   // This is useful for determining for example whether a particular number is
    388   // valid for Canada, rather than just a valid NANPA number.
    389   // Warning: In most cases, you want to use IsValidNumber instead. For
    390   // example, this method will mark numbers from British Crown dependencies
    391   // such as the Isle of Man as invalid for the region "GB" (United Kingdom),
    392   // since it has its own region code, "IM", which may be undesirable.
    393   bool IsValidNumberForRegion(
    394       const PhoneNumber& number,
    395       const string& region_code) const;
    396 
    397   // Returns the region where a phone number is from. This could be used for
    398   // geo-coding at the region level.
    399   void GetRegionCodeForNumber(const PhoneNumber& number,
    400                               string* region_code) const;
    401 
    402   // Returns the country calling code for a specific region. For example,
    403   // this would be 1 for the United States, and 64 for New Zealand.
    404   int GetCountryCodeForRegion(const string& region_code) const;
    405 
    406   // Returns the region code that matches the specific country code. Note that
    407   // it is possible that several regions share the same country calling code
    408   // (e.g. US and Canada), and in that case, only one of the regions (normally
    409   // the one with the largest population) is returned.
    410   void GetRegionCodeForCountryCode(int country_code, string* region_code) const;
    411 
    412   // Checks if this is a region under the North American Numbering Plan
    413   // Administration (NANPA).
    414   bool IsNANPACountry(const string& region_code) const;
    415 
    416   // Returns the national dialling prefix for a specific region. For example,
    417   // this would be 1 for the United States, and 0 for New Zealand. Set
    418   // strip_non_digits to true to strip symbols like "~" (which indicates a wait
    419   // for a dialling tone) from the prefix returned. If no national prefix is
    420   // present, we return an empty string.
    421   void GetNddPrefixForRegion(const string& region_code,
    422                              bool strip_non_digits,
    423                              string* national_prefix) const;
    424 
    425   // Checks whether a phone number is a possible number. It provides a more
    426   // lenient check than IsValidNumber() in the following sense:
    427   //   1. It only checks the length of phone numbers. In particular, it doesn't
    428   //      check starting digits of the number.
    429   //   2. It doesn't attempt to figure out the type of the number, but uses
    430   //      general rules which apply to all types of phone numbers in a
    431   //      region. Therefore, it is much faster than IsValidNumber().
    432   //   3. For fixed line numbers, many regions have the concept of area code,
    433   //      which together with subscriber number constitute the national
    434   //      significant number. It is sometimes okay to dial the subscriber
    435   //      number only when dialing in the same area. This function will return
    436   //      true if the subscriber-number-only version is passed in. On the other
    437   //      hand, because IsValidNumber() validates using information on both
    438   //      starting digits (for fixed line numbers, that would most likely be
    439   //      area codes) and length (obviously includes the length of area codes
    440   //      for fixed line numbers), it will return false for the
    441   //      subscriber-number-only version.
    442   ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const;
    443 
    444   // Convenience wrapper around IsPossibleNumberWithReason. Instead of returning
    445   // the reason for failure, this method returns a boolean value.
    446   bool IsPossibleNumber(const PhoneNumber& number) const;
    447 
    448   // Checks whether a phone number is a possible number given a number in the
    449   // form of a string, and the country where the number could be dialed from.
    450   // It provides a more lenient check than IsValidNumber(). See
    451   // IsPossibleNumber(const PhoneNumber& number) for details.
    452   //
    453   // This method first parses the number, then invokes
    454   // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber
    455   // object.
    456   //
    457   // region_dialing_from represents the region that we are expecting the number
    458   // to be dialed from. Note this is different from the region where the number
    459   // belongs. For example, the number +1 650 253 0000 is a number that belongs
    460   // to US. When written in this form, it could be dialed from any region. When
    461   // it is written as 00 1 650 253 0000, it could be dialed from any region
    462   // which uses an international dialling prefix of 00. When it is written as
    463   // 650 253 0000, it could only be dialed from within the US, and when written
    464   // as 253 0000, it could only be dialed from within a smaller area in the US
    465   // (Mountain View, CA, to be more specific).
    466   bool IsPossibleNumberForString(
    467       const string& number,
    468       const string& region_dialing_from) const;
    469 
    470   // Gets a valid fixed-line number for the specified region. Returns false if
    471   // the region was unknown, or the region 001 is passed in. For 001
    472   // (representing non-geographical numbers), call
    473   // GetExampleNumberForNonGeoEntity instead.
    474   bool GetExampleNumber(const string& region_code,
    475                         PhoneNumber* number) const;
    476 
    477   // Gets a valid number of the specified type for the specified region.
    478   // Returns false if the region was unknown or 001, or if no example number of
    479   // that type could be found. For 001 (representing non-geographical numbers),
    480   // call GetExampleNumberForNonGeoEntity instead.
    481   bool GetExampleNumberForType(const string& region_code,
    482                                PhoneNumberType type,
    483                                PhoneNumber* number) const;
    484 
    485   // Gets a valid number for the specified country calling code for a
    486   // non-geographical entity. Returns false if the metadata does not contain
    487   // such information, or the country calling code passed in does not belong to
    488   // a non-geographical entity.
    489   bool GetExampleNumberForNonGeoEntity(
    490       int country_calling_code, PhoneNumber* number) const;
    491 
    492   // Parses a string and returns it in proto buffer format. This method will
    493   // return an error like INVALID_COUNTRY_CODE_ERROR if the number is not
    494   // considered to be a possible number, and NO_PARSING_ERROR if it parsed
    495   // correctly. Note that validation of whether the number is actually a valid
    496   // number for a particular region is not performed. This can be done
    497   // separately with IsValidNumber().
    498   //
    499   // number_to_parse can also be provided in RFC3966 format.
    500   //
    501   // default_region represents the country that we are expecting the number to
    502   // be from. This is only used if the number being parsed is not written in
    503   // international format. The country_code for the number in this case would be
    504   // stored as that of the default country supplied. If the number is guaranteed
    505   // to start with a '+' followed by the country calling code, then
    506   // "ZZ" can be supplied.
    507   ErrorType Parse(const string& number_to_parse,
    508                   const string& default_region,
    509                   PhoneNumber* number) const;
    510   // Parses a string and returns it in proto buffer format. This method differs
    511   // from Parse() in that it always populates the raw_input field of the
    512   // protocol buffer with number_to_parse as well as the country_code_source
    513   // field.
    514   ErrorType ParseAndKeepRawInput(const string& number_to_parse,
    515                                  const string& default_region,
    516                                  PhoneNumber* number) const;
    517 
    518   // Takes two phone numbers and compares them for equality.
    519   //
    520   // Returns EXACT_MATCH if the country calling code, NSN, presence of a leading
    521   // zero for Italian numbers and any extension present are the same.
    522   // Returns NSN_MATCH if either or both has no country calling code specified,
    523   // and the NSNs and extensions are the same.
    524   // Returns SHORT_NSN_MATCH if either or both has no country calling code
    525   // specified, or the country calling code specified is the same, and one NSN
    526   // could be a shorter version of the other number. This includes the case
    527   // where one has an extension specified, and the other does not.
    528   // Returns NO_MATCH otherwise.
    529   // For example, the numbers +1 345 657 1234 and 657 1234 are a
    530   // SHORT_NSN_MATCH. The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.
    531   MatchType IsNumberMatch(const PhoneNumber& first_number,
    532                           const PhoneNumber& second_number) const;
    533 
    534   // Takes two phone numbers as strings and compares them for equality. This
    535   // is a convenience wrapper for IsNumberMatch(PhoneNumber firstNumber,
    536   // PhoneNumber secondNumber). No default region is known.
    537   // Returns INVALID_NUMBER if either number cannot be parsed into a phone
    538   // number.
    539   MatchType IsNumberMatchWithTwoStrings(const string& first_number,
    540                                         const string& second_number) const;
    541 
    542   // Takes two phone numbers and compares them for equality. This is a
    543   // convenience wrapper for IsNumberMatch(PhoneNumber firstNumber,
    544   // PhoneNumber secondNumber). No default region is known.
    545   // Returns INVALID_NUMBER if second_number cannot be parsed into a phone
    546   // number.
    547   MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number,
    548                                        const string& second_number) const;
    549 
    550   // Overrides the default logging system. This takes ownership of the provided
    551   // logger.
    552   void SetLogger(Logger* logger);
    553 
    554   // Gets an AsYouTypeFormatter for the specific region.
    555   // Returns an AsYouTypeFormatter object, which could be used to format phone
    556   // numbers in the specific region "as you type".
    557   // The deletion of the returned instance is under the responsibility of the
    558   // caller.
    559   AsYouTypeFormatter* GetAsYouTypeFormatter(const string& region_code) const;
    560 
    561   friend bool ConvertFromTelephoneNumberProto(
    562       const TelephoneNumber& proto_to_convert,
    563       PhoneNumber* new_proto);
    564   friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert,
    565                                             TelephoneNumber* resulting_proto);
    566 
    567  protected:
    568   // Check whether the country_calling_code is from a country whose national
    569   // significant number could contain a leading zero. An example of such a
    570   // country is Italy.
    571   bool IsLeadingZeroPossible(int country_calling_code) const;
    572 
    573  private:
    574   scoped_ptr<Logger> logger_;
    575 
    576   typedef pair<int, list<string>*> IntRegionsPair;
    577 
    578   // The minimum and maximum length of the national significant number.
    579   static const size_t kMinLengthForNsn = 2;
    580   // The ITU says the maximum length should be 15, but we have found longer
    581   // numbers in Germany.
    582   static const size_t kMaxLengthForNsn = 16;
    583   // The maximum length of the country calling code.
    584   static const size_t kMaxLengthCountryCode = 3;
    585 
          // Declared here without an initializer, like the two arrays below, so the
          // actual characters are defined outside this header.
          // NOTE(review): presumably the characters accepted as a leading plus sign
          // (see StartsWithPlusCharsPattern) - confirm in the .cc.
    586   static const char kPlusChars[];
    587   // Regular expression of acceptable punctuation found in phone numbers. This
    588   // excludes punctuation found as a leading character only. This consists of
    589   // dash characters, white space characters, full stops, slashes, square
    590   // brackets, parentheses and tildes. It also includes the letter 'x' as that
    591   // is found as a placeholder for carrier information in some phone numbers.
    592   // Full-width variants are also present.
    593   static const char kValidPunctuation[];
    594 
    595   // Regular expression of characters typically used to start a second phone
    596   // number for the purposes of parsing. This allows us to strip off parts of
    597   // the number that are actually the start of another number, such as for:
    598   // (530) 583-6985 x302/x2303 -> the second extension here makes this actually
    599   // two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove
    600   // the second extension so that the first number is parsed correctly. The
    601   // string preceding this is captured.
    602   // This corresponds to SECOND_NUMBER_START in the java version.
    603   static const char kCaptureUpToSecondNumberStart[];
    604 
    605   // Helper class holding useful regular expressions and character mappings.
          // Owned by this object through the scoped_ptr. The helper class itself is
          // only forward-declared in this header.
    606   scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
    607 
    608   // A mapping from a country calling code to a RegionCode object which denotes
    609   // the region represented by that country calling code. Note regions under
    610   // NANPA share the country calling code 1 and Russia and Kazakhstan share the
    611   // country calling code 7. Under this map, 1 is mapped to region code "US" and
    612   // 7 is mapped to region code "RU". This is implemented as a sorted vector to
    613   // achieve better performance.
          // NOTE(review): the element type IntRegionsPair is
          // pair<int, list<string>*>, i.e. each entry carries a list of strings
          // rather than a single region code, so the description above looks out of
          // date - confirm against the .cc. The list pointers are raw pointers;
          // also confirm which code is responsible for deleting them.
    614   scoped_ptr<vector<IntRegionsPair> > country_calling_code_to_region_code_map_;
    615 
    616   // The set of regions that share country calling code 1.
          // Regions are stored as strings; the set is owned through the scoped_ptr.
    617   scoped_ptr<set<string> > nanpa_regions_;
          // The country calling code (1) referred to in the comment above.
    618   static const int kNanpaCountryCode = 1;
    619 
    620   // A mapping from a region code to a PhoneMetadata for that region.
          // Keys are region-code strings; the PhoneMetadata objects are stored by
          // value inside the map, which is itself owned through the scoped_ptr.
    621   scoped_ptr<map<string, PhoneMetadata> > region_to_metadata_map_;
    622 
    623   // A mapping from a country calling code for a non-geographical entity to the
    624   // PhoneMetadata for that country calling code. Examples of the country
    625   // calling codes include 800 (International Toll Free Service) and 808
    626   // (International Shared Cost Service).
          // Keys are int country calling codes; the PhoneMetadata objects are stored
          // by value inside the map, which is itself owned through the scoped_ptr.
    627   scoped_ptr<map<int, PhoneMetadata> >
    628       country_code_to_non_geographical_metadata_map_;
    629 
    630   PhoneNumberUtil();  // Private: only members and friends can construct one.
    631 
    632   // Returns a regular expression for the possible extensions that may be found
    633   // in a number, for use when matching.
          // The pattern is returned as a reference to a string, not as a copy.
          // NOTE(review): presumably the string is held by reg_exps_ and so lives as
          // long as this object - confirm before storing the reference.
    634   const string& GetExtnPatternsForMatching() const;
    635 
    636   // Checks if a number matches the plus chars pattern.
          // NOTE(review): going by the name, the match is anchored at the start of
          // |number|, and the pattern is presumably built from kPlusChars - confirm
          // in the .cc.
    637   bool StartsWithPlusCharsPattern(const string& number) const;
    638 
    639   // Checks whether a string contains only valid digits.
          // NOTE(review): what counts as a "valid digit" (for example whether
          // non-ASCII digits are accepted) and the result for an empty string are
          // decided in the .cc - confirm there.
    640   bool ContainsOnlyValidDigits(const string& s) const;
    641 
    642   // Checks if a format is eligible to be used by the AsYouTypeFormatter. This
    643   // method is here rather than in asyoutypeformatter.h since it depends on the
    644   // valid punctuation declared by the phone number util.
          // |format| is the format string to be checked.
          // NOTE(review): the "valid punctuation" is presumably kValidPunctuation
          // declared above - confirm in the .cc.
    645   bool IsFormatEligibleForAsYouTypeFormatter(const string& format) const;
    646 
    647   // Helper function to check if the national prefix formatting rule has the
    648   // first group only, i.e., does not start with the national prefix.
          // NOTE(review): how an empty |national_prefix_formatting_rule| is treated
          // is not stated here - confirm in the .cc.
    649   bool FormattingRuleHasFirstGroupOnly(
    650       const string& national_prefix_formatting_rule) const;
    651 
    652   // Trims unwanted end characters from a phone number string.
          // |number| is both the input and the output: the string it points to is
          // modified in place.
          // NOTE(review): which characters count as "unwanted" is defined in the .cc.
    653   void TrimUnwantedEndChars(string* number) const;
    654 
    655   // Tests whether a phone number has a geographical association. It checks if
    656   // the number is associated with a certain region in the country to which it
    657   // belongs. Note that this doesn't verify if the number is actually in use.
    658   bool IsNumberGeographical(const PhoneNumber& phone_number) const;
    659 
    660   // Helper function to check region code is not unknown or null.
          // NOTE(review): |region_code| is passed by reference and so cannot itself
          // be null; the "null" wording is presumably carried over from the Java
          // version - confirm exactly which values are rejected.
    661   bool IsValidRegionCode(const string& region_code) const;
    662 
    663   // Helper function to check the country calling code is valid.
    664   bool HasValidCountryCallingCode(int country_calling_code) const;
    665 
          // Looks up the metadata for |region_code| and returns it as a pointer to
          // const PhoneMetadata.
          // NOTE(review): presumably backed by region_to_metadata_map_, with NULL
          // returned for an unknown region and ownership kept by this object -
          // confirm in the .cc.
    666   const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion(
    667       const string& region_code) const;
    668 
          // Looks up the metadata for a non-geographical entity by its country
          // calling code and returns it as a pointer to const PhoneMetadata.
          // NOTE(review): presumably backed by
          // country_code_to_non_geographical_metadata_map_, with NULL returned for
          // an unknown code - confirm in the .cc.
    669   const i18n::phonenumbers::PhoneMetadata* GetMetadataForNonGeographicalRegion(
    670       int country_calling_code) const;
    671 
          // Metadata lookup that is given both a country calling code and a region
          // code.
          // NOTE(review): presumably chooses between the two lookups above depending
          // on whether the region is a non-geographical one - confirm in the .cc.
    672   const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegionOrCallingCode(
    673       int country_calling_code,
    674       const string& region_code) const;
    675 
    676   // As per GetCountryCodeForRegion, but assumes the validity of the region_code
    677   // has already been checked.
    678   int GetCountryCodeForValidRegion(const string& region_code) const;
    679 
          // Selects a NumberFormat from |available_formats| for |national_number|
          // and returns a pointer to it.
          // NOTE(review): presumably the result points into |available_formats| and
          // is NULL when no format applies - confirm in the .cc.
    680   const NumberFormat* ChooseFormattingPatternForNumber(
    681       const RepeatedPtrField<NumberFormat>& available_formats,
    682       const string& national_number) const;
    683 
          // Formats |national_number| with |formatting_pattern| for the requested
          // |number_format|; the result is returned through |formatted_number|.
          // NOTE(review): how |carrier_code| is used (for example when it is empty)
          // is defined in the .cc - confirm there.
    684   void FormatNsnUsingPatternWithCarrier(
    685       const string& national_number,
    686       const NumberFormat& formatting_pattern,
    687       PhoneNumberUtil::PhoneNumberFormat number_format,
    688       const string& carrier_code,
    689       string* formatted_number) const;
    690 
          // Same parameters as FormatNsnUsingPatternWithCarrier minus the carrier
          // code.
          // NOTE(review): presumably a wrapper around that function - confirm.
    691   void FormatNsnUsingPattern(
    692       const string& national_number,
    693       const NumberFormat& formatting_pattern,
    694       PhoneNumberUtil::PhoneNumberFormat number_format,
    695       string* formatted_number) const;
    696 
    697   // Check if raw_input, which is assumed to be in the national format, has a
    698   // national prefix. The national prefix is assumed to be in digits-only form.
          // NOTE(review): the role of |region_code| is not described here; presumably
          // it is the region used when re-examining the input without its prefix -
          // confirm in the .cc.
    699   bool RawInputContainsNationalPrefix(
    700       const string& raw_input,
    701       const string& national_prefix,
    702       const string& region_code) const;
    703 
    704   // Returns true if a number is from a region whose national significant number
    705   // couldn't contain a leading zero, but has the italian_leading_zero field set
    706   // to true.
    707   bool HasUnexpectedItalianLeadingZero(const PhoneNumber& number) const;
    708 
          // NOTE(review): undocumented here; going by the name it reports whether a
          // formatting pattern exists for |number|, presumably in the metadata of
          // the number's region - confirm in the .cc.
    709   bool HasFormattingPatternForNumber(const PhoneNumber& number) const;
    710 
    711   // Simple wrapper of FormatNsnWithCarrier for the common case of
    712   // no carrier code.
    713   void FormatNsn(const string& number,
    714                  const PhoneMetadata& metadata,
    715                  PhoneNumberFormat number_format,
    716                  string* formatted_number) const;
    717 
          // Formats the national significant number |number| according to |metadata|
          // and |number_format|; the result is returned through |formatted_number|.
          // NOTE(review): how |carrier_code| is used (for example when it is empty)
          // is defined in the .cc - confirm there.
    718   void FormatNsnWithCarrier(const string& number,
    719                             const PhoneMetadata& metadata,
    720                             PhoneNumberFormat number_format,
    721                             const string& carrier_code,
    722                             string* formatted_number) const;
    723 
          // NOTE(review): undocumented here; presumably appends the extension of
          // |number|, formatted for |number_format|, to the string passed as
          // |extension|, and does nothing when the number has no extension -
          // confirm in the .cc.
    724   void MaybeAppendFormattedExtension(
    725       const PhoneNumber& number,
    726       const PhoneMetadata& metadata,
    727       PhoneNumberFormat number_format,
    728       string* extension) const;
    729 
          // NOTE(review): undocumented here; presumably picks from |region_codes| the
          // region that |number| belongs to and stores it in |region_code| - confirm
          // in the .cc, including what is stored when no region matches.
    730   void GetRegionCodeForNumberFromRegionList(
    731       const PhoneNumber& number,
    732       const list<string>& region_codes,
    733       string* region_code) const;
    734 
    735   // Strips the IDD from the start of the number if present. Helper function
    736   // used by MaybeStripInternationalPrefixAndNormalize.
          // |number| is modified in place.
          // NOTE(review): the bool result presumably reports whether a prefix was
          // stripped - confirm in the .cc.
    737   bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;
    738 
          // Normalizes |number| in place.
          // NOTE(review): the normalization rules themselves are defined in the .cc.
    739   void Normalize(string* number) const;
          // NOTE(review): undocumented here; presumably strips a leading plus sign or
          // the IDD described by |possible_idd_prefix| from |number|, normalizes
          // it, and reports how the country code was indicated - confirm in the .cc.
    740   PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
    741       const string& possible_idd_prefix,
    742       string* number) const;
    743 
          // NOTE(review): undocumented here; presumably strips the national prefix
          // described by |metadata| from |number|, stores any carrier code found in
          // |carrier_code|, and returns whether anything was stripped - confirm in
          // the .cc, including whether |carrier_code| may be NULL.
    744   bool MaybeStripNationalPrefixAndCarrierCode(
    745       const PhoneMetadata& metadata,
    746       string* number,
    747       string* carrier_code) const;
    748 
          // NOTE(review): undocumented here; presumably stores in |extracted_number|
          // the part of |number| that could be a phone number - confirm in the .cc.
    749   void ExtractPossibleNumber(const string& number,
    750                              string* extracted_number) const;
    751 
          // NOTE(review): undocumented here; presumably a syntactic plausibility
          // check on |number| rather than a full validation - confirm in the .cc.
    752   bool IsViablePhoneNumber(const string& number) const;
    753 
          // NOTE(review): undocumented here; presumably removes an extension from
          // |number|, stores it in |extension|, and returns whether one was found -
          // confirm in the .cc.
    754   bool MaybeStripExtension(string* number, string* extension) const;
    755 
          // NOTE(review): undocumented here; presumably removes a leading country
          // calling code from |national_number| and returns it - confirm in the
          // .cc, including the value returned when none is found.
    756   int ExtractCountryCode(string* national_number) const;
          // NOTE(review): undocumented here; see the .cc for the contract.
          // |keepRawInput| is the only camelCase parameter name in this part of the
          // header, whereas ParseHelper below uses |keep_raw_input|; consider
          // renaming it for consistency.
    757   ErrorType MaybeExtractCountryCode(
    758       const PhoneMetadata* default_region_metadata,
    759       bool keepRawInput,
    760       string* national_number,
    761       PhoneNumber* phone_number) const;
    762 
          // NOTE(review): undocumented here; presumably checks that |default_region|
          // is usable for parsing |number_to_parse| - confirm in the .cc.
    763   bool CheckRegionForParsing(
    764       const string& number_to_parse,
    765       const string& default_region) const;
    766 
          // NOTE(review): undocumented here; presumably the shared implementation
          // behind the public parsing methods, filling in |phone_number| and
          // returning an ErrorType - confirm in the .cc what |keep_raw_input| and
          // |check_region| toggle.
    767   ErrorType ParseHelper(const string& number_to_parse,
    768                         const string& default_region,
    769                         bool keep_raw_input,
    770                         bool check_region,
    771                         PhoneNumber* phone_number) const;
    772 
          // NOTE(review): undocumented here; presumably derives from
          // |number_to_parse| the string that is subsequently parsed as the
          // national number and stores it in |national_number| - confirm in the .cc.
    773   void BuildNationalNumberForParsing(const string& number_to_parse,
    774                                      string* national_number) const;
    775 
    776   // Returns true if the number can be dialled from outside the region, or if
    777   // that is unknown. If the number can only be dialled from within the region,
    778   // returns false. Does not check the number is a valid number.
    779   bool CanBeInternationallyDialled(const PhoneNumber& number) const;
    780 
          // NOTE(review): DISALLOW_COPY_AND_ASSIGN is a macro defined outside this
          // header; going by its name it disables copy construction and assignment
          // for PhoneNumberUtil - confirm its definition.
    781   DISALLOW_COPY_AND_ASSIGN(PhoneNumberUtil);
    782 };
    783 
    784 }  // namespace phonenumbers
    785 }  // namespace i18n
    786 
    787 #endif  // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
    788