1 // Copyright (C) 2009 The Libphonenumber Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Author: Shaopeng Jia 16 // Open-sourced by: Philippe Liard 17 18 #include "phonenumbers/phonenumberutil.h" 19 20 #include <algorithm> 21 #include <cctype> 22 #include <cstring> 23 #include <iterator> 24 #include <map> 25 #include <utility> 26 #include <vector> 27 28 #include <google/protobuf/message_lite.h> 29 #include <unicode/uchar.h> 30 #include <unicode/utf8.h> 31 32 #include "phonenumbers/asyoutypeformatter.h" 33 #include "phonenumbers/base/basictypes.h" 34 #include "phonenumbers/base/logging.h" 35 #include "phonenumbers/base/memory/singleton.h" 36 #include "phonenumbers/default_logger.h" 37 #include "phonenumbers/encoding_utils.h" 38 #include "phonenumbers/metadata.h" 39 #include "phonenumbers/normalize_utf8.h" 40 #include "phonenumbers/phonemetadata.pb.h" 41 #include "phonenumbers/phonenumber.h" 42 #include "phonenumbers/phonenumber.pb.h" 43 #include "phonenumbers/regexp_adapter.h" 44 #include "phonenumbers/regexp_cache.h" 45 #include "phonenumbers/regexp_factory.h" 46 #include "phonenumbers/region_code.h" 47 #include "phonenumbers/stl_util.h" 48 #include "phonenumbers/stringutil.h" 49 #include "phonenumbers/utf/unicodetext.h" 50 #include "phonenumbers/utf/utf.h" 51 52 namespace i18n { 53 namespace phonenumbers { 54 55 using std::make_pair; 56 using std::sort; 57 58 using google::protobuf::RepeatedPtrField; 59 60 // static 61 
const char PhoneNumberUtil::kPlusChars[] = "+\xEF\xBC\x8B"; /* "+" */ 62 // To find out the unicode code-point of the characters below in vim, highlight 63 // the character and type 'ga'. Note that the - is used to express ranges of 64 // full-width punctuation below, as well as being present in the expression 65 // itself. In emacs, you can use M-x unicode-what to query information about the 66 // unicode character. 67 // static 68 const char PhoneNumberUtil::kValidPunctuation[] = 69 /* "-x-- <U+200B><U+2060>().\\[\\]/~" */ 70 "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC" 71 "\x8F \xC2\xA0\xC2\xAD\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88" 72 "\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC"; 73 74 // static 75 const char PhoneNumberUtil::kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x"; 76 77 // static 78 const char PhoneNumberUtil::kRegionCodeForNonGeoEntity[] = "001"; 79 80 namespace { 81 82 // The prefix that needs to be inserted in front of a Colombian landline 83 // number when dialed from a mobile phone in Colombia. 84 const char kColombiaMobileToFixedLinePrefix[] = "3"; 85 86 // The kPlusSign signifies the international prefix. 87 const char kPlusSign[] = "+"; 88 89 const char kStarSign[] = "*"; 90 91 const char kRfc3966ExtnPrefix[] = ";ext="; 92 const char kRfc3966Prefix[] = "tel:"; 93 const char kRfc3966PhoneContext[] = ";phone-context="; 94 const char kRfc3966IsdnSubaddress[] = ";isub="; 95 96 const char kDigits[] = "\\p{Nd}"; 97 // We accept alpha characters in phone numbers, ASCII only. We store lower-case 98 // here only since our regular expressions are case-insensitive. 99 const char kValidAlpha[] = "a-z"; 100 101 // Default extension prefix to use when formatting. This will be put in front of 102 // any extension component of the number, after the main national number is 103 // formatted. 
For example, if you wish the default extension formatting to be " 104 // extn: 3456", then you should specify " extn: " here as the default extension 105 // prefix. This can be overridden by region-specific preferences. 106 const char kDefaultExtnPrefix[] = " ext. "; 107 108 // One-character symbols that can be used to indicate an extension. 109 const char kSingleExtnSymbolsForMatching[] = 110 "x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E"; 111 112 bool LoadCompiledInMetadata(PhoneMetadataCollection* metadata) { 113 if (!metadata->ParseFromArray(metadata_get(), metadata_size())) { 114 LOG(ERROR) << "Could not parse binary data."; 115 return false; 116 } 117 return true; 118 } 119 120 // Returns a pointer to the description inside the metadata of the appropriate 121 // type. 122 const PhoneNumberDesc* GetNumberDescByType( 123 const PhoneMetadata& metadata, 124 PhoneNumberUtil::PhoneNumberType type) { 125 switch (type) { 126 case PhoneNumberUtil::PREMIUM_RATE: 127 return &metadata.premium_rate(); 128 case PhoneNumberUtil::TOLL_FREE: 129 return &metadata.toll_free(); 130 case PhoneNumberUtil::MOBILE: 131 return &metadata.mobile(); 132 case PhoneNumberUtil::FIXED_LINE: 133 case PhoneNumberUtil::FIXED_LINE_OR_MOBILE: 134 return &metadata.fixed_line(); 135 case PhoneNumberUtil::SHARED_COST: 136 return &metadata.shared_cost(); 137 case PhoneNumberUtil::VOIP: 138 return &metadata.voip(); 139 case PhoneNumberUtil::PERSONAL_NUMBER: 140 return &metadata.personal_number(); 141 case PhoneNumberUtil::PAGER: 142 return &metadata.pager(); 143 case PhoneNumberUtil::UAN: 144 return &metadata.uan(); 145 case PhoneNumberUtil::VOICEMAIL: 146 return &metadata.voicemail(); 147 default: 148 return &metadata.general_desc(); 149 } 150 } 151 152 // A helper function that is used by Format and FormatByPattern. 
153 void PrefixNumberWithCountryCallingCode( 154 int country_calling_code, 155 PhoneNumberUtil::PhoneNumberFormat number_format, 156 string* formatted_number) { 157 switch (number_format) { 158 case PhoneNumberUtil::E164: 159 formatted_number->insert(0, StrCat(kPlusSign, country_calling_code)); 160 return; 161 case PhoneNumberUtil::INTERNATIONAL: 162 formatted_number->insert(0, StrCat(kPlusSign, country_calling_code, " ")); 163 return; 164 case PhoneNumberUtil::RFC3966: 165 formatted_number->insert(0, StrCat(kRfc3966Prefix, kPlusSign, 166 country_calling_code, "-")); 167 return; 168 case PhoneNumberUtil::NATIONAL: 169 default: 170 // Do nothing. 171 return; 172 } 173 } 174 175 // Returns true when one national number is the suffix of the other or both are 176 // the same. 177 bool IsNationalNumberSuffixOfTheOther(const PhoneNumber& first_number, 178 const PhoneNumber& second_number) { 179 const string& first_number_national_number = 180 SimpleItoa(static_cast<uint64>(first_number.national_number())); 181 const string& second_number_national_number = 182 SimpleItoa(static_cast<uint64>(second_number.national_number())); 183 // Note that HasSuffixString returns true if the numbers are equal. 
184 return HasSuffixString(first_number_national_number, 185 second_number_national_number) || 186 HasSuffixString(second_number_national_number, 187 first_number_national_number); 188 } 189 190 bool IsNumberMatchingDesc(const string& national_number, 191 const PhoneNumberDesc& number_desc, 192 RegExpCache* regexp_cache) { 193 return regexp_cache->GetRegExp(number_desc.possible_number_pattern()) 194 .FullMatch(national_number) && 195 regexp_cache->GetRegExp(number_desc.national_number_pattern()) 196 .FullMatch(national_number); 197 } 198 199 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper( 200 const string& national_number, const PhoneMetadata& metadata, 201 RegExpCache* regexp_cache) { 202 const PhoneNumberDesc& general_desc = metadata.general_desc(); 203 if (!general_desc.has_national_number_pattern() || 204 !IsNumberMatchingDesc(national_number, general_desc, regexp_cache)) { 205 VLOG(4) << "Number type unknown - doesn't match general national number" 206 << " pattern."; 207 return PhoneNumberUtil::UNKNOWN; 208 } 209 if (IsNumberMatchingDesc(national_number, metadata.premium_rate(), 210 regexp_cache)) { 211 VLOG(4) << "Number is a premium number."; 212 return PhoneNumberUtil::PREMIUM_RATE; 213 } 214 if (IsNumberMatchingDesc(national_number, metadata.toll_free(), 215 regexp_cache)) { 216 VLOG(4) << "Number is a toll-free number."; 217 return PhoneNumberUtil::TOLL_FREE; 218 } 219 if (IsNumberMatchingDesc(national_number, metadata.shared_cost(), 220 regexp_cache)) { 221 VLOG(4) << "Number is a shared cost number."; 222 return PhoneNumberUtil::SHARED_COST; 223 } 224 if (IsNumberMatchingDesc(national_number, metadata.voip(), regexp_cache)) { 225 VLOG(4) << "Number is a VOIP (Voice over IP) number."; 226 return PhoneNumberUtil::VOIP; 227 } 228 if (IsNumberMatchingDesc(national_number, metadata.personal_number(), 229 regexp_cache)) { 230 VLOG(4) << "Number is a personal number."; 231 return PhoneNumberUtil::PERSONAL_NUMBER; 232 } 233 if 
(IsNumberMatchingDesc(national_number, metadata.pager(), regexp_cache)) { 234 VLOG(4) << "Number is a pager number."; 235 return PhoneNumberUtil::PAGER; 236 } 237 if (IsNumberMatchingDesc(national_number, metadata.uan(), regexp_cache)) { 238 VLOG(4) << "Number is a UAN."; 239 return PhoneNumberUtil::UAN; 240 } 241 if (IsNumberMatchingDesc(national_number, metadata.voicemail(), 242 regexp_cache)) { 243 VLOG(4) << "Number is a voicemail number."; 244 return PhoneNumberUtil::VOICEMAIL; 245 } 246 247 bool is_fixed_line = 248 IsNumberMatchingDesc(national_number, metadata.fixed_line(), 249 regexp_cache); 250 if (is_fixed_line) { 251 if (metadata.same_mobile_and_fixed_line_pattern()) { 252 VLOG(4) << "Fixed-line and mobile patterns equal, number is fixed-line" 253 << " or mobile"; 254 return PhoneNumberUtil::FIXED_LINE_OR_MOBILE; 255 } else if (IsNumberMatchingDesc(national_number, metadata.mobile(), 256 regexp_cache)) { 257 VLOG(4) << "Fixed-line and mobile patterns differ, but number is " 258 << "still fixed-line or mobile"; 259 return PhoneNumberUtil::FIXED_LINE_OR_MOBILE; 260 } 261 VLOG(4) << "Number is a fixed line number."; 262 return PhoneNumberUtil::FIXED_LINE; 263 } 264 // Otherwise, test to see if the number is mobile. Only do this if certain 265 // that the patterns for mobile and fixed line aren't the same. 
266 if (!metadata.same_mobile_and_fixed_line_pattern() && 267 IsNumberMatchingDesc(national_number, metadata.mobile(), regexp_cache)) { 268 VLOG(4) << "Number is a mobile number."; 269 return PhoneNumberUtil::MOBILE; 270 } 271 VLOG(4) << "Number type unknown - doesn\'t match any specific number type" 272 << " pattern."; 273 return PhoneNumberUtil::UNKNOWN; 274 } 275 276 char32 ToUnicodeCodepoint(const char* unicode_char) { 277 char32 codepoint; 278 EncodingUtils::DecodeUTF8Char(unicode_char, &codepoint); 279 return codepoint; 280 } 281 282 // Helper initialiser method to create the regular-expression pattern to match 283 // extensions, allowing the one-codepoint extension symbols provided by 284 // single_extn_symbols. 285 // Note that there are currently three capturing groups for the extension itself 286 // - if this number is changed, MaybeStripExtension needs to be updated. 287 string CreateExtnPattern(const string& single_extn_symbols) { 288 static const string capturing_extn_digits = StrCat("([", kDigits, "]{1,7})"); 289 // The first regular expression covers RFC 3966 format, where the extension is 290 // added using ";ext=". The second more generic one starts with optional white 291 // space and ends with an optional full stop (.), followed by zero or more 292 // spaces/tabs and then the numbers themselves. The third one covers the 293 // special case of American numbers where the extension is written with a hash 294 // at the end, such as "- 503#". 295 // Note that the only capturing groups should be around the digits that you 296 // want to capture as part of the extension, or else parsing will fail! 297 // Canonical-equivalence doesn't seem to be an option with RE2, so we allow 298 // two options for representing the - the character itself, and one in the 299 // unicode decomposed form with the combining acute accent. 
300 return (StrCat( 301 kRfc3966ExtnPrefix, capturing_extn_digits, "|" 302 /* "[ \\t,]*(?:e?xt(?:ensi(?:o?|))?n?|??|single_extn_symbols|" 303 "int||anexo)" 304 "[:\\.]?[ \\t,-]*", capturing_extn_digits, "#?|" */ 305 "[ \xC2\xA0\\t,]*(?:e?xt(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|" 306 "(?:\xEF\xBD\x85)?\xEF\xBD\x98\xEF\xBD\x94(?:\xEF\xBD\x8E)?|" 307 "[", single_extn_symbols, "]|int|" 308 "\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)" 309 "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, 310 "#?|[- ]+([", kDigits, "]{1,5})#")); 311 } 312 313 // Normalizes a string of characters representing a phone number by replacing 314 // all characters found in the accompanying map with the values therein, and 315 // stripping all other characters if remove_non_matches is true. 316 // Parameters: 317 // number - a pointer to a string of characters representing a phone number to 318 // be normalized. 319 // normalization_replacements - a mapping of characters to what they should be 320 // replaced by in the normalized version of the phone number 321 // remove_non_matches - indicates whether characters that are not able to be 322 // replaced should be stripped from the number. If this is false, they will be 323 // left unchanged in the number. 
324 void NormalizeHelper(const map<char32, char>& normalization_replacements, 325 bool remove_non_matches, 326 string* number) { 327 DCHECK(number); 328 UnicodeText number_as_unicode; 329 number_as_unicode.PointToUTF8(number->data(), number->size()); 330 string normalized_number; 331 char unicode_char[5]; 332 for (UnicodeText::const_iterator it = number_as_unicode.begin(); 333 it != number_as_unicode.end(); 334 ++it) { 335 map<char32, char>::const_iterator found_glyph_pair = 336 normalization_replacements.find(*it); 337 if (found_glyph_pair != normalization_replacements.end()) { 338 normalized_number.push_back(found_glyph_pair->second); 339 } else if (!remove_non_matches) { 340 // Find out how long this unicode char is so we can append it all. 341 int char_len = it.get_utf8(unicode_char); 342 normalized_number.append(unicode_char, char_len); 343 } 344 // If neither of the above are true, we remove this character. 345 } 346 number->assign(normalized_number); 347 } 348 349 PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern( 350 const RegExp& number_pattern, const string& number) { 351 string extracted_number; 352 if (number_pattern.FullMatch(number, &extracted_number)) { 353 return PhoneNumberUtil::IS_POSSIBLE; 354 } 355 if (number_pattern.PartialMatch(number, &extracted_number)) { 356 return PhoneNumberUtil::TOO_LONG; 357 } else { 358 return PhoneNumberUtil::TOO_SHORT; 359 } 360 } 361 362 } // namespace 363 364 void PhoneNumberUtil::SetLogger(Logger* logger) { 365 logger_.reset(logger); 366 Logger::set_logger_impl(logger_.get()); 367 } 368 369 class PhoneNumberRegExpsAndMappings { 370 private: 371 void InitializeMapsAndSets() { 372 diallable_char_mappings_.insert(make_pair('+', '+')); 373 diallable_char_mappings_.insert(make_pair('*', '*')); 374 // Here we insert all punctuation symbols that we wish to respect when 375 // formatting alpha numbers, as they show the intended number groupings. 
376 all_plus_number_grouping_symbols_.insert( 377 make_pair(ToUnicodeCodepoint("-"), '-')); 378 all_plus_number_grouping_symbols_.insert( 379 make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D" /* "" */), '-')); 380 all_plus_number_grouping_symbols_.insert( 381 make_pair(ToUnicodeCodepoint("\xE2\x80\x90" /* "" */), '-')); 382 all_plus_number_grouping_symbols_.insert( 383 make_pair(ToUnicodeCodepoint("\xE2\x80\x91" /* "" */), '-')); 384 all_plus_number_grouping_symbols_.insert( 385 make_pair(ToUnicodeCodepoint("\xE2\x80\x92" /* "" */), '-')); 386 all_plus_number_grouping_symbols_.insert( 387 make_pair(ToUnicodeCodepoint("\xE2\x80\x93" /* "" */), '-')); 388 all_plus_number_grouping_symbols_.insert( 389 make_pair(ToUnicodeCodepoint("\xE2\x80\x94" /* "" */), '-')); 390 all_plus_number_grouping_symbols_.insert( 391 make_pair(ToUnicodeCodepoint("\xE2\x80\x95" /* "" */), '-')); 392 all_plus_number_grouping_symbols_.insert( 393 make_pair(ToUnicodeCodepoint("\xE2\x88\x92" /* "" */), '-')); 394 all_plus_number_grouping_symbols_.insert( 395 make_pair(ToUnicodeCodepoint("/"), '/')); 396 all_plus_number_grouping_symbols_.insert( 397 make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F" /* "" */), '/')); 398 all_plus_number_grouping_symbols_.insert( 399 make_pair(ToUnicodeCodepoint(" "), ' ')); 400 all_plus_number_grouping_symbols_.insert( 401 make_pair(ToUnicodeCodepoint("\xE3\x80\x80" /* "" */), ' ')); 402 all_plus_number_grouping_symbols_.insert( 403 make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' ')); 404 all_plus_number_grouping_symbols_.insert( 405 make_pair(ToUnicodeCodepoint("."), '.')); 406 all_plus_number_grouping_symbols_.insert( 407 make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E" /* "" */), '.')); 408 // Only the upper-case letters are added here - the lower-case versions are 409 // added programmatically. 
410 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("A"), '2')); 411 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("B"), '2')); 412 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("C"), '2')); 413 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("D"), '3')); 414 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("E"), '3')); 415 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("F"), '3')); 416 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("G"), '4')); 417 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("H"), '4')); 418 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("I"), '4')); 419 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("J"), '5')); 420 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("K"), '5')); 421 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("L"), '5')); 422 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("M"), '6')); 423 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("N"), '6')); 424 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("O"), '6')); 425 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("P"), '7')); 426 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Q"), '7')); 427 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("R"), '7')); 428 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("S"), '7')); 429 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("T"), '8')); 430 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("U"), '8')); 431 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("V"), '8')); 432 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("W"), '9')); 433 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("X"), '9')); 434 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Y"), '9')); 435 alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Z"), '9')); 436 map<char32, char> lower_case_mappings; 437 map<char32, char> alpha_letters; 438 for (map<char32, char>::const_iterator it = alpha_mappings_.begin(); 439 it != alpha_mappings_.end(); 440 ++it) { 441 // 
Convert all the upper-case ASCII letters to lower-case. 442 if (it->first < 128) { 443 char letter_as_upper = static_cast<char>(it->first); 444 char32 letter_as_lower = static_cast<char32>(tolower(letter_as_upper)); 445 lower_case_mappings.insert(make_pair(letter_as_lower, it->second)); 446 // Add the letters in both variants to the alpha_letters map. This just 447 // pairs each letter with its upper-case representation so that it can 448 // be retained when normalising alpha numbers. 449 alpha_letters.insert(make_pair(letter_as_lower, letter_as_upper)); 450 alpha_letters.insert(make_pair(it->first, letter_as_upper)); 451 } 452 } 453 // In the Java version we don't insert the lower-case mappings in the map, 454 // because we convert to upper case on the fly. Doing this here would 455 // involve pulling in all of ICU, which we don't want to do if we don't have 456 // to. 457 alpha_mappings_.insert(lower_case_mappings.begin(), 458 lower_case_mappings.end()); 459 alpha_phone_mappings_.insert(alpha_mappings_.begin(), 460 alpha_mappings_.end()); 461 all_plus_number_grouping_symbols_.insert(alpha_letters.begin(), 462 alpha_letters.end()); 463 // Add the ASCII digits so that they don't get deleted by NormalizeHelper(). 464 for (char c = '0'; c <= '9'; ++c) { 465 diallable_char_mappings_.insert(make_pair(c, c)); 466 alpha_phone_mappings_.insert(make_pair(c, c)); 467 all_plus_number_grouping_symbols_.insert(make_pair(c, c)); 468 } 469 470 mobile_token_mappings_.insert(make_pair(52, '1')); 471 mobile_token_mappings_.insert(make_pair(54, '9')); 472 } 473 474 // Small string helpers since StrCat has a maximum number of arguments. These 475 // are both used to build valid_phone_number_. 476 const string punctuation_and_star_sign_; 477 const string min_length_phone_number_pattern_; 478 479 // Regular expression of viable phone numbers. This is location independent. 
480 // Checks we have at least three leading digits, and only valid punctuation, 481 // alpha characters and digits in the phone number. Does not include extension 482 // data. The symbol 'x' is allowed here as valid punctuation since it is often 483 // used as a placeholder for carrier codes, for example in Brazilian phone 484 // numbers. We also allow multiple plus-signs at the start. 485 // Corresponds to the following: 486 // [digits]{minLengthNsn}| 487 // plus_sign*(([punctuation]|[star])*[digits]){3,} 488 // ([punctuation]|[star]|[digits]|[alpha])* 489 // 490 // The first reg-ex is to allow short numbers (two digits long) to be parsed 491 // if they are entered as "15" etc, but only if there is no punctuation in 492 // them. The second expression restricts the number of digits to three or 493 // more, but then allows them to be in international form, and to have 494 // alpha-characters and punctuation. 495 const string valid_phone_number_; 496 497 // Regexp of all possible ways to write extensions, for use when parsing. This 498 // will be run as a case-insensitive regexp match. Wide character versions are 499 // also provided after each ASCII version. 500 // For parsing, we are slightly more lenient in our interpretation than for 501 // matching. Here we allow a "comma" as a possible extension indicator. When 502 // matching, this is hardly ever used to indicate this. 503 const string extn_patterns_for_parsing_; 504 505 public: 506 scoped_ptr<const AbstractRegExpFactory> regexp_factory_; 507 scoped_ptr<RegExpCache> regexp_cache_; 508 509 // A map that contains characters that are essential when dialling. That means 510 // any of the characters in this map must not be removed from a number when 511 // dialing, otherwise the call will not reach the intended destination. 512 map<char32, char> diallable_char_mappings_; 513 // These mappings map a character (key) to a specific digit that should 514 // replace it for normalization purposes. 
515 map<char32, char> alpha_mappings_; 516 // For performance reasons, store a map of combining alpha_mappings with ASCII 517 // digits. 518 map<char32, char> alpha_phone_mappings_; 519 520 // Separate map of all symbols that we wish to retain when formatting alpha 521 // numbers. This includes digits, ascii letters and number grouping symbols 522 // such as "-" and " ". 523 map<char32, char> all_plus_number_grouping_symbols_; 524 525 // Map of country calling codes that use a mobile token before the area code. 526 // One example of when this is relevant is when determining the length of the 527 // national destination code, which should be the length of the area code plus 528 // the length of the mobile token. 529 map<int, char> mobile_token_mappings_; 530 531 // Pattern that makes it easy to distinguish whether a region has a unique 532 // international dialing prefix or not. If a region has a unique international 533 // prefix (e.g. 011 in USA), it will be represented as a string that contains 534 // a sequence of ASCII digits. If there are multiple available international 535 // prefixes in a region, they will be represented as a regex string that 536 // always contains character(s) other than ASCII digits. 537 // Note this regex also includes tilde, which signals waiting for the tone. 538 scoped_ptr<const RegExp> unique_international_prefix_; 539 540 scoped_ptr<const RegExp> digits_pattern_; 541 scoped_ptr<const RegExp> capturing_digit_pattern_; 542 scoped_ptr<const RegExp> capturing_ascii_digits_pattern_; 543 544 // Regular expression of acceptable characters that may start a phone number 545 // for the purposes of parsing. This allows us to strip away meaningless 546 // prefixes to phone numbers that may be mistakenly given to us. This consists 547 // of digits, the plus symbol and arabic-indic digits. This does not contain 548 // alpha characters, although they may be used later in the number. 
It also 549 // does not include other punctuation, as this will be stripped later during 550 // parsing and is of no information value when parsing a number. The string 551 // starting with this valid character is captured. 552 // This corresponds to VALID_START_CHAR in the java version. 553 scoped_ptr<const RegExp> valid_start_char_pattern_; 554 555 // Regular expression of valid characters before a marker that might indicate 556 // a second number. 557 scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern_; 558 559 // Regular expression of trailing characters that we want to remove. We remove 560 // all characters that are not alpha or numerical characters. The hash 561 // character is retained here, as it may signify the previous block was an 562 // extension. Note the capturing block at the start to capture the rest of the 563 // number if this was a match. 564 // This corresponds to UNWANTED_END_CHAR_PATTERN in the java version. 565 scoped_ptr<const RegExp> unwanted_end_char_pattern_; 566 567 // Regular expression of groups of valid punctuation characters. 568 scoped_ptr<const RegExp> separator_pattern_; 569 570 // Regexp of all possible ways to write extensions, for use when finding phone 571 // numbers in text. This will be run as a case-insensitive regexp match. Wide 572 // character versions are also provided after each ASCII version. 573 const string extn_patterns_for_matching_; 574 575 // Regexp of all known extension prefixes used by different regions followed 576 // by 1 or more valid digits, for use when parsing. 577 scoped_ptr<const RegExp> extn_pattern_; 578 579 // We append optionally the extension pattern to the end here, as a valid 580 // phone number may have an extension prefix appended, followed by 1 or more 581 // digits. 
582 scoped_ptr<const RegExp> valid_phone_number_pattern_; 583 584 // We use this pattern to check if the phone number has at least three letters 585 // in it - if so, then we treat it as a number where some phone-number digits 586 // are represented by letters. 587 scoped_ptr<const RegExp> valid_alpha_phone_pattern_; 588 589 scoped_ptr<const RegExp> first_group_capturing_pattern_; 590 591 scoped_ptr<const RegExp> carrier_code_pattern_; 592 593 scoped_ptr<const RegExp> plus_chars_pattern_; 594 595 PhoneNumberRegExpsAndMappings() 596 : punctuation_and_star_sign_(StrCat(PhoneNumberUtil::kValidPunctuation, 597 kStarSign)), 598 min_length_phone_number_pattern_( 599 StrCat(kDigits, "{", PhoneNumberUtil::kMinLengthForNsn, "}")), 600 valid_phone_number_( 601 StrCat(min_length_phone_number_pattern_, "|[", 602 PhoneNumberUtil::kPlusChars, "]*(?:[", 603 punctuation_and_star_sign_, "]*", 604 kDigits, "){3,}[", kValidAlpha, 605 punctuation_and_star_sign_, kDigits, 606 "]*")), 607 extn_patterns_for_parsing_( 608 CreateExtnPattern(StrCat(",", kSingleExtnSymbolsForMatching))), 609 regexp_factory_(new RegExpFactory()), 610 regexp_cache_(new RegExpCache(*regexp_factory_.get(), 128)), 611 diallable_char_mappings_(), 612 alpha_mappings_(), 613 alpha_phone_mappings_(), 614 all_plus_number_grouping_symbols_(), 615 mobile_token_mappings_(), 616 unique_international_prefix_(regexp_factory_->CreateRegExp( 617 /* "[\\d]+(?:[~][\\d]+)?" 
*/ 618 "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")), 619 digits_pattern_( 620 regexp_factory_->CreateRegExp(StrCat("[", kDigits, "]*"))), 621 capturing_digit_pattern_( 622 regexp_factory_->CreateRegExp(StrCat("([", kDigits, "])"))), 623 capturing_ascii_digits_pattern_( 624 regexp_factory_->CreateRegExp("(\\d+)")), 625 valid_start_char_pattern_(regexp_factory_->CreateRegExp( 626 StrCat("[", PhoneNumberUtil::kPlusChars, kDigits, "]"))), 627 capture_up_to_second_number_start_pattern_( 628 regexp_factory_->CreateRegExp( 629 PhoneNumberUtil::kCaptureUpToSecondNumberStart)), 630 unwanted_end_char_pattern_( 631 regexp_factory_->CreateRegExp("[^\\p{N}\\p{L}#]")), 632 separator_pattern_( 633 regexp_factory_->CreateRegExp( 634 StrCat("[", PhoneNumberUtil::kValidPunctuation, "]+"))), 635 extn_patterns_for_matching_( 636 CreateExtnPattern(kSingleExtnSymbolsForMatching)), 637 extn_pattern_(regexp_factory_->CreateRegExp( 638 StrCat("(?i)(?:", extn_patterns_for_parsing_, ")$"))), 639 valid_phone_number_pattern_(regexp_factory_->CreateRegExp( 640 StrCat("(?i)", valid_phone_number_, 641 "(?:", extn_patterns_for_parsing_, ")?"))), 642 valid_alpha_phone_pattern_(regexp_factory_->CreateRegExp( 643 StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))), 644 // The first_group_capturing_pattern was originally set to $1 but there 645 // are some countries for which the first group is not used in the 646 // national pattern (e.g. Argentina) so the $1 group does not match 647 // correctly. Therefore, we use \d, so that the first group actually 648 // used in the pattern will be matched. 
649 first_group_capturing_pattern_( 650 regexp_factory_->CreateRegExp("(\\$\\d)")), 651 carrier_code_pattern_(regexp_factory_->CreateRegExp("\\$CC")), 652 plus_chars_pattern_( 653 regexp_factory_->CreateRegExp( 654 StrCat("[", PhoneNumberUtil::kPlusChars, "]+"))) { 655 InitializeMapsAndSets(); 656 } 657 658 private: 659 DISALLOW_COPY_AND_ASSIGN(PhoneNumberRegExpsAndMappings); 660 }; 661 662 // Private constructor. Also takes care of initialisation. 663 PhoneNumberUtil::PhoneNumberUtil() 664 : logger_(Logger::set_logger_impl(new NullLogger())), 665 reg_exps_(new PhoneNumberRegExpsAndMappings), 666 country_calling_code_to_region_code_map_(new vector<IntRegionsPair>()), 667 nanpa_regions_(new set<string>()), 668 region_to_metadata_map_(new map<string, PhoneMetadata>()), 669 country_code_to_non_geographical_metadata_map_( 670 new map<int, PhoneMetadata>) { 671 Logger::set_logger_impl(logger_.get()); 672 // TODO: Update the java version to put the contents of the init 673 // method inside the constructor as well to keep both in sync. 674 PhoneMetadataCollection metadata_collection; 675 if (!LoadCompiledInMetadata(&metadata_collection)) { 676 LOG(DFATAL) << "Could not parse compiled-in metadata."; 677 return; 678 } 679 // Storing data in a temporary map to make it easier to find other regions 680 // that share a country calling code when inserting data. 
681 map<int, list<string>* > country_calling_code_to_region_map; 682 for (RepeatedPtrField<PhoneMetadata>::const_iterator it = 683 metadata_collection.metadata().begin(); 684 it != metadata_collection.metadata().end(); 685 ++it) { 686 const string& region_code = it->id(); 687 if (region_code == RegionCode::GetUnknown()) { 688 continue; 689 } 690 691 int country_calling_code = it->country_code(); 692 if (kRegionCodeForNonGeoEntity == region_code) { 693 country_code_to_non_geographical_metadata_map_->insert( 694 make_pair(country_calling_code, *it)); 695 } else { 696 region_to_metadata_map_->insert(make_pair(region_code, *it)); 697 } 698 map<int, list<string>* >::iterator calling_code_in_map = 699 country_calling_code_to_region_map.find(country_calling_code); 700 if (calling_code_in_map != country_calling_code_to_region_map.end()) { 701 if (it->main_country_for_code()) { 702 calling_code_in_map->second->push_front(region_code); 703 } else { 704 calling_code_in_map->second->push_back(region_code); 705 } 706 } else { 707 // For most country calling codes, there will be only one region code. 708 list<string>* list_with_region_code = new list<string>(); 709 list_with_region_code->push_back(region_code); 710 country_calling_code_to_region_map.insert( 711 make_pair(country_calling_code, list_with_region_code)); 712 } 713 if (country_calling_code == kNanpaCountryCode) { 714 nanpa_regions_->insert(region_code); 715 } 716 } 717 718 country_calling_code_to_region_code_map_->insert( 719 country_calling_code_to_region_code_map_->begin(), 720 country_calling_code_to_region_map.begin(), 721 country_calling_code_to_region_map.end()); 722 // Sort all the pairs in ascending order according to country calling code. 
723 sort(country_calling_code_to_region_code_map_->begin(), 724 country_calling_code_to_region_code_map_->end(), 725 OrderByFirst()); 726 } 727 728 PhoneNumberUtil::~PhoneNumberUtil() { 729 STLDeleteContainerPairSecondPointers( 730 country_calling_code_to_region_code_map_->begin(), 731 country_calling_code_to_region_code_map_->end()); 732 } 733 734 void PhoneNumberUtil::GetSupportedRegions(set<string>* regions) const { 735 DCHECK(regions); 736 for (map<string, PhoneMetadata>::const_iterator it = 737 region_to_metadata_map_->begin(); it != region_to_metadata_map_->end(); 738 ++it) { 739 regions->insert(it->first); 740 } 741 } 742 743 // Public wrapper function to get a PhoneNumberUtil instance with the default 744 // metadata file. 745 // static 746 PhoneNumberUtil* PhoneNumberUtil::GetInstance() { 747 return Singleton<PhoneNumberUtil>::GetInstance(); 748 } 749 750 const string& PhoneNumberUtil::GetExtnPatternsForMatching() const { 751 return reg_exps_->extn_patterns_for_matching_; 752 } 753 754 bool PhoneNumberUtil::StartsWithPlusCharsPattern(const string& number) 755 const { 756 const scoped_ptr<RegExpInput> number_string_piece( 757 reg_exps_->regexp_factory_->CreateInput(number)); 758 return reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get()); 759 } 760 761 bool PhoneNumberUtil::ContainsOnlyValidDigits(const string& s) const { 762 return reg_exps_->digits_pattern_->FullMatch(s); 763 } 764 765 void PhoneNumberUtil::TrimUnwantedEndChars(string* number) const { 766 DCHECK(number); 767 UnicodeText number_as_unicode; 768 number_as_unicode.PointToUTF8(number->data(), number->size()); 769 char current_char[5]; 770 int len; 771 UnicodeText::const_reverse_iterator reverse_it(number_as_unicode.end()); 772 for (; reverse_it.base() != number_as_unicode.begin(); ++reverse_it) { 773 len = reverse_it.get_utf8(current_char); 774 current_char[len] = '\0'; 775 if (!reg_exps_->unwanted_end_char_pattern_->FullMatch(current_char)) { 776 break; 777 } 778 } 779 780 
number->assign(UnicodeText::UTF8Substring(number_as_unicode.begin(), 781 reverse_it.base())); 782 } 783 784 bool PhoneNumberUtil::IsFormatEligibleForAsYouTypeFormatter( 785 const string& format) const { 786 // A pattern that is used to determine if a numberFormat under 787 // availableFormats is eligible to be used by the AYTF. It is eligible when 788 // the format element under numberFormat contains groups of the dollar sign 789 // followed by a single digit, separated by valid phone number punctuation. 790 // This prevents invalid punctuation (such as the star sign in Israeli star 791 // numbers) getting into the output of the AYTF. 792 const RegExp& eligible_format_pattern = reg_exps_->regexp_cache_->GetRegExp( 793 StrCat("[", kValidPunctuation, "]*", "(\\$\\d", "[", 794 kValidPunctuation, "]*)+")); 795 return eligible_format_pattern.FullMatch(format); 796 } 797 798 bool PhoneNumberUtil::FormattingRuleHasFirstGroupOnly( 799 const string& national_prefix_formatting_rule) const { 800 // A pattern that is used to determine if the national prefix formatting rule 801 // has the first group only, i.e., does not start with the national prefix. 802 // Note that the pattern explicitly allows for unbalanced parentheses. 
803 const RegExp& first_group_only_prefix_pattern = 804 reg_exps_->regexp_cache_->GetRegExp("\\(?\\$1\\)?"); 805 return national_prefix_formatting_rule.empty() || 806 first_group_only_prefix_pattern.FullMatch( 807 national_prefix_formatting_rule); 808 } 809 810 void PhoneNumberUtil::GetNddPrefixForRegion(const string& region_code, 811 bool strip_non_digits, 812 string* national_prefix) const { 813 DCHECK(national_prefix); 814 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); 815 if (!metadata) { 816 LOG(WARNING) << "Invalid or unknown region code (" << region_code 817 << ") provided."; 818 return; 819 } 820 national_prefix->assign(metadata->national_prefix()); 821 if (strip_non_digits) { 822 // Note: if any other non-numeric symbols are ever used in national 823 // prefixes, these would have to be removed here as well. 824 strrmm(national_prefix, "~"); 825 } 826 } 827 828 bool PhoneNumberUtil::IsValidRegionCode(const string& region_code) const { 829 return (region_to_metadata_map_->find(region_code) != 830 region_to_metadata_map_->end()); 831 } 832 833 bool PhoneNumberUtil::HasValidCountryCallingCode( 834 int country_calling_code) const { 835 // Create an IntRegionsPair with the country_code passed in, and use it to 836 // locate the pair with the same country_code in the sorted vector. 837 IntRegionsPair target_pair; 838 target_pair.first = country_calling_code; 839 return (binary_search(country_calling_code_to_region_code_map_->begin(), 840 country_calling_code_to_region_code_map_->end(), 841 target_pair, OrderByFirst())); 842 } 843 844 // Returns a pointer to the phone metadata for the appropriate region or NULL 845 // if the region code is invalid or unknown. 
846 const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegion( 847 const string& region_code) const { 848 map<string, PhoneMetadata>::const_iterator it = 849 region_to_metadata_map_->find(region_code); 850 if (it != region_to_metadata_map_->end()) { 851 return &it->second; 852 } 853 return NULL; 854 } 855 856 const PhoneMetadata* PhoneNumberUtil::GetMetadataForNonGeographicalRegion( 857 int country_calling_code) const { 858 map<int, PhoneMetadata>::const_iterator it = 859 country_code_to_non_geographical_metadata_map_->find( 860 country_calling_code); 861 if (it != country_code_to_non_geographical_metadata_map_->end()) { 862 return &it->second; 863 } 864 return NULL; 865 } 866 867 void PhoneNumberUtil::Format(const PhoneNumber& number, 868 PhoneNumberFormat number_format, 869 string* formatted_number) const { 870 DCHECK(formatted_number); 871 if (number.national_number() == 0) { 872 const string& raw_input = number.raw_input(); 873 if (!raw_input.empty()) { 874 // Unparseable numbers that kept their raw input just use that. 875 // This is the only case where a number can be formatted as E164 without a 876 // leading '+' symbol (but the original number wasn't parseable anyway). 877 // TODO: Consider removing the 'if' above so that unparseable 878 // strings without raw input format to the empty string instead of "+00". 879 formatted_number->assign(raw_input); 880 return; 881 } 882 } 883 int country_calling_code = number.country_code(); 884 string national_significant_number; 885 GetNationalSignificantNumber(number, &national_significant_number); 886 if (number_format == E164) { 887 // Early exit for E164 case (even if the country calling code is invalid) 888 // since no formatting of the national number needs to be applied. 889 // Extensions are not formatted. 
890 formatted_number->assign(national_significant_number); 891 PrefixNumberWithCountryCallingCode(country_calling_code, E164, 892 formatted_number); 893 return; 894 } 895 if (!HasValidCountryCallingCode(country_calling_code)) { 896 formatted_number->assign(national_significant_number); 897 return; 898 } 899 // Note here that all NANPA formatting rules are contained by US, so we use 900 // that to format NANPA numbers. The same applies to Russian Fed regions - 901 // rules are contained by Russia. French Indian Ocean country rules are 902 // contained by Runion. 903 string region_code; 904 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); 905 // Metadata cannot be NULL because the country calling code is valid (which 906 // means that the region code cannot be ZZ and must be one of our supported 907 // region codes). 908 const PhoneMetadata* metadata = 909 GetMetadataForRegionOrCallingCode(country_calling_code, region_code); 910 FormatNsn(national_significant_number, *metadata, number_format, 911 formatted_number); 912 MaybeAppendFormattedExtension(number, *metadata, number_format, 913 formatted_number); 914 PrefixNumberWithCountryCallingCode(country_calling_code, number_format, 915 formatted_number); 916 } 917 918 void PhoneNumberUtil::FormatByPattern( 919 const PhoneNumber& number, 920 PhoneNumberFormat number_format, 921 const RepeatedPtrField<NumberFormat>& user_defined_formats, 922 string* formatted_number) const { 923 DCHECK(formatted_number); 924 int country_calling_code = number.country_code(); 925 // Note GetRegionCodeForCountryCode() is used because formatting information 926 // for regions which share a country calling code is contained by only one 927 // region for performance reasons. For example, for NANPA regions it will be 928 // contained in the metadata for US. 
929 string national_significant_number; 930 GetNationalSignificantNumber(number, &national_significant_number); 931 if (!HasValidCountryCallingCode(country_calling_code)) { 932 formatted_number->assign(national_significant_number); 933 return; 934 } 935 string region_code; 936 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); 937 // Metadata cannot be NULL because the country calling code is valid. 938 const PhoneMetadata* metadata = 939 GetMetadataForRegionOrCallingCode(country_calling_code, region_code); 940 const NumberFormat* formatting_pattern = 941 ChooseFormattingPatternForNumber(user_defined_formats, 942 national_significant_number); 943 if (!formatting_pattern) { 944 // If no pattern above is matched, we format the number as a whole. 945 formatted_number->assign(national_significant_number); 946 } else { 947 NumberFormat num_format_copy; 948 // Before we do a replacement of the national prefix pattern $NP with the 949 // national prefix, we need to copy the rule so that subsequent replacements 950 // for different numbers have the appropriate national prefix. 951 num_format_copy.MergeFrom(*formatting_pattern); 952 string national_prefix_formatting_rule( 953 formatting_pattern->national_prefix_formatting_rule()); 954 if (!national_prefix_formatting_rule.empty()) { 955 const string& national_prefix = metadata->national_prefix(); 956 if (!national_prefix.empty()) { 957 // Replace $NP with national prefix and $FG with the first group ($1). 958 GlobalReplaceSubstring("$NP", national_prefix, 959 &national_prefix_formatting_rule); 960 GlobalReplaceSubstring("$FG", "$1", 961 &national_prefix_formatting_rule); 962 num_format_copy.set_national_prefix_formatting_rule( 963 national_prefix_formatting_rule); 964 } else { 965 // We don't want to have a rule for how to format the national prefix if 966 // there isn't one. 
967 num_format_copy.clear_national_prefix_formatting_rule(); 968 } 969 } 970 FormatNsnUsingPattern(national_significant_number, num_format_copy, 971 number_format, formatted_number); 972 } 973 MaybeAppendFormattedExtension(number, *metadata, NATIONAL, formatted_number); 974 PrefixNumberWithCountryCallingCode(country_calling_code, number_format, 975 formatted_number); 976 } 977 978 void PhoneNumberUtil::FormatNationalNumberWithCarrierCode( 979 const PhoneNumber& number, 980 const string& carrier_code, 981 string* formatted_number) const { 982 int country_calling_code = number.country_code(); 983 string national_significant_number; 984 GetNationalSignificantNumber(number, &national_significant_number); 985 if (!HasValidCountryCallingCode(country_calling_code)) { 986 formatted_number->assign(national_significant_number); 987 return; 988 } 989 990 // Note GetRegionCodeForCountryCode() is used because formatting information 991 // for regions which share a country calling code is contained by only one 992 // region for performance reasons. For example, for NANPA regions it will be 993 // contained in the metadata for US. 994 string region_code; 995 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); 996 // Metadata cannot be NULL because the country calling code is valid. 997 const PhoneMetadata* metadata = 998 GetMetadataForRegionOrCallingCode(country_calling_code, region_code); 999 FormatNsnWithCarrier(national_significant_number, *metadata, NATIONAL, 1000 carrier_code, formatted_number); 1001 MaybeAppendFormattedExtension(number, *metadata, NATIONAL, formatted_number); 1002 PrefixNumberWithCountryCallingCode(country_calling_code, NATIONAL, 1003 formatted_number); 1004 } 1005 1006 const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegionOrCallingCode( 1007 int country_calling_code, const string& region_code) const { 1008 return kRegionCodeForNonGeoEntity == region_code 1009 ? 
GetMetadataForNonGeographicalRegion(country_calling_code) 1010 : GetMetadataForRegion(region_code); 1011 } 1012 1013 void PhoneNumberUtil::FormatNationalNumberWithPreferredCarrierCode( 1014 const PhoneNumber& number, 1015 const string& fallback_carrier_code, 1016 string* formatted_number) const { 1017 FormatNationalNumberWithCarrierCode( 1018 number, 1019 number.has_preferred_domestic_carrier_code() 1020 ? number.preferred_domestic_carrier_code() 1021 : fallback_carrier_code, 1022 formatted_number); 1023 } 1024 1025 void PhoneNumberUtil::FormatNumberForMobileDialing( 1026 const PhoneNumber& number, 1027 const string& calling_from, 1028 bool with_formatting, 1029 string* formatted_number) const { 1030 int country_calling_code = number.country_code(); 1031 if (!HasValidCountryCallingCode(country_calling_code)) { 1032 formatted_number->assign(number.has_raw_input() ? number.raw_input() : ""); 1033 return; 1034 } 1035 1036 formatted_number->assign(""); 1037 // Clear the extension, as that part cannot normally be dialed together with 1038 // the main number. 1039 PhoneNumber number_no_extension(number); 1040 number_no_extension.clear_extension(); 1041 string region_code; 1042 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); 1043 if (calling_from == region_code) { 1044 PhoneNumberType number_type = GetNumberType(number_no_extension); 1045 bool is_fixed_line_or_mobile = 1046 (number_type == FIXED_LINE) || (number_type == MOBILE) || 1047 (number_type == FIXED_LINE_OR_MOBILE); 1048 // Carrier codes may be needed in some countries. We handle this here. 
1049 if ((region_code == "CO") && (number_type == FIXED_LINE)) { 1050 FormatNationalNumberWithCarrierCode( 1051 number_no_extension, kColombiaMobileToFixedLinePrefix, 1052 formatted_number); 1053 } else if ((region_code == "BR") && (is_fixed_line_or_mobile)) { 1054 if (number_no_extension.has_preferred_domestic_carrier_code()) { 1055 FormatNationalNumberWithPreferredCarrierCode(number_no_extension, "", 1056 formatted_number); 1057 } else { 1058 // Brazilian fixed line and mobile numbers need to be dialed with a 1059 // carrier code when called within Brazil. Without that, most of the 1060 // carriers won't connect the call. Because of that, we return an empty 1061 // string here. 1062 formatted_number->assign(""); 1063 } 1064 } else if (region_code == "HU") { 1065 // The national format for HU numbers doesn't contain the national prefix, 1066 // because that is how numbers are normally written down. However, the 1067 // national prefix is obligatory when dialing from a mobile phone. As a 1068 // result, we add it back here. 1069 Format(number_no_extension, NATIONAL, formatted_number); 1070 string hu_national_prefix; 1071 GetNddPrefixForRegion(region_code, true /* strip non-digits */, 1072 &hu_national_prefix); 1073 formatted_number->assign( 1074 StrCat(hu_national_prefix, " ", *formatted_number)); 1075 } else { 1076 // For NANPA countries, non-geographical countries, Mexican and Chilean 1077 // fixed line and mobile numbers, we output international format for 1078 // numbers that can be dialed internationally as that always works. 1079 if ((country_calling_code == kNanpaCountryCode || 1080 region_code == kRegionCodeForNonGeoEntity || 1081 // MX fixed line and mobile numbers should always be formatted in 1082 // international format, even when dialed within MX. For national 1083 // format to work, a carrier code needs to be used, and the correct 1084 // carrier code depends on if the caller and callee are from the same 1085 // local area. 
It is trickier to get that to work correctly than 1086 // using international format, which is tested to work fine on all 1087 // carriers. 1088 // CL fixed line numbers need the national prefix when dialing in the 1089 // national format, but don't have it when used for display. The 1090 // reverse is true for mobile numbers. As a result, we output them in 1091 // the international format to make it work. 1092 ((region_code == "MX" || region_code == "CL") && 1093 is_fixed_line_or_mobile)) && 1094 CanBeInternationallyDialled(number_no_extension)) { 1095 Format(number_no_extension, INTERNATIONAL, formatted_number); 1096 } else { 1097 Format(number_no_extension, NATIONAL, formatted_number); 1098 } 1099 } 1100 } else if (CanBeInternationallyDialled(number_no_extension)) { 1101 with_formatting 1102 ? Format(number_no_extension, INTERNATIONAL, formatted_number) 1103 : Format(number_no_extension, E164, formatted_number); 1104 return; 1105 } 1106 if (!with_formatting) { 1107 NormalizeDiallableCharsOnly(formatted_number); 1108 } 1109 } 1110 1111 void PhoneNumberUtil::FormatOutOfCountryCallingNumber( 1112 const PhoneNumber& number, 1113 const string& calling_from, 1114 string* formatted_number) const { 1115 DCHECK(formatted_number); 1116 if (!IsValidRegionCode(calling_from)) { 1117 LOG(WARNING) << "Trying to format number from invalid region " 1118 << calling_from 1119 << ". 
International formatting applied."; 1120 Format(number, INTERNATIONAL, formatted_number); 1121 return; 1122 } 1123 int country_code = number.country_code(); 1124 string national_significant_number; 1125 GetNationalSignificantNumber(number, &national_significant_number); 1126 if (!HasValidCountryCallingCode(country_code)) { 1127 formatted_number->assign(national_significant_number); 1128 return; 1129 } 1130 if (country_code == kNanpaCountryCode) { 1131 if (IsNANPACountry(calling_from)) { 1132 // For NANPA regions, return the national format for these regions but 1133 // prefix it with the country calling code. 1134 Format(number, NATIONAL, formatted_number); 1135 formatted_number->insert(0, StrCat(country_code, " ")); 1136 return; 1137 } 1138 } else if (country_code == GetCountryCodeForValidRegion(calling_from)) { 1139 // If neither region is a NANPA region, then we check to see if the 1140 // country calling code of the number and the country calling code of the 1141 // region we are calling from are the same. 1142 // For regions that share a country calling code, the country calling code 1143 // need not be dialled. This also applies when dialling within a region, so 1144 // this if clause covers both these cases. 1145 // Technically this is the case for dialling from la Runion to other 1146 // overseas departments of France (French Guiana, Martinique, Guadeloupe), 1147 // but not vice versa - so we don't cover this edge case for now and for 1148 // those cases return the version including country calling code. 1149 // Details here: 1150 // http://www.petitfute.com/voyage/225-info-pratiques-reunion 1151 Format(number, NATIONAL, formatted_number); 1152 return; 1153 } 1154 // Metadata cannot be NULL because we checked 'IsValidRegionCode()' above. 
1155 const PhoneMetadata* metadata_calling_from = 1156 GetMetadataForRegion(calling_from); 1157 const string& international_prefix = 1158 metadata_calling_from->international_prefix(); 1159 1160 // For regions that have multiple international prefixes, the international 1161 // format of the number is returned, unless there is a preferred international 1162 // prefix. 1163 const string international_prefix_for_formatting( 1164 reg_exps_->unique_international_prefix_->FullMatch(international_prefix) 1165 ? international_prefix 1166 : metadata_calling_from->preferred_international_prefix()); 1167 1168 string region_code; 1169 GetRegionCodeForCountryCode(country_code, ®ion_code); 1170 // Metadata cannot be NULL because the country_code is valid. 1171 const PhoneMetadata* metadata_for_region = 1172 GetMetadataForRegionOrCallingCode(country_code, region_code); 1173 FormatNsn(national_significant_number, *metadata_for_region, INTERNATIONAL, 1174 formatted_number); 1175 MaybeAppendFormattedExtension(number, *metadata_for_region, INTERNATIONAL, 1176 formatted_number); 1177 if (!international_prefix_for_formatting.empty()) { 1178 formatted_number->insert( 1179 0, StrCat(international_prefix_for_formatting, " ", country_code, " ")); 1180 } else { 1181 PrefixNumberWithCountryCallingCode(country_code, INTERNATIONAL, 1182 formatted_number); 1183 } 1184 } 1185 1186 void PhoneNumberUtil::FormatInOriginalFormat(const PhoneNumber& number, 1187 const string& region_calling_from, 1188 string* formatted_number) const { 1189 DCHECK(formatted_number); 1190 1191 if (number.has_raw_input() && 1192 (HasUnexpectedItalianLeadingZero(number) || 1193 !HasFormattingPatternForNumber(number))) { 1194 // We check if we have the formatting pattern because without that, we might 1195 // format the number as a group without national prefix. 
1196 formatted_number->assign(number.raw_input()); 1197 return; 1198 } 1199 if (!number.has_country_code_source()) { 1200 Format(number, NATIONAL, formatted_number); 1201 return; 1202 } 1203 switch (number.country_code_source()) { 1204 case PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN: 1205 Format(number, INTERNATIONAL, formatted_number); 1206 break; 1207 case PhoneNumber::FROM_NUMBER_WITH_IDD: 1208 FormatOutOfCountryCallingNumber(number, region_calling_from, 1209 formatted_number); 1210 break; 1211 case PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN: 1212 Format(number, INTERNATIONAL, formatted_number); 1213 formatted_number->erase(formatted_number->begin()); 1214 break; 1215 case PhoneNumber::FROM_DEFAULT_COUNTRY: 1216 // Fall-through to default case. 1217 default: 1218 string region_code; 1219 GetRegionCodeForCountryCode(number.country_code(), ®ion_code); 1220 // We strip non-digits from the NDD here, and from the raw input later, so 1221 // that we can compare them easily. 1222 string national_prefix; 1223 GetNddPrefixForRegion(region_code, true /* strip non-digits */, 1224 &national_prefix); 1225 if (national_prefix.empty()) { 1226 // If the region doesn't have a national prefix at all, we can safely 1227 // return the national format without worrying about a national prefix 1228 // being added. 1229 Format(number, NATIONAL, formatted_number); 1230 break; 1231 } 1232 // Otherwise, we check if the original number was entered with a national 1233 // prefix. 1234 if (RawInputContainsNationalPrefix(number.raw_input(), national_prefix, 1235 region_code)) { 1236 // If so, we can safely return the national format. 1237 Format(number, NATIONAL, formatted_number); 1238 break; 1239 } 1240 // Metadata cannot be NULL here because GetNddPrefixForRegion() (above) 1241 // leaves the prefix empty if there is no metadata for the region. 
1242 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); 1243 string national_number; 1244 GetNationalSignificantNumber(number, &national_number); 1245 // This shouldn't be NULL, because we have checked that above with 1246 // HasFormattingPatternForNumber. 1247 const NumberFormat* format_rule = 1248 ChooseFormattingPatternForNumber(metadata->number_format(), 1249 national_number); 1250 // The format rule could still be NULL here if the national number was 0 1251 // and there was no raw input (this should not be possible for numbers 1252 // generated by the phonenumber library as they would also not have a 1253 // country calling code and we would have exited earlier). 1254 if (!format_rule) { 1255 Format(number, NATIONAL, formatted_number); 1256 break; 1257 } 1258 // When the format we apply to this number doesn't contain national 1259 // prefix, we can just return the national format. 1260 // TODO: Refactor the code below with the code in 1261 // IsNationalPrefixPresentIfRequired. 1262 string candidate_national_prefix_rule( 1263 format_rule->national_prefix_formatting_rule()); 1264 // We assume that the first-group symbol will never be _before_ the 1265 // national prefix. 1266 if (!candidate_national_prefix_rule.empty()) { 1267 candidate_national_prefix_rule.erase( 1268 candidate_national_prefix_rule.find("$1")); 1269 NormalizeDigitsOnly(&candidate_national_prefix_rule); 1270 } 1271 if (candidate_national_prefix_rule.empty()) { 1272 // National prefix not used when formatting this number. 1273 Format(number, NATIONAL, formatted_number); 1274 break; 1275 } 1276 // Otherwise, we need to remove the national prefix from our output. 
1277 RepeatedPtrField<NumberFormat> number_formats; 1278 NumberFormat* number_format = number_formats.Add(); 1279 number_format->MergeFrom(*format_rule); 1280 number_format->clear_national_prefix_formatting_rule(); 1281 FormatByPattern(number, NATIONAL, number_formats, formatted_number); 1282 break; 1283 } 1284 // If no digit is inserted/removed/modified as a result of our formatting, we 1285 // return the formatted phone number; otherwise we return the raw input the 1286 // user entered. 1287 if (!formatted_number->empty() && !number.raw_input().empty()) { 1288 string normalized_formatted_number(*formatted_number); 1289 NormalizeDiallableCharsOnly(&normalized_formatted_number); 1290 string normalized_raw_input(number.raw_input()); 1291 NormalizeDiallableCharsOnly(&normalized_raw_input); 1292 if (normalized_formatted_number != normalized_raw_input) { 1293 formatted_number->assign(number.raw_input()); 1294 } 1295 } 1296 } 1297 1298 // Check if raw_input, which is assumed to be in the national format, has a 1299 // national prefix. The national prefix is assumed to be in digits-only form. 1300 bool PhoneNumberUtil::RawInputContainsNationalPrefix( 1301 const string& raw_input, 1302 const string& national_prefix, 1303 const string& region_code) const { 1304 string normalized_national_number(raw_input); 1305 NormalizeDigitsOnly(&normalized_national_number); 1306 if (HasPrefixString(normalized_national_number, national_prefix)) { 1307 // Some Japanese numbers (e.g. 00777123) might be mistaken to contain 1308 // the national prefix when written without it (e.g. 0777123) if we just 1309 // do prefix matching. To tackle that, we check the validity of the 1310 // number if the assumed national prefix is removed (777123 won't be 1311 // valid in Japan). 
1312 PhoneNumber number_without_national_prefix; 1313 if (Parse(normalized_national_number.substr(national_prefix.length()), 1314 region_code, &number_without_national_prefix) 1315 == NO_PARSING_ERROR) { 1316 return IsValidNumber(number_without_national_prefix); 1317 } 1318 } 1319 return false; 1320 } 1321 1322 bool PhoneNumberUtil::HasUnexpectedItalianLeadingZero( 1323 const PhoneNumber& number) const { 1324 return number.has_italian_leading_zero() && 1325 !IsLeadingZeroPossible(number.country_code()); 1326 } 1327 1328 bool PhoneNumberUtil::HasFormattingPatternForNumber( 1329 const PhoneNumber& number) const { 1330 int country_calling_code = number.country_code(); 1331 string region_code; 1332 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); 1333 const PhoneMetadata* metadata = 1334 GetMetadataForRegionOrCallingCode(country_calling_code, region_code); 1335 if (!metadata) { 1336 return false; 1337 } 1338 string national_number; 1339 GetNationalSignificantNumber(number, &national_number); 1340 const NumberFormat* format_rule = 1341 ChooseFormattingPatternForNumber(metadata->number_format(), 1342 national_number); 1343 return format_rule; 1344 } 1345 1346 void PhoneNumberUtil::FormatOutOfCountryKeepingAlphaChars( 1347 const PhoneNumber& number, 1348 const string& calling_from, 1349 string* formatted_number) const { 1350 // If there is no raw input, then we can't keep alpha characters because there 1351 // aren't any. In this case, we return FormatOutOfCountryCallingNumber. 1352 if (number.raw_input().empty()) { 1353 FormatOutOfCountryCallingNumber(number, calling_from, formatted_number); 1354 return; 1355 } 1356 int country_code = number.country_code(); 1357 if (!HasValidCountryCallingCode(country_code)) { 1358 formatted_number->assign(number.raw_input()); 1359 return; 1360 } 1361 // Strip any prefix such as country calling code, IDD, that was present. We do 1362 // this by comparing the number in raw_input with the parsed number. 
1363 string raw_input_copy(number.raw_input()); 1364 // Normalize punctuation. We retain number grouping symbols such as " " only. 1365 NormalizeHelper(reg_exps_->all_plus_number_grouping_symbols_, true, 1366 &raw_input_copy); 1367 // Now we trim everything before the first three digits in the parsed number. 1368 // We choose three because all valid alpha numbers have 3 digits at the start 1369 // - if it does not, then we don't trim anything at all. Similarly, if the 1370 // national number was less than three digits, we don't trim anything at all. 1371 string national_number; 1372 GetNationalSignificantNumber(number, &national_number); 1373 if (national_number.length() > 3) { 1374 size_t first_national_number_digit = 1375 raw_input_copy.find(national_number.substr(0, 3)); 1376 if (first_national_number_digit != string::npos) { 1377 raw_input_copy = raw_input_copy.substr(first_national_number_digit); 1378 } 1379 } 1380 const PhoneMetadata* metadata = GetMetadataForRegion(calling_from); 1381 if (country_code == kNanpaCountryCode) { 1382 if (IsNANPACountry(calling_from)) { 1383 StrAppend(formatted_number, country_code, " ", raw_input_copy); 1384 return; 1385 } 1386 } else if (metadata && 1387 country_code == GetCountryCodeForValidRegion(calling_from)) { 1388 const NumberFormat* formatting_pattern = 1389 ChooseFormattingPatternForNumber(metadata->number_format(), 1390 national_number); 1391 if (!formatting_pattern) { 1392 // If no pattern above is matched, we format the original input. 1393 formatted_number->assign(raw_input_copy); 1394 return; 1395 } 1396 NumberFormat new_format; 1397 new_format.MergeFrom(*formatting_pattern); 1398 // The first group is the first group of digits that the user wrote 1399 // together. 1400 new_format.set_pattern("(\\d+)(.*)"); 1401 // Here we just concatenate them back together after the national prefix 1402 // has been fixed. 
1403 new_format.set_format("$1$2"); 1404 // Now we format using this pattern instead of the default pattern, but 1405 // with the national prefix prefixed if necessary. 1406 // This will not work in the cases where the pattern (and not the 1407 // leading digits) decide whether a national prefix needs to be used, since 1408 // we have overridden the pattern to match anything, but that is not the 1409 // case in the metadata to date. 1410 FormatNsnUsingPattern(raw_input_copy, new_format, NATIONAL, 1411 formatted_number); 1412 return; 1413 } 1414 1415 string international_prefix_for_formatting; 1416 // If an unsupported region-calling-from is entered, or a country with 1417 // multiple international prefixes, the international format of the number is 1418 // returned, unless there is a preferred international prefix. 1419 if (metadata) { 1420 const string& international_prefix = metadata->international_prefix(); 1421 international_prefix_for_formatting = 1422 reg_exps_->unique_international_prefix_->FullMatch(international_prefix) 1423 ? international_prefix 1424 : metadata->preferred_international_prefix(); 1425 } 1426 if (!international_prefix_for_formatting.empty()) { 1427 StrAppend(formatted_number, international_prefix_for_formatting, " ", 1428 country_code, " ", raw_input_copy); 1429 } else { 1430 // Invalid region entered as country-calling-from (so no metadata was found 1431 // for it) or the region chosen has multiple international dialling 1432 // prefixes. 1433 LOG(WARNING) << "Trying to format number from invalid region " 1434 << calling_from 1435 << ". 
International formatting applied."; 1436 formatted_number->assign(raw_input_copy); 1437 PrefixNumberWithCountryCallingCode(country_code, INTERNATIONAL, 1438 formatted_number); 1439 } 1440 } 1441 1442 const NumberFormat* PhoneNumberUtil::ChooseFormattingPatternForNumber( 1443 const RepeatedPtrField<NumberFormat>& available_formats, 1444 const string& national_number) const { 1445 for (RepeatedPtrField<NumberFormat>::const_iterator 1446 it = available_formats.begin(); it != available_formats.end(); ++it) { 1447 int size = it->leading_digits_pattern_size(); 1448 if (size > 0) { 1449 const scoped_ptr<RegExpInput> number_copy( 1450 reg_exps_->regexp_factory_->CreateInput(national_number)); 1451 // We always use the last leading_digits_pattern, as it is the most 1452 // detailed. 1453 if (!reg_exps_->regexp_cache_->GetRegExp( 1454 it->leading_digits_pattern(size - 1)).Consume( 1455 number_copy.get())) { 1456 continue; 1457 } 1458 } 1459 const RegExp& pattern_to_match( 1460 reg_exps_->regexp_cache_->GetRegExp(it->pattern())); 1461 if (pattern_to_match.FullMatch(national_number)) { 1462 return &(*it); 1463 } 1464 } 1465 return NULL; 1466 } 1467 1468 // Note that carrier_code is optional - if an empty string, no carrier code 1469 // replacement will take place. 1470 void PhoneNumberUtil::FormatNsnUsingPatternWithCarrier( 1471 const string& national_number, 1472 const NumberFormat& formatting_pattern, 1473 PhoneNumberUtil::PhoneNumberFormat number_format, 1474 const string& carrier_code, 1475 string* formatted_number) const { 1476 DCHECK(formatted_number); 1477 string number_format_rule(formatting_pattern.format()); 1478 if (number_format == PhoneNumberUtil::NATIONAL && 1479 carrier_code.length() > 0 && 1480 formatting_pattern.domestic_carrier_code_formatting_rule().length() > 0) { 1481 // Replace the $CC in the formatting rule with the desired carrier code. 
1482 string carrier_code_formatting_rule = 1483 formatting_pattern.domestic_carrier_code_formatting_rule(); 1484 reg_exps_->carrier_code_pattern_->Replace(&carrier_code_formatting_rule, 1485 carrier_code); 1486 reg_exps_->first_group_capturing_pattern_-> 1487 Replace(&number_format_rule, carrier_code_formatting_rule); 1488 } else { 1489 // Use the national prefix formatting rule instead. 1490 string national_prefix_formatting_rule = 1491 formatting_pattern.national_prefix_formatting_rule(); 1492 if (number_format == PhoneNumberUtil::NATIONAL && 1493 national_prefix_formatting_rule.length() > 0) { 1494 // Apply the national_prefix_formatting_rule as the formatting_pattern 1495 // contains only information on how the national significant number 1496 // should be formatted at this point. 1497 reg_exps_->first_group_capturing_pattern_->Replace( 1498 &number_format_rule, national_prefix_formatting_rule); 1499 } 1500 } 1501 formatted_number->assign(national_number); 1502 1503 const RegExp& pattern_to_match( 1504 reg_exps_->regexp_cache_->GetRegExp(formatting_pattern.pattern())); 1505 pattern_to_match.GlobalReplace(formatted_number, number_format_rule); 1506 1507 if (number_format == RFC3966) { 1508 // First consume any leading punctuation, if any was present. 1509 const scoped_ptr<RegExpInput> number( 1510 reg_exps_->regexp_factory_->CreateInput(*formatted_number)); 1511 if (reg_exps_->separator_pattern_->Consume(number.get())) { 1512 formatted_number->assign(number->ToString()); 1513 } 1514 // Then replace all separators with a "-". 1515 reg_exps_->separator_pattern_->GlobalReplace(formatted_number, "-"); 1516 } 1517 } 1518 1519 // Simple wrapper of FormatNsnUsingPatternWithCarrier for the common case of 1520 // no carrier code. 
1521 void PhoneNumberUtil::FormatNsnUsingPattern( 1522 const string& national_number, 1523 const NumberFormat& formatting_pattern, 1524 PhoneNumberUtil::PhoneNumberFormat number_format, 1525 string* formatted_number) const { 1526 DCHECK(formatted_number); 1527 FormatNsnUsingPatternWithCarrier(national_number, formatting_pattern, 1528 number_format, "", formatted_number); 1529 } 1530 1531 void PhoneNumberUtil::FormatNsn(const string& number, 1532 const PhoneMetadata& metadata, 1533 PhoneNumberFormat number_format, 1534 string* formatted_number) const { 1535 DCHECK(formatted_number); 1536 FormatNsnWithCarrier(number, metadata, number_format, "", formatted_number); 1537 } 1538 1539 // Note in some regions, the national number can be written in two completely 1540 // different ways depending on whether it forms part of the NATIONAL format or 1541 // INTERNATIONAL format. The number_format parameter here is used to specify 1542 // which format to use for those cases. If a carrier_code is specified, this 1543 // will be inserted into the formatted string to replace $CC. 1544 void PhoneNumberUtil::FormatNsnWithCarrier(const string& number, 1545 const PhoneMetadata& metadata, 1546 PhoneNumberFormat number_format, 1547 const string& carrier_code, 1548 string* formatted_number) const { 1549 DCHECK(formatted_number); 1550 // When the intl_number_formats exists, we use that to format national number 1551 // for the INTERNATIONAL format instead of using the number_formats. 1552 const RepeatedPtrField<NumberFormat> available_formats = 1553 (metadata.intl_number_format_size() == 0 || number_format == NATIONAL) 1554 ? 
metadata.number_format() 1555 : metadata.intl_number_format(); 1556 const NumberFormat* formatting_pattern = 1557 ChooseFormattingPatternForNumber(available_formats, number); 1558 if (!formatting_pattern) { 1559 formatted_number->assign(number); 1560 } else { 1561 FormatNsnUsingPatternWithCarrier(number, *formatting_pattern, number_format, 1562 carrier_code, formatted_number); 1563 } 1564 } 1565 1566 // Appends the formatted extension of a phone number, if the phone number had an 1567 // extension specified. 1568 void PhoneNumberUtil::MaybeAppendFormattedExtension( 1569 const PhoneNumber& number, 1570 const PhoneMetadata& metadata, 1571 PhoneNumberFormat number_format, 1572 string* formatted_number) const { 1573 DCHECK(formatted_number); 1574 if (number.has_extension() && number.extension().length() > 0) { 1575 if (number_format == RFC3966) { 1576 StrAppend(formatted_number, kRfc3966ExtnPrefix, number.extension()); 1577 } else { 1578 if (metadata.has_preferred_extn_prefix()) { 1579 StrAppend(formatted_number, metadata.preferred_extn_prefix(), 1580 number.extension()); 1581 } else { 1582 StrAppend(formatted_number, kDefaultExtnPrefix, number.extension()); 1583 } 1584 } 1585 } 1586 } 1587 1588 bool PhoneNumberUtil::IsNANPACountry(const string& region_code) const { 1589 return nanpa_regions_->find(region_code) != nanpa_regions_->end(); 1590 } 1591 1592 // Returns the region codes that matches the specific country calling code. In 1593 // the case of no region code being found, region_codes will be left empty. 1594 void PhoneNumberUtil::GetRegionCodesForCountryCallingCode( 1595 int country_calling_code, 1596 list<string>* region_codes) const { 1597 DCHECK(region_codes); 1598 // Create a IntRegionsPair with the country_code passed in, and use it to 1599 // locate the pair with the same country_code in the sorted vector. 
1600 IntRegionsPair target_pair; 1601 target_pair.first = country_calling_code; 1602 typedef vector<IntRegionsPair>::const_iterator ConstIterator; 1603 pair<ConstIterator, ConstIterator> range = equal_range( 1604 country_calling_code_to_region_code_map_->begin(), 1605 country_calling_code_to_region_code_map_->end(), 1606 target_pair, OrderByFirst()); 1607 if (range.first != range.second) { 1608 region_codes->insert(region_codes->begin(), 1609 range.first->second->begin(), 1610 range.first->second->end()); 1611 } 1612 } 1613 1614 // Returns the region code that matches the specific country calling code. In 1615 // the case of no region code being found, the unknown region code will be 1616 // returned. 1617 void PhoneNumberUtil::GetRegionCodeForCountryCode( 1618 int country_calling_code, 1619 string* region_code) const { 1620 DCHECK(region_code); 1621 list<string> region_codes; 1622 1623 GetRegionCodesForCountryCallingCode(country_calling_code, ®ion_codes); 1624 *region_code = (region_codes.size() > 0) ? 
1625 region_codes.front() : RegionCode::GetUnknown(); 1626 } 1627 1628 void PhoneNumberUtil::GetRegionCodeForNumber(const PhoneNumber& number, 1629 string* region_code) const { 1630 DCHECK(region_code); 1631 int country_calling_code = number.country_code(); 1632 list<string> region_codes; 1633 GetRegionCodesForCountryCallingCode(country_calling_code, ®ion_codes); 1634 if (region_codes.size() == 0) { 1635 string number_string; 1636 GetNationalSignificantNumber(number, &number_string); 1637 LOG(WARNING) << "Missing/invalid country calling code (" 1638 << country_calling_code 1639 << ") for number " << number_string; 1640 *region_code = RegionCode::GetUnknown(); 1641 return; 1642 } 1643 if (region_codes.size() == 1) { 1644 *region_code = region_codes.front(); 1645 } else { 1646 GetRegionCodeForNumberFromRegionList(number, region_codes, region_code); 1647 } 1648 } 1649 1650 void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList( 1651 const PhoneNumber& number, const list<string>& region_codes, 1652 string* region_code) const { 1653 DCHECK(region_code); 1654 string national_number; 1655 GetNationalSignificantNumber(number, &national_number); 1656 for (list<string>::const_iterator it = region_codes.begin(); 1657 it != region_codes.end(); ++it) { 1658 // Metadata cannot be NULL because the region codes come from the country 1659 // calling code map. 
1660 const PhoneMetadata* metadata = GetMetadataForRegion(*it); 1661 if (metadata->has_leading_digits()) { 1662 const scoped_ptr<RegExpInput> number( 1663 reg_exps_->regexp_factory_->CreateInput(national_number)); 1664 if (reg_exps_->regexp_cache_-> 1665 GetRegExp(metadata->leading_digits()).Consume(number.get())) { 1666 *region_code = *it; 1667 return; 1668 } 1669 } else if (GetNumberTypeHelper(national_number, *metadata, 1670 reg_exps_->regexp_cache_.get()) != UNKNOWN) { 1671 *region_code = *it; 1672 return; 1673 } 1674 } 1675 *region_code = RegionCode::GetUnknown(); 1676 } 1677 1678 int PhoneNumberUtil::GetCountryCodeForRegion(const string& region_code) const { 1679 if (!IsValidRegionCode(region_code)) { 1680 LOG(WARNING) << "Invalid or unknown region code (" << region_code 1681 << ") provided."; 1682 return 0; 1683 } 1684 return GetCountryCodeForValidRegion(region_code); 1685 } 1686 1687 int PhoneNumberUtil::GetCountryCodeForValidRegion( 1688 const string& region_code) const { 1689 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); 1690 return metadata->country_code(); 1691 } 1692 1693 // Gets a valid fixed-line number for the specified region_code. Returns false 1694 // if the region was unknown or 001 (representing non-geographical regions), or 1695 // if no number exists. 1696 bool PhoneNumberUtil::GetExampleNumber(const string& region_code, 1697 PhoneNumber* number) const { 1698 DCHECK(number); 1699 return GetExampleNumberForType(region_code, FIXED_LINE, number); 1700 } 1701 1702 // Gets a valid number for the specified region_code and type. Returns false if 1703 // the country was unknown or 001 (representing non-geographical regions), or if 1704 // no number exists. 
1705 bool PhoneNumberUtil::GetExampleNumberForType( 1706 const string& region_code, 1707 PhoneNumberUtil::PhoneNumberType type, 1708 PhoneNumber* number) const { 1709 DCHECK(number); 1710 if (!IsValidRegionCode(region_code)) { 1711 LOG(WARNING) << "Invalid or unknown region code (" << region_code 1712 << ") provided."; 1713 return false; 1714 } 1715 const PhoneMetadata* region_metadata = GetMetadataForRegion(region_code); 1716 const PhoneNumberDesc* desc = GetNumberDescByType(*region_metadata, type); 1717 if (desc && desc->has_example_number()) { 1718 ErrorType success = Parse(desc->example_number(), region_code, number); 1719 if (success == NO_PARSING_ERROR) { 1720 return true; 1721 } else { 1722 LOG(ERROR) << "Error parsing example number (" 1723 << static_cast<int>(success) << ")"; 1724 } 1725 } 1726 return false; 1727 } 1728 1729 bool PhoneNumberUtil::GetExampleNumberForNonGeoEntity( 1730 int country_calling_code, PhoneNumber* number) const { 1731 DCHECK(number); 1732 const PhoneMetadata* metadata = 1733 GetMetadataForNonGeographicalRegion(country_calling_code); 1734 if (metadata) { 1735 const PhoneNumberDesc& desc = metadata->general_desc(); 1736 if (desc.has_example_number()) { 1737 ErrorType success = Parse(StrCat(kPlusSign, 1738 SimpleItoa(country_calling_code), 1739 desc.example_number()), 1740 RegionCode::ZZ(), number); 1741 if (success == NO_PARSING_ERROR) { 1742 return true; 1743 } else { 1744 LOG(ERROR) << "Error parsing example number (" 1745 << static_cast<int>(success) << ")"; 1746 } 1747 } 1748 } else { 1749 LOG(WARNING) << "Invalid or unknown country calling code provided: " 1750 << country_calling_code; 1751 } 1752 return false; 1753 } 1754 1755 PhoneNumberUtil::ErrorType PhoneNumberUtil::Parse(const string& number_to_parse, 1756 const string& default_region, 1757 PhoneNumber* number) const { 1758 DCHECK(number); 1759 return ParseHelper(number_to_parse, default_region, false, true, number); 1760 } 1761 1762 PhoneNumberUtil::ErrorType 
PhoneNumberUtil::ParseAndKeepRawInput( 1763 const string& number_to_parse, 1764 const string& default_region, 1765 PhoneNumber* number) const { 1766 DCHECK(number); 1767 return ParseHelper(number_to_parse, default_region, true, true, number); 1768 } 1769 1770 // Checks to see that the region code used is valid, or if it is not valid, that 1771 // the number to parse starts with a + symbol so that we can attempt to infer 1772 // the country from the number. Returns false if it cannot use the region 1773 // provided and the region cannot be inferred. 1774 bool PhoneNumberUtil::CheckRegionForParsing( 1775 const string& number_to_parse, 1776 const string& default_region) const { 1777 if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) { 1778 const scoped_ptr<RegExpInput> number( 1779 reg_exps_->regexp_factory_->CreateInput(number_to_parse)); 1780 if (!reg_exps_->plus_chars_pattern_->Consume(number.get())) { 1781 return false; 1782 } 1783 } 1784 return true; 1785 } 1786 1787 // Converts number_to_parse to a form that we can parse and write it to 1788 // national_number if it is written in RFC3966; otherwise extract a possible 1789 // number out of it and write to national_number. 1790 void PhoneNumberUtil::BuildNationalNumberForParsing( 1791 const string& number_to_parse, string* national_number) const { 1792 size_t index_of_phone_context = number_to_parse.find(kRfc3966PhoneContext); 1793 if (index_of_phone_context != string::npos) { 1794 int phone_context_start = 1795 index_of_phone_context + strlen(kRfc3966PhoneContext); 1796 // If the phone context contains a phone number prefix, we need to capture 1797 // it, whereas domains will be ignored. 1798 if (number_to_parse.at(phone_context_start) == kPlusSign[0]) { 1799 // Additional parameters might follow the phone context. If so, we will 1800 // remove them here because the parameters after phone context are not 1801 // important for parsing the phone number. 
1802 size_t phone_context_end = number_to_parse.find(';', phone_context_start); 1803 if (phone_context_end != string::npos) { 1804 StrAppend( 1805 national_number, number_to_parse.substr( 1806 phone_context_start, phone_context_end - phone_context_start)); 1807 } else { 1808 StrAppend(national_number, number_to_parse.substr(phone_context_start)); 1809 } 1810 } 1811 1812 // Now append everything between the "tel:" prefix and the phone-context. 1813 // This should include the national number, an optional extension or 1814 // isdn-subaddress component. 1815 int end_of_rfc_prefix = 1816 number_to_parse.find(kRfc3966Prefix) + strlen(kRfc3966Prefix); 1817 StrAppend( 1818 national_number, 1819 number_to_parse.substr(end_of_rfc_prefix, 1820 index_of_phone_context - end_of_rfc_prefix)); 1821 } else { 1822 // Extract a possible number from the string passed in (this strips leading 1823 // characters that could not be the start of a phone number.) 1824 ExtractPossibleNumber(number_to_parse, national_number); 1825 } 1826 1827 // Delete the isdn-subaddress and everything after it if it is present. Note 1828 // extension won't appear at the same time with isdn-subaddress according to 1829 // paragraph 5.3 of the RFC3966 spec. 1830 size_t index_of_isdn = national_number->find(kRfc3966IsdnSubaddress); 1831 if (index_of_isdn != string::npos) { 1832 national_number->erase(index_of_isdn); 1833 } 1834 // If both phone context and isdn-subaddress are absent but other parameters 1835 // are present, the parameters are left in nationalNumber. This is because 1836 // we are concerned about deleting content from a potential number string 1837 // when there is no strong evidence that the number is actually written in 1838 // RFC3966. 
1839 } 1840 1841 PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper( 1842 const string& number_to_parse, 1843 const string& default_region, 1844 bool keep_raw_input, 1845 bool check_region, 1846 PhoneNumber* phone_number) const { 1847 DCHECK(phone_number); 1848 1849 string national_number; 1850 BuildNationalNumberForParsing(number_to_parse, &national_number); 1851 1852 if (!IsViablePhoneNumber(national_number)) { 1853 VLOG(2) << "The string supplied did not seem to be a phone number."; 1854 return NOT_A_NUMBER; 1855 } 1856 1857 if (check_region && 1858 !CheckRegionForParsing(national_number, default_region)) { 1859 VLOG(1) << "Missing or invalid default country."; 1860 return INVALID_COUNTRY_CODE_ERROR; 1861 } 1862 PhoneNumber temp_number; 1863 if (keep_raw_input) { 1864 temp_number.set_raw_input(number_to_parse); 1865 } 1866 // Attempt to parse extension first, since it doesn't require country-specific 1867 // data and we want to have the non-normalised number here. 1868 string extension; 1869 MaybeStripExtension(&national_number, &extension); 1870 if (!extension.empty()) { 1871 temp_number.set_extension(extension); 1872 } 1873 const PhoneMetadata* country_metadata = GetMetadataForRegion(default_region); 1874 // Check to see if the number is given in international format so we know 1875 // whether this number is from the default country or not. 
1876 string normalized_national_number(national_number); 1877 ErrorType country_code_error = 1878 MaybeExtractCountryCode(country_metadata, keep_raw_input, 1879 &normalized_national_number, &temp_number); 1880 if (country_code_error != NO_PARSING_ERROR) { 1881 const scoped_ptr<RegExpInput> number_string_piece( 1882 reg_exps_->regexp_factory_->CreateInput(national_number)); 1883 if ((country_code_error == INVALID_COUNTRY_CODE_ERROR) && 1884 (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get()))) { 1885 normalized_national_number.assign(number_string_piece->ToString()); 1886 // Strip the plus-char, and try again. 1887 MaybeExtractCountryCode(country_metadata, 1888 keep_raw_input, 1889 &normalized_national_number, 1890 &temp_number); 1891 if (temp_number.country_code() == 0) { 1892 return INVALID_COUNTRY_CODE_ERROR; 1893 } 1894 } else { 1895 return country_code_error; 1896 } 1897 } 1898 int country_code = temp_number.country_code(); 1899 if (country_code != 0) { 1900 string phone_number_region; 1901 GetRegionCodeForCountryCode(country_code, &phone_number_region); 1902 if (phone_number_region != default_region) { 1903 country_metadata = 1904 GetMetadataForRegionOrCallingCode(country_code, phone_number_region); 1905 } 1906 } else if (country_metadata) { 1907 // If no extracted country calling code, use the region supplied instead. 1908 // Note that the national number was already normalized by 1909 // MaybeExtractCountryCode. 1910 country_code = country_metadata->country_code(); 1911 } 1912 if (normalized_national_number.length() < kMinLengthForNsn) { 1913 VLOG(2) << "The string supplied is too short to be a phone number."; 1914 return TOO_SHORT_NSN; 1915 } 1916 if (country_metadata) { 1917 string* carrier_code = keep_raw_input ? 
1918 temp_number.mutable_preferred_domestic_carrier_code() : NULL; 1919 MaybeStripNationalPrefixAndCarrierCode(*country_metadata, 1920 &normalized_national_number, 1921 carrier_code); 1922 } 1923 size_t normalized_national_number_length = 1924 normalized_national_number.length(); 1925 if (normalized_national_number_length < kMinLengthForNsn) { 1926 VLOG(2) << "The string supplied is too short to be a phone number."; 1927 return TOO_SHORT_NSN; 1928 } 1929 if (normalized_national_number_length > kMaxLengthForNsn) { 1930 VLOG(2) << "The string supplied is too long to be a phone number."; 1931 return TOO_LONG_NSN; 1932 } 1933 temp_number.set_country_code(country_code); 1934 if (normalized_national_number[0] == '0') { 1935 temp_number.set_italian_leading_zero(true); 1936 } 1937 uint64 number_as_int; 1938 safe_strtou64(normalized_national_number, &number_as_int); 1939 temp_number.set_national_number(number_as_int); 1940 phone_number->MergeFrom(temp_number); 1941 return NO_PARSING_ERROR; 1942 } 1943 1944 // Attempts to extract a possible number from the string passed in. This 1945 // currently strips all leading characters that could not be used to start a 1946 // phone number. Characters that can be used to start a phone number are 1947 // defined in the valid_start_char_pattern. If none of these characters are 1948 // found in the number passed in, an empty string is returned. This function 1949 // also attempts to strip off any alternative extensions or endings if two or 1950 // more are present, such as in the case of: (530) 583-6985 x302/x2303. The 1951 // second extension here makes this actually two phone numbers, (530) 583-6985 1952 // x302 and (530) 583-6985 x2303. We remove the second extension so that the 1953 // first number is parsed correctly. 
1954 void PhoneNumberUtil::ExtractPossibleNumber(const string& number, 1955 string* extracted_number) const { 1956 DCHECK(extracted_number); 1957 1958 UnicodeText number_as_unicode; 1959 number_as_unicode.PointToUTF8(number.data(), number.size()); 1960 char current_char[5]; 1961 int len; 1962 UnicodeText::const_iterator it; 1963 for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) { 1964 len = it.get_utf8(current_char); 1965 current_char[len] = '\0'; 1966 if (reg_exps_->valid_start_char_pattern_->FullMatch(current_char)) { 1967 break; 1968 } 1969 } 1970 1971 if (it == number_as_unicode.end()) { 1972 // No valid start character was found. extracted_number should be set to 1973 // empty string. 1974 extracted_number->assign(""); 1975 return; 1976 } 1977 1978 extracted_number->assign( 1979 UnicodeText::UTF8Substring(it, number_as_unicode.end())); 1980 TrimUnwantedEndChars(extracted_number); 1981 if (extracted_number->length() == 0) { 1982 return; 1983 } 1984 1985 VLOG(3) << "After stripping starting and trailing characters, left with: " 1986 << *extracted_number; 1987 1988 // Now remove any extra numbers at the end. 
1989 reg_exps_->capture_up_to_second_number_start_pattern_-> 1990 PartialMatch(*extracted_number, extracted_number); 1991 } 1992 1993 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const { 1994 return IsPossibleNumberWithReason(number) == IS_POSSIBLE; 1995 } 1996 1997 bool PhoneNumberUtil::IsPossibleNumberForString( 1998 const string& number, 1999 const string& region_dialing_from) const { 2000 PhoneNumber number_proto; 2001 if (Parse(number, region_dialing_from, &number_proto) == NO_PARSING_ERROR) { 2002 return IsPossibleNumber(number_proto); 2003 } else { 2004 return false; 2005 } 2006 } 2007 2008 PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason( 2009 const PhoneNumber& number) const { 2010 string national_number; 2011 GetNationalSignificantNumber(number, &national_number); 2012 int country_code = number.country_code(); 2013 // Note: For Russian Fed and NANPA numbers, we just use the rules from the 2014 // default region (US or Russia) since the GetRegionCodeForNumber will not 2015 // work if the number is possible but not valid. This would need to be 2016 // revisited if the possible number pattern ever differed between various 2017 // regions within those plans. 2018 if (!HasValidCountryCallingCode(country_code)) { 2019 return INVALID_COUNTRY_CODE; 2020 } 2021 string region_code; 2022 GetRegionCodeForCountryCode(country_code, ®ion_code); 2023 // Metadata cannot be NULL because the country calling code is valid. 2024 const PhoneMetadata* metadata = 2025 GetMetadataForRegionOrCallingCode(country_code, region_code); 2026 const PhoneNumberDesc& general_num_desc = metadata->general_desc(); 2027 // Handling case of numbers with no metadata. 
2028 if (!general_num_desc.has_national_number_pattern()) { 2029 size_t number_length = national_number.length(); 2030 if (number_length < kMinLengthForNsn) { 2031 return TOO_SHORT; 2032 } else if (number_length > kMaxLengthForNsn) { 2033 return TOO_LONG; 2034 } else { 2035 return IS_POSSIBLE; 2036 } 2037 } 2038 const RegExp& possible_number_pattern = reg_exps_->regexp_cache_->GetRegExp( 2039 StrCat("(", general_num_desc.possible_number_pattern(), ")")); 2040 return TestNumberLengthAgainstPattern(possible_number_pattern, 2041 national_number); 2042 } 2043 2044 bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const { 2045 if (IsValidNumber(*number)) { 2046 return true; 2047 } 2048 PhoneNumber number_copy(*number); 2049 uint64 national_number = number->national_number(); 2050 do { 2051 national_number /= 10; 2052 number_copy.set_national_number(national_number); 2053 if (IsPossibleNumberWithReason(number_copy) == TOO_SHORT || 2054 national_number == 0) { 2055 return false; 2056 } 2057 } while (!IsValidNumber(number_copy)); 2058 number->set_national_number(national_number); 2059 return true; 2060 } 2061 2062 PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberType( 2063 const PhoneNumber& number) const { 2064 string region_code; 2065 GetRegionCodeForNumber(number, ®ion_code); 2066 const PhoneMetadata* metadata = 2067 GetMetadataForRegionOrCallingCode(number.country_code(), region_code); 2068 if (!metadata) { 2069 return UNKNOWN; 2070 } 2071 string national_significant_number; 2072 GetNationalSignificantNumber(number, &national_significant_number); 2073 return GetNumberTypeHelper(national_significant_number, 2074 *metadata, 2075 reg_exps_->regexp_cache_.get()); 2076 } 2077 2078 bool PhoneNumberUtil::IsValidNumber(const PhoneNumber& number) const { 2079 string region_code; 2080 GetRegionCodeForNumber(number, ®ion_code); 2081 return IsValidNumberForRegion(number, region_code); 2082 } 2083 2084 bool PhoneNumberUtil::IsValidNumberForRegion(const 
PhoneNumber& number, 2085 const string& region_code) const { 2086 int country_code = number.country_code(); 2087 const PhoneMetadata* metadata = 2088 GetMetadataForRegionOrCallingCode(country_code, region_code); 2089 if (!metadata || 2090 ((kRegionCodeForNonGeoEntity != region_code) && 2091 country_code != GetCountryCodeForValidRegion(region_code))) { 2092 // Either the region code was invalid, or the country calling code for this 2093 // number does not match that of the region code. 2094 return false; 2095 } 2096 const PhoneNumberDesc& general_desc = metadata->general_desc(); 2097 string national_number; 2098 GetNationalSignificantNumber(number, &national_number); 2099 2100 // For regions where we don't have metadata for PhoneNumberDesc, we treat 2101 // any number passed in as a valid number if its national significant number 2102 // is between the minimum and maximum lengths defined by ITU for a national 2103 // significant number. 2104 if (!general_desc.has_national_number_pattern()) { 2105 VLOG(3) << "Validating number with incomplete metadata."; 2106 size_t number_length = national_number.length(); 2107 return number_length > kMinLengthForNsn && 2108 number_length <= kMaxLengthForNsn; 2109 } 2110 return GetNumberTypeHelper(national_number, *metadata, 2111 reg_exps_->regexp_cache_.get()) != UNKNOWN; 2112 } 2113 2114 bool PhoneNumberUtil::IsNumberGeographical( 2115 const PhoneNumber& phone_number) const { 2116 PhoneNumberType number_type = GetNumberType(phone_number); 2117 // TODO: Include mobile phone numbers from countries like 2118 // Indonesia, which has some mobile numbers that are geographical. 
2119 return number_type == PhoneNumberUtil::FIXED_LINE || 2120 number_type == PhoneNumberUtil::FIXED_LINE_OR_MOBILE; 2121 } 2122 2123 bool PhoneNumberUtil::IsLeadingZeroPossible(int country_calling_code) const { 2124 string region_code; 2125 GetRegionCodeForCountryCode(country_calling_code, ®ion_code); 2126 const PhoneMetadata* main_metadata_for_calling_code = 2127 GetMetadataForRegionOrCallingCode(country_calling_code, region_code); 2128 if (!main_metadata_for_calling_code) return false; 2129 return main_metadata_for_calling_code->leading_zero_possible(); 2130 } 2131 2132 void PhoneNumberUtil::GetNationalSignificantNumber( 2133 const PhoneNumber& number, 2134 string* national_number) const { 2135 DCHECK(national_number); 2136 // If a leading zero has been set, we prefix this now. Note this is not a 2137 // national prefix. 2138 StrAppend(national_number, number.italian_leading_zero() ? "0" : ""); 2139 StrAppend(national_number, number.national_number()); 2140 } 2141 2142 int PhoneNumberUtil::GetLengthOfGeographicalAreaCode( 2143 const PhoneNumber& number) const { 2144 string region_code; 2145 GetRegionCodeForNumber(number, ®ion_code); 2146 const PhoneMetadata* metadata = GetMetadataForRegion(region_code); 2147 if (!metadata) { 2148 return 0; 2149 } 2150 // If a country doesn't use a national prefix, and this number doesn't have an 2151 // Italian leading zero, we assume it is a closed dialling plan with no area 2152 // codes. 2153 if (!metadata->has_national_prefix() && !number.italian_leading_zero()) { 2154 return 0; 2155 } 2156 2157 if (!IsNumberGeographical(number)) { 2158 return 0; 2159 } 2160 2161 return GetLengthOfNationalDestinationCode(number); 2162 } 2163 2164 int PhoneNumberUtil::GetLengthOfNationalDestinationCode( 2165 const PhoneNumber& number) const { 2166 PhoneNumber copied_proto(number); 2167 if (number.has_extension()) { 2168 // Clear the extension so it's not included when formatting. 
2169 copied_proto.clear_extension(); 2170 } 2171 2172 string formatted_number; 2173 Format(copied_proto, INTERNATIONAL, &formatted_number); 2174 const scoped_ptr<RegExpInput> i18n_number( 2175 reg_exps_->regexp_factory_->CreateInput(formatted_number)); 2176 string digit_group; 2177 string ndc; 2178 string third_group; 2179 for (int i = 0; i < 3; ++i) { 2180 if (!reg_exps_->capturing_ascii_digits_pattern_->FindAndConsume( 2181 i18n_number.get(), &digit_group)) { 2182 // We should find at least three groups. 2183 return 0; 2184 } 2185 if (i == 1) { 2186 ndc = digit_group; 2187 } else if (i == 2) { 2188 third_group = digit_group; 2189 } 2190 } 2191 2192 if (GetNumberType(number) == MOBILE) { 2193 // For example Argentinian mobile numbers, when formatted in the 2194 // international format, are in the form of +54 9 NDC XXXX.... As a result, 2195 // we take the length of the third group (NDC) and add the length of the 2196 // mobile token, which also forms part of the national significant number. 2197 // This assumes that the mobile token is always formatted separately from 2198 // the rest of the phone number. 
2199 string mobile_token; 2200 GetCountryMobileToken(number.country_code(), &mobile_token); 2201 if (!mobile_token.empty()) { 2202 return third_group.size() + mobile_token.size(); 2203 } 2204 } 2205 return ndc.size(); 2206 } 2207 2208 void PhoneNumberUtil::GetCountryMobileToken(int country_calling_code, 2209 string* mobile_token) const { 2210 DCHECK(mobile_token); 2211 map<int, char>::iterator it = reg_exps_->mobile_token_mappings_.find( 2212 country_calling_code); 2213 if (it != reg_exps_->mobile_token_mappings_.end()) { 2214 *mobile_token = it->second; 2215 } else { 2216 mobile_token->assign(""); 2217 } 2218 } 2219 2220 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const { 2221 DCHECK(number); 2222 const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp( 2223 StrCat("[^", kDigits, "]")); 2224 // Delete everything that isn't valid digits. 2225 non_digits_pattern.GlobalReplace(number, ""); 2226 // Normalize all decimal digits to ASCII digits. 2227 number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number)); 2228 } 2229 2230 void PhoneNumberUtil::NormalizeDiallableCharsOnly(string* number) const { 2231 DCHECK(number); 2232 NormalizeHelper(reg_exps_->diallable_char_mappings_, 2233 true /* remove non matches */, number); 2234 } 2235 2236 bool PhoneNumberUtil::IsAlphaNumber(const string& number) const { 2237 if (!IsViablePhoneNumber(number)) { 2238 // Number is too short, or doesn't match the basic phone number pattern. 2239 return false; 2240 } 2241 // Copy the number, since we are going to try and strip the extension from it. 
2242 string number_copy(number); 2243 string extension; 2244 MaybeStripExtension(&number_copy, &extension); 2245 return reg_exps_->valid_alpha_phone_pattern_->FullMatch(number_copy); 2246 } 2247 2248 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const { 2249 DCHECK(number); 2250 NormalizeHelper(reg_exps_->alpha_phone_mappings_, false, number); 2251 } 2252 2253 // Normalizes a string of characters representing a phone number. This performs 2254 // the following conversions: 2255 // - Punctuation is stripped. 2256 // For ALPHA/VANITY numbers: 2257 // - Letters are converted to their numeric representation on a telephone 2258 // keypad. The keypad used here is the one defined in ITU Recommendation 2259 // E.161. This is only done if there are 3 or more letters in the number, to 2260 // lessen the risk that such letters are typos. 2261 // For other numbers: 2262 // - Wide-ascii digits are converted to normal ASCII (European) digits. 2263 // - Arabic-Indic numerals are converted to European numerals. 2264 // - Spurious alpha characters are stripped. 2265 void PhoneNumberUtil::Normalize(string* number) const { 2266 DCHECK(number); 2267 if (reg_exps_->valid_alpha_phone_pattern_->PartialMatch(*number)) { 2268 NormalizeHelper(reg_exps_->alpha_phone_mappings_, true, number); 2269 } 2270 NormalizeDigitsOnly(number); 2271 } 2272 2273 // Checks to see if the string of characters could possibly be a phone number at 2274 // all. At the moment, checks to see that the string begins with at least 3 2275 // digits, ignoring any punctuation commonly found in phone numbers. This 2276 // method does not require the number to be normalized in advance - but does 2277 // assume that leading non-number symbols have been removed, such as by the 2278 // method ExtractPossibleNumber. 
2279 bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) const { 2280 if (number.length() < kMinLengthForNsn) { 2281 VLOG(2) << "Number too short to be viable:" << number; 2282 return false; 2283 } 2284 return reg_exps_->valid_phone_number_pattern_->FullMatch(number); 2285 } 2286 2287 // Strips the IDD from the start of the number if present. Helper function used 2288 // by MaybeStripInternationalPrefixAndNormalize. 2289 bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern, 2290 string* number) const { 2291 DCHECK(number); 2292 const scoped_ptr<RegExpInput> number_copy( 2293 reg_exps_->regexp_factory_->CreateInput(*number)); 2294 // First attempt to strip the idd_pattern at the start, if present. We make a 2295 // copy so that we can revert to the original string if necessary. 2296 if (idd_pattern.Consume(number_copy.get())) { 2297 // Only strip this if the first digit after the match is not a 0, since 2298 // country calling codes cannot begin with 0. 2299 string extracted_digit; 2300 if (reg_exps_->capturing_digit_pattern_->PartialMatch( 2301 number_copy->ToString(), &extracted_digit)) { 2302 NormalizeDigitsOnly(&extracted_digit); 2303 if (extracted_digit == "0") { 2304 return false; 2305 } 2306 } 2307 number->assign(number_copy->ToString()); 2308 return true; 2309 } 2310 return false; 2311 } 2312 2313 // Strips any international prefix (such as +, 00, 011) present in the number 2314 // provided, normalizes the resulting number, and indicates if an international 2315 // prefix was present. 2316 // 2317 // possible_idd_prefix represents the international direct dialing prefix from 2318 // the region we think this number may be dialed in. 2319 // Returns true if an international dialing prefix could be removed from the 2320 // number, otherwise false if the number did not seem to be in international 2321 // format. 
2322 PhoneNumber::CountryCodeSource 2323 PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize( 2324 const string& possible_idd_prefix, 2325 string* number) const { 2326 DCHECK(number); 2327 if (number->empty()) { 2328 return PhoneNumber::FROM_DEFAULT_COUNTRY; 2329 } 2330 const scoped_ptr<RegExpInput> number_string_piece( 2331 reg_exps_->regexp_factory_->CreateInput(*number)); 2332 if (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get())) { 2333 number->assign(number_string_piece->ToString()); 2334 // Can now normalize the rest of the number since we've consumed the "+" 2335 // sign at the start. 2336 Normalize(number); 2337 return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN; 2338 } 2339 // Attempt to parse the first digits as an international prefix. 2340 const RegExp& idd_pattern = 2341 reg_exps_->regexp_cache_->GetRegExp(possible_idd_prefix); 2342 Normalize(number); 2343 return ParsePrefixAsIdd(idd_pattern, number) 2344 ? PhoneNumber::FROM_NUMBER_WITH_IDD 2345 : PhoneNumber::FROM_DEFAULT_COUNTRY; 2346 } 2347 2348 // Strips any national prefix (such as 0, 1) present in the number provided. 2349 // The number passed in should be the normalized telephone number that we wish 2350 // to strip any national dialing prefix from. The metadata should be for the 2351 // region that we think this number is from. Returns true if a national prefix 2352 // and/or carrier code was stripped. 2353 bool PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode( 2354 const PhoneMetadata& metadata, 2355 string* number, 2356 string* carrier_code) const { 2357 DCHECK(number); 2358 string carrier_code_temp; 2359 const string& possible_national_prefix = 2360 metadata.national_prefix_for_parsing(); 2361 if (number->empty() || possible_national_prefix.empty()) { 2362 // Early return for numbers of zero length or with no national prefix 2363 // possible. 
2364 return false; 2365 } 2366 // We use two copies here since Consume modifies the phone number, and if the 2367 // first if-clause fails the number will already be changed. 2368 const scoped_ptr<RegExpInput> number_copy( 2369 reg_exps_->regexp_factory_->CreateInput(*number)); 2370 const scoped_ptr<RegExpInput> number_copy_without_transform( 2371 reg_exps_->regexp_factory_->CreateInput(*number)); 2372 string number_string_copy(*number); 2373 string captured_part_of_prefix; 2374 const RegExp& national_number_rule = reg_exps_->regexp_cache_->GetRegExp( 2375 metadata.general_desc().national_number_pattern()); 2376 // Check if the original number is viable. 2377 bool is_viable_original_number = national_number_rule.FullMatch(*number); 2378 // Attempt to parse the first digits as a national prefix. We make a 2379 // copy so that we can revert to the original string if necessary. 2380 const string& transform_rule = metadata.national_prefix_transform_rule(); 2381 const RegExp& possible_national_prefix_pattern = 2382 reg_exps_->regexp_cache_->GetRegExp(possible_national_prefix); 2383 if (!transform_rule.empty() && 2384 (possible_national_prefix_pattern.Consume( 2385 number_copy.get(), &carrier_code_temp, &captured_part_of_prefix) || 2386 possible_national_prefix_pattern.Consume( 2387 number_copy.get(), &captured_part_of_prefix)) && 2388 !captured_part_of_prefix.empty()) { 2389 // If this succeeded, then we must have had a transform rule and there must 2390 // have been some part of the prefix that we captured. 2391 // We make the transformation and check that the resultant number is still 2392 // viable. If so, replace the number and return. 
2393 possible_national_prefix_pattern.Replace(&number_string_copy, 2394 transform_rule); 2395 if (is_viable_original_number && 2396 !national_number_rule.FullMatch(number_string_copy)) { 2397 return false; 2398 } 2399 number->assign(number_string_copy); 2400 if (carrier_code) { 2401 carrier_code->assign(carrier_code_temp); 2402 } 2403 } else if (possible_national_prefix_pattern.Consume( 2404 number_copy_without_transform.get(), &carrier_code_temp) || 2405 possible_national_prefix_pattern.Consume( 2406 number_copy_without_transform.get())) { 2407 VLOG(4) << "Parsed the first digits as a national prefix."; 2408 // If captured_part_of_prefix is empty, this implies nothing was captured by 2409 // the capturing groups in possible_national_prefix; therefore, no 2410 // transformation is necessary, and we just remove the national prefix. 2411 const string number_copy_as_string = 2412 number_copy_without_transform->ToString(); 2413 if (is_viable_original_number && 2414 !national_number_rule.FullMatch(number_copy_as_string)) { 2415 return false; 2416 } 2417 number->assign(number_copy_as_string); 2418 if (carrier_code) { 2419 carrier_code->assign(carrier_code_temp); 2420 } 2421 } else { 2422 return false; 2423 VLOG(4) << "The first digits did not match the national prefix."; 2424 } 2425 return true; 2426 } 2427 2428 // Strips any extension (as in, the part of the number dialled after the call is 2429 // connected, usually indicated with extn, ext, x or similar) from the end of 2430 // the number, and returns it. The number passed in should be non-normalized. 2431 bool PhoneNumberUtil::MaybeStripExtension(string* number, string* extension) 2432 const { 2433 DCHECK(number); 2434 DCHECK(extension); 2435 // There are three extension capturing groups in the regular expression. 
2436 string possible_extension_one; 2437 string possible_extension_two; 2438 string possible_extension_three; 2439 string number_copy(*number); 2440 const scoped_ptr<RegExpInput> number_copy_as_regexp_input( 2441 reg_exps_->regexp_factory_->CreateInput(number_copy)); 2442 if (reg_exps_->extn_pattern_->Consume(number_copy_as_regexp_input.get(), 2443 false, 2444 &possible_extension_one, 2445 &possible_extension_two, 2446 &possible_extension_three)) { 2447 // Replace the extensions in the original string here. 2448 reg_exps_->extn_pattern_->Replace(&number_copy, ""); 2449 VLOG(4) << "Found an extension. Possible extension one: " 2450 << possible_extension_one 2451 << ". Possible extension two: " << possible_extension_two 2452 << ". Possible extension three: " << possible_extension_three 2453 << ". Remaining number: " << number_copy; 2454 // If we find a potential extension, and the number preceding this is a 2455 // viable number, we assume it is an extension. 2456 if ((!possible_extension_one.empty() || !possible_extension_two.empty() || 2457 !possible_extension_three.empty()) && 2458 IsViablePhoneNumber(number_copy)) { 2459 number->assign(number_copy); 2460 if (!possible_extension_one.empty()) { 2461 extension->assign(possible_extension_one); 2462 } else if (!possible_extension_two.empty()) { 2463 extension->assign(possible_extension_two); 2464 } else if (!possible_extension_three.empty()) { 2465 extension->assign(possible_extension_three); 2466 } 2467 return true; 2468 } 2469 } 2470 return false; 2471 } 2472 2473 // Extracts country calling code from national_number, and returns it. It 2474 // assumes that the leading plus sign or IDD has already been removed. Returns 0 2475 // if national_number doesn't start with a valid country calling code, and 2476 // leaves national_number unmodified. Assumes the national_number is at least 3 2477 // characters long. 
2478 int PhoneNumberUtil::ExtractCountryCode(string* national_number) const { 2479 int potential_country_code; 2480 if (national_number->empty() || (national_number->at(0) == '0')) { 2481 // Country codes do not begin with a '0'. 2482 return 0; 2483 } 2484 for (size_t i = 1; i <= kMaxLengthCountryCode; ++i) { 2485 safe_strto32(national_number->substr(0, i), &potential_country_code); 2486 string region_code; 2487 GetRegionCodeForCountryCode(potential_country_code, ®ion_code); 2488 if (region_code != RegionCode::GetUnknown()) { 2489 national_number->erase(0, i); 2490 return potential_country_code; 2491 } 2492 } 2493 return 0; 2494 } 2495 2496 // Tries to extract a country calling code from a number. Country calling codes 2497 // are extracted in the following ways: 2498 // - by stripping the international dialing prefix of the region the person 2499 // is dialing from, if this is present in the number, and looking at the next 2500 // digits 2501 // - by stripping the '+' sign if present and then looking at the next digits 2502 // - by comparing the start of the number and the country calling code of the 2503 // default region. If the number is not considered possible for the numbering 2504 // plan of the default region initially, but starts with the country calling 2505 // code of this region, validation will be reattempted after stripping this 2506 // country calling code. If this number is considered a possible number, then 2507 // the first digits will be considered the country calling code and removed as 2508 // such. 2509 // 2510 // Returns NO_PARSING_ERROR if a country calling code was successfully 2511 // extracted or none was present, or the appropriate error otherwise, such as 2512 // if a + was present but it was not followed by a valid country calling code. 
//   If NO_PARSING_ERROR is returned, the national_number without the country
//   calling code is populated, and the country_code of the phone_number passed
//   in is set to the country calling code if found, otherwise to 0.
//
//   default_region_metadata may be NULL, in which case only numbers carrying a
//   plus sign can yield a country calling code. country_code_source is only
//   recorded on phone_number when keep_raw_input is true.
PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode(
    const PhoneMetadata* default_region_metadata,
    bool keep_raw_input,
    string* national_number,
    PhoneNumber* phone_number) const {
  DCHECK(national_number);
  DCHECK(phone_number);
  // Set the default prefix to be something that will never match if there is no
  // default region.
  string possible_country_idd_prefix = default_region_metadata
      ?  default_region_metadata->international_prefix()
      : "NonMatch";
  // This call edits national_number in place: it is normalized and any plus
  // sign or IDD is removed.
  PhoneNumber::CountryCodeSource country_code_source =
      MaybeStripInternationalPrefixAndNormalize(possible_country_idd_prefix,
                                                national_number);
  if (keep_raw_input) {
    phone_number->set_country_code_source(country_code_source);
  }
  if (country_code_source != PhoneNumber::FROM_DEFAULT_COUNTRY) {
    // A plus sign or IDD was found. Note the inclusive comparison: a remainder
    // of exactly kMinLengthForNsn characters is rejected as well.
    if (national_number->length() <= kMinLengthForNsn) {
      VLOG(2) << "Phone number had an IDD, but after this was not "
              << "long enough to be a viable phone number.";
      return TOO_SHORT_AFTER_IDD;
    }
    // On success ExtractCountryCode also removes the calling code from
    // national_number.
    int potential_country_code = ExtractCountryCode(national_number);
    if (potential_country_code != 0) {
      phone_number->set_country_code(potential_country_code);
      return NO_PARSING_ERROR;
    }
    // If this fails, they must be using a strange country calling code that we
    // don't recognize, or that doesn't exist.
    return INVALID_COUNTRY_CODE_ERROR;
  } else if (default_region_metadata) {
    // Check to see if the number starts with the country calling code for the
    // default region. If so, we remove the country calling code, and do some
    // checks on the validity of the number before and after.
    int default_country_code = default_region_metadata->country_code();
    string default_country_code_string(SimpleItoa(default_country_code));
    VLOG(4) << "Possible country calling code: " << default_country_code_string;
    string potential_national_number;
    if (TryStripPrefixString(*national_number,
                             default_country_code_string,
                             &potential_national_number)) {
      const PhoneNumberDesc& general_num_desc =
          default_region_metadata->general_desc();
      const RegExp& valid_number_pattern =
          reg_exps_->regexp_cache_->GetRegExp(
              general_num_desc.national_number_pattern());
      // The carrier code is not needed here, hence the NULL output argument.
      MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata,
                                             &potential_national_number,
                                             NULL);
      VLOG(4) << "Number without country calling code prefix: "
              << potential_national_number;
      const RegExp& possible_number_pattern =
          reg_exps_->regexp_cache_->GetRegExp(
              StrCat("(", general_num_desc.possible_number_pattern(), ")"));
      // If the number was not valid before but is valid now, or if it was too
      // long before, we consider the number with the country code stripped to
      // be a better result and keep that instead.
      if ((!valid_number_pattern.FullMatch(*national_number) &&
           valid_number_pattern.FullMatch(potential_national_number)) ||
          TestNumberLengthAgainstPattern(possible_number_pattern,
                                         *national_number) == TOO_LONG) {
        national_number->assign(potential_national_number);
        if (keep_raw_input) {
          // Overrides the source recorded earlier in this function.
          phone_number->set_country_code_source(
              PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
        }
        phone_number->set_country_code(default_country_code);
        return NO_PARSING_ERROR;
      }
    }
  }
  // No country calling code present. Set the country_code to 0.
  phone_number->set_country_code(0);
  return NO_PARSING_ERROR;
}

// Compares two phone numbers and classifies how closely they match (see
// MatchType). The arguments themselves are never modified.
PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatch(
    const PhoneNumber& first_number_in,
    const PhoneNumber& second_number_in) const {
  // Make copies of the phone number so that the numbers passed in are not
  // edited.
  PhoneNumber first_number(first_number_in);
  PhoneNumber second_number(second_number_in);
  // First clear raw_input and country_code_source and
  // preferred_domestic_carrier_code fields and any empty-string extensions so
  // that we can use the proto-buffer equality method.
  first_number.clear_raw_input();
  first_number.clear_country_code_source();
  first_number.clear_preferred_domestic_carrier_code();
  second_number.clear_raw_input();
  second_number.clear_country_code_source();
  second_number.clear_preferred_domestic_carrier_code();
  if (first_number.extension().empty()) {
    first_number.clear_extension();
  }
  if (second_number.extension().empty()) {
    second_number.clear_extension();
  }
  // Early exit if both had extensions and these are different.
  if (first_number.has_extension() && second_number.has_extension() &&
      first_number.extension() != second_number.extension()) {
    return NO_MATCH;
  }
  int first_number_country_code = first_number.country_code();
  int second_number_country_code = second_number.country_code();
  // Both had country calling code specified.
2624 if (first_number_country_code != 0 && second_number_country_code != 0) { 2625 if (ExactlySameAs(first_number, second_number)) { 2626 return EXACT_MATCH; 2627 } else if (first_number_country_code == second_number_country_code && 2628 IsNationalNumberSuffixOfTheOther(first_number, second_number)) { 2629 // A SHORT_NSN_MATCH occurs if there is a difference because of the 2630 // presence or absence of an 'Italian leading zero', the presence or 2631 // absence of an extension, or one NSN being a shorter variant of the 2632 // other. 2633 return SHORT_NSN_MATCH; 2634 } 2635 // This is not a match. 2636 return NO_MATCH; 2637 } 2638 // Checks cases where one or both country calling codes were not specified. To 2639 // make equality checks easier, we first set the country_code fields to be 2640 // equal. 2641 first_number.set_country_code(second_number_country_code); 2642 // If all else was the same, then this is an NSN_MATCH. 2643 if (ExactlySameAs(first_number, second_number)) { 2644 return