Home | History | Annotate | Download | only in phonenumbers
      1 // Copyright (C) 2011 The Libphonenumber Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "phonenumbers/asyoutypeformatter.h"
     16 
     17 #include <cctype>
     18 #include <list>
     19 #include <string>
     20 
     21 #include <google/protobuf/message_lite.h>
     22 
     23 #include "phonenumbers/base/logging.h"
     24 #include "phonenumbers/base/memory/scoped_ptr.h"
     25 #include "phonenumbers/phonemetadata.pb.h"
     26 #include "phonenumbers/phonenumberutil.h"
     27 #include "phonenumbers/regexp_cache.h"
     28 #include "phonenumbers/regexp_factory.h"
     29 #include "phonenumbers/stringutil.h"
     30 #include "phonenumbers/unicodestring.h"
     31 
     32 namespace i18n {
     33 namespace phonenumbers {
     34 
     35 using google::protobuf::RepeatedPtrField;
     36 
     37 namespace {
     38 
// The plus sign. It is only accepted as the very first character of the input
// and is also used to build the international-prefix regular expression.
const char kPlusSign = '+';

// A pattern that is used to match character classes in regular expressions.
// An example of a character class is [1-4].
const char kCharacterClassPattern[] = "\\[([^\\[\\]])*\\]";

// This is the minimum length of national number accrued that is required to
// trigger the formatter. The first element of the leading_digits_pattern of
// each number_format contains a regular expression that matches up to this
// number of digits.
const size_t kMinLeadingDigitsLength = 3;

// The digits that have not been entered yet will be represented by a \u2008,
// the punctuation space. The literal below is that code point encoded as
// UTF-8.
const char kDigitPlaceholder[] = "\xE2\x80\x88";

// Character used when appropriate to separate a prefix, such as a long NDD or a
// country calling code, from the national number.
const char kSeparatorBeforeNationalNumber = ' ';

// A set of characters that, if found in a national prefix formatting rule, are
// an indicator to us that we should separate the national prefix from the
// number when formatting.
const char kNationalPrefixSeparatorsPattern[] = "[- ]";
     63 
     64 // Replaces any standalone digit in the pattern (not any inside a {} grouping)
     65 // with \d. This function replaces the standalone digit regex used in the Java
     66 // version which is currently not supported by RE2 because it uses a special
     67 // construct (?=).
     68 void ReplacePatternDigits(string* pattern) {
     69   DCHECK(pattern);
     70   string new_pattern;
     71   // This is needed since sometimes there is more than one digit in between the
     72   // curly braces.
     73   bool is_in_braces = false;
     74 
     75   for (string::const_iterator it = pattern->begin(); it != pattern->end();
     76        ++it) {
     77     const char current_char = *it;
     78 
     79     if (isdigit(current_char)) {
     80       if (is_in_braces) {
     81         new_pattern += current_char;
     82       } else {
     83         new_pattern += "\\d";
     84       }
     85     } else {
     86       new_pattern += current_char;
     87       if (current_char == '{') {
     88         is_in_braces = true;
     89       } else if (current_char == '}') {
     90         is_in_braces = false;
     91       }
     92     }
     93   }
     94   pattern->assign(new_pattern);
     95 }
     96 
     97 // Matches all the groups contained in 'input' against 'pattern'.
     98 void MatchAllGroups(const string& pattern,
     99                     const string& input,
    100                     const AbstractRegExpFactory& regexp_factory,
    101                     RegExpCache* cache,
    102                     string* group) {
    103   DCHECK(cache);
    104   DCHECK(group);
    105   string new_pattern(pattern);
    106 
    107   // Transforms pattern "(...)(...)(...)" to "(.........)".
    108   strrmm(&new_pattern, "()");
    109   new_pattern = StrCat("(", new_pattern, ")");
    110 
    111   const scoped_ptr<RegExpInput> consume_input(
    112       regexp_factory.CreateInput(input));
    113   bool status =
    114       cache->GetRegExp(new_pattern).Consume(consume_input.get(), group);
    115   DCHECK(status);
    116 }
    117 
    118 PhoneMetadata CreateEmptyMetadata() {
    119   PhoneMetadata metadata;
    120   metadata.set_international_prefix("NA");
    121   return metadata;
    122 }
    123 
    124 }  // namespace
    125 
// Creates a formatter whose default region is 'region_code'. All accrued
// state starts out empty and formatting is enabled.
//
// NOTE(review): C++ initialises members in the order they are declared in the
// class, not in the order listed below. regexp_cache_ reads regexp_factory_,
// and GetMetadataForRegion() reads phone_util_ and empty_metadata_, so the
// declaration order in the header has to keep those dependencies ahead of
// their users - confirm against the header.
AsYouTypeFormatter::AsYouTypeFormatter(const string& region_code)
    : regexp_factory_(new RegExpFactory()),
      regexp_cache_(*regexp_factory_.get(), 64),
      current_output_(),
      formatting_template_(),
      current_formatting_pattern_(),
      accrued_input_(),
      accrued_input_without_formatting_(),
      able_to_format_(true),
      input_has_formatting_(false),
      is_complete_number_(false),
      is_expecting_country_code_(false),
      phone_util_(*PhoneNumberUtil::GetInstance()),
      default_country_(region_code),
      empty_metadata_(CreateEmptyMetadata()),
      // Falls back to &empty_metadata_ when the region has no metadata.
      default_metadata_(GetMetadataForRegion(region_code)),
      current_metadata_(default_metadata_),
      last_match_position_(0),
      original_position_(0),
      position_to_remember_(0),
      prefix_before_national_number_(),
      should_add_space_after_national_prefix_(false),
      national_prefix_extracted_(),
      national_number_(),
      possible_formats_() {
}
    152 
    153 // The metadata needed by this class is the same for all regions sharing the
    154 // same country calling code. Therefore, we return the metadata for "main"
    155 // region for this country calling code.
    156 const PhoneMetadata* AsYouTypeFormatter::GetMetadataForRegion(
    157     const string& region_code) const {
    158   int country_calling_code = phone_util_.GetCountryCodeForRegion(region_code);
    159   string main_country;
    160   phone_util_.GetRegionCodeForCountryCode(country_calling_code, &main_country);
    161   const PhoneMetadata* const metadata =
    162       phone_util_.GetMetadataForRegion(main_country);
    163   if (metadata) {
    164     return metadata;
    165   }
    166   // Set to a default instance of the metadata. This allows us to function with
    167   // an incorrect region code, even if formatting only works for numbers
    168   // specified with "+".
    169   return &empty_metadata_;
    170 }
    171 
    172 bool AsYouTypeFormatter::MaybeCreateNewTemplate() {
    173   // When there are multiple available formats, the formatter uses the first
    174   // format where a formatting template could be created.
    175   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
    176        it != possible_formats_.end(); ++it) {
    177     DCHECK(*it);
    178     const NumberFormat& number_format = **it;
    179     const string& pattern = number_format.pattern();
    180     if (current_formatting_pattern_ == pattern) {
    181       return false;
    182     }
    183     if (CreateFormattingTemplate(number_format)) {
    184       current_formatting_pattern_ = pattern;
    185       SetShouldAddSpaceAfterNationalPrefix(number_format);
    186       // With a new formatting template, the matched position using the old
    187       // template needs to be reset.
    188       last_match_position_ = 0;
    189       return true;
    190     }
    191   }
    192   able_to_format_ = false;
    193   return false;
    194 }
    195 
    196 void AsYouTypeFormatter::GetAvailableFormats(
    197     const string& leading_three_digits) {
    198   const RepeatedPtrField<NumberFormat>& format_list =
    199       (is_complete_number_ &&
    200        current_metadata_->intl_number_format().size() > 0)
    201           ? current_metadata_->intl_number_format()
    202           : current_metadata_->number_format();
    203   bool national_prefix_used_by_country =
    204       current_metadata_->has_national_prefix();
    205   for (RepeatedPtrField<NumberFormat>::const_iterator it = format_list.begin();
    206        it != format_list.end(); ++it) {
    207     if (!national_prefix_used_by_country || is_complete_number_ ||
    208         it->national_prefix_optional_when_formatting() ||
    209         phone_util_.FormattingRuleHasFirstGroupOnly(
    210             it->national_prefix_formatting_rule())) {
    211       if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
    212         possible_formats_.push_back(&*it);
    213       }
    214     }
    215   }
    216   NarrowDownPossibleFormats(leading_three_digits);
    217 }
    218 
    219 void AsYouTypeFormatter::NarrowDownPossibleFormats(
    220     const string& leading_digits) {
    221   const int index_of_leading_digits_pattern =
    222       leading_digits.length() - kMinLeadingDigitsLength;
    223 
    224   for (list<const NumberFormat*>::iterator it = possible_formats_.begin();
    225        it != possible_formats_.end(); ) {
    226     DCHECK(*it);
    227     const NumberFormat& format = **it;
    228 
    229     if (format.leading_digits_pattern_size() >
    230         index_of_leading_digits_pattern) {
    231       const scoped_ptr<RegExpInput> input(
    232           regexp_factory_->CreateInput(leading_digits));
    233       if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
    234               index_of_leading_digits_pattern)).Consume(input.get())) {
    235         it = possible_formats_.erase(it);
    236         continue;
    237       }
    238     }  // else the particular format has no more specific leadingDigitsPattern,
    239        // and it should be retained.
    240     ++it;
    241   }
    242 }
    243 
    244 void AsYouTypeFormatter::SetShouldAddSpaceAfterNationalPrefix(
    245     const NumberFormat& format) {
    246   static const scoped_ptr<const RegExp> national_prefix_separators_pattern(
    247       regexp_factory_->CreateRegExp(kNationalPrefixSeparatorsPattern));
    248   should_add_space_after_national_prefix_ =
    249       national_prefix_separators_pattern->PartialMatch(
    250           format.national_prefix_formatting_rule());
    251 }
    252 
    253 bool AsYouTypeFormatter::CreateFormattingTemplate(const NumberFormat& format) {
    254   string number_pattern = format.pattern();
    255 
    256   // The formatter doesn't format numbers when numberPattern contains "|", e.g.
    257   // (20|3)\d{4}. In those cases we quickly return.
    258   if (number_pattern.find('|') != string::npos) {
    259     return false;
    260   }
    261   // Replace anything in the form of [..] with \d.
    262   static const scoped_ptr<const RegExp> character_class_pattern(
    263       regexp_factory_->CreateRegExp(kCharacterClassPattern));
    264   character_class_pattern->GlobalReplace(&number_pattern, "\\\\d");
    265 
    266   // Replace any standalone digit (not the one in d{}) with \d.
    267   ReplacePatternDigits(&number_pattern);
    268 
    269   string number_format = format.format();
    270   formatting_template_.remove();
    271   UnicodeString temp_template;
    272   GetFormattingTemplate(number_pattern, number_format, &temp_template);
    273 
    274   if (temp_template.length() > 0) {
    275     formatting_template_.append(temp_template);
    276     return true;
    277   }
    278   return false;
    279 }
    280 
    281 void AsYouTypeFormatter::GetFormattingTemplate(
    282     const string& number_pattern,
    283     const string& number_format,
    284     UnicodeString* formatting_template) {
    285   DCHECK(formatting_template);
    286 
    287   // Creates a phone number consisting only of the digit 9 that matches the
    288   // number_pattern by applying the pattern to the longest_phone_number string.
    289   static const char longest_phone_number[] = "999999999999999";
    290   string a_phone_number;
    291 
    292   MatchAllGroups(number_pattern, longest_phone_number, *regexp_factory_,
    293                  &regexp_cache_, &a_phone_number);
    294   // No formatting template can be created if the number of digits entered so
    295   // far is longer than the maximum the current formatting rule can accommodate.
    296   if (a_phone_number.length() < national_number_.length()) {
    297     formatting_template->remove();
    298     return;
    299   }
    300   // Formats the number according to number_format.
    301   regexp_cache_.GetRegExp(number_pattern).GlobalReplace(
    302       &a_phone_number, number_format);
    303   // Replaces each digit with character kDigitPlaceholder.
    304   GlobalReplaceSubstring("9", kDigitPlaceholder, &a_phone_number);
    305   formatting_template->setTo(a_phone_number.c_str(), a_phone_number.size());
    306 }
    307 
    308 void AsYouTypeFormatter::Clear() {
    309   current_output_.clear();
    310   accrued_input_.remove();
    311   accrued_input_without_formatting_.remove();
    312   formatting_template_.remove();
    313   last_match_position_ = 0;
    314   current_formatting_pattern_.clear();
    315   prefix_before_national_number_.clear();
    316   national_prefix_extracted_.clear();
    317   national_number_.clear();
    318   able_to_format_ = true;
    319   input_has_formatting_ = false;
    320   position_to_remember_ = 0;
    321   original_position_ = 0;
    322   is_complete_number_ = false;
    323   is_expecting_country_code_ = false;
    324   possible_formats_.clear();
    325   should_add_space_after_national_prefix_ = false;
    326 
    327   if (current_metadata_ != default_metadata_) {
    328     current_metadata_ = GetMetadataForRegion(default_country_);
    329   }
    330 }
    331 
    332 const string& AsYouTypeFormatter::InputDigit(char32 next_char, string* result) {
    333   DCHECK(result);
    334 
    335   InputDigitWithOptionToRememberPosition(next_char, false, &current_output_);
    336   result->assign(current_output_);
    337   return *result;
    338 }
    339 
    340 const string& AsYouTypeFormatter::InputDigitAndRememberPosition(
    341     char32 next_char,
    342     string* result) {
    343   DCHECK(result);
    344 
    345   InputDigitWithOptionToRememberPosition(next_char, true, &current_output_);
    346   result->assign(current_output_);
    347   return *result;
    348 }
    349 
// Core of the formatter: accrues 'next_char', updates the internal state and
// writes the text to display into 'phone_number' (must not be null).
//
// When 'remember_position' is true the position of 'next_char' is recorded,
// both in the raw input (original_position_) and, for accepted characters, in
// the input without formatting (position_to_remember_).
//
// Whenever formatting is not possible, 'phone_number' receives the raw
// accrued input exactly as typed.
void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
    char32 next_char,
    bool remember_position,
    string* phone_number) {
  DCHECK(phone_number);

  accrued_input_.append(next_char);
  if (remember_position) {
    original_position_ = accrued_input_.length();
  }
  // We do formatting on-the-fly only when each character entered is either a
  // digit, or a plus sign (accepted at the start of the number only).
  string next_char_string;
  UnicodeString(next_char).toUTF8String(next_char_string);

  char normalized_next_char = '\0';
  if (!(phone_util_.ContainsOnlyValidDigits(next_char_string) ||
      (accrued_input_.length() == 1 && next_char == kPlusSign))) {
    // Any other character counts as formatting entered by the user; from here
    // on the input is echoed back as typed.
    able_to_format_ = false;
    input_has_formatting_ = true;
  } else {
    normalized_next_char =
        NormalizeAndAccrueDigitsAndPlusSign(next_char, remember_position);
  }
  if (!able_to_format_) {
    // When we are unable to format because of reasons other than that
    // formatting chars have been entered, it can be due to really long IDDs or
    // NDDs. If that is the case, we might be able to do formatting again after
    // extracting them.
    if (input_has_formatting_) {
      phone_number->clear();
      accrued_input_.toUTF8String(*phone_number);
    } else if (AttemptToExtractIdd()) {
      if (AttemptToExtractCountryCode()) {
        AttemptToChoosePatternWithPrefixExtracted(phone_number);
        return;
      }
      // An IDD without a recognisable country code: fall out of the chain and
      // echo the raw input below.
    } else if (AbleToExtractLongerNdd()) {
      // Add an additional space to separate long NDD and national significant
      // number for readability. We don't set
      // should_add_space_after_national_prefix_ to true, since we don't want
      // this to change later when we choose formatting templates.
      prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
      AttemptToChoosePatternWithPrefixExtracted(phone_number);
      return;
    }
    phone_number->clear();
    accrued_input_.toUTF8String(*phone_number);
    return;
  }

  // We start to attempt to format only when at least kMinLeadingDigitsLength
  // digits (the plus sign is counted as a digit as well for this purpose) have
  // been entered.
  switch (accrued_input_without_formatting_.length()) {
    case 0:
    case 1:
    case 2:
      // Too little input so far: echo it back unchanged.
      phone_number->clear();
      accrued_input_.toUTF8String(*phone_number);
      return;
    case 3:
      if (AttemptToExtractIdd()) {
        is_expecting_country_code_ = true;
      } else {
        // No IDD or plus sign is found, might be entering in national format.
        RemoveNationalPrefixFromNationalNumber(&national_prefix_extracted_);
        AttemptToChooseFormattingPattern(phone_number);
        return;
      }
      // Intentional fall-through: an IDD was found, so carry on with the
      // country code handling in the default case.
    default:
      if (is_expecting_country_code_) {
        if (AttemptToExtractCountryCode()) {
          is_expecting_country_code_ = false;
        }
        // Until formatting of the national number starts, show the prefix
        // followed by the national digits accrued so far.
        phone_number->assign(prefix_before_national_number_);
        phone_number->append(national_number_);
        return;
      }
      if (possible_formats_.size() > 0) {
        // The formatting pattern is already chosen.
        string temp_national_number;
        InputDigitHelper(normalized_next_char, &temp_national_number);
        // See if accrued digits can be formatted properly already. If not, use
        // the results from InputDigitHelper, which does formatting based on the
        // formatting pattern chosen.
        string formatted_number;
        AttemptToFormatAccruedDigits(&formatted_number);
        if (formatted_number.length() > 0) {
          phone_number->assign(formatted_number);
          return;
        }
        // The new digit may rule out some formats; if that leads to a
        // different template, replay all national digits through it.
        NarrowDownPossibleFormats(national_number_);
        if (MaybeCreateNewTemplate()) {
          InputAccruedNationalNumber(phone_number);
          return;
        }
        if (able_to_format_) {
          AppendNationalNumber(temp_national_number, phone_number);
        } else {
          phone_number->clear();
          accrued_input_.toUTF8String(*phone_number);
        }
        return;
      } else {
        // No format has been chosen yet; try to pick one now.
        AttemptToChooseFormattingPattern(phone_number);
      }
  }
}
    459 
    460 void AsYouTypeFormatter::AttemptToChoosePatternWithPrefixExtracted(
    461     string* formatted_number) {
    462   able_to_format_ = true;
    463   is_expecting_country_code_ = false;
    464   possible_formats_.clear();
    465   AttemptToChooseFormattingPattern(formatted_number);
    466 }
    467 
    468 bool AsYouTypeFormatter::AbleToExtractLongerNdd() {
    469   if (national_prefix_extracted_.length() > 0) {
    470     // Put the extracted NDD back to the national number before attempting to
    471     // extract a new NDD.
    472     national_number_.insert(0, national_prefix_extracted_);
    473     // Remove the previously extracted NDD from prefixBeforeNationalNumber. We
    474     // cannot simply set it to empty string because people sometimes incorrectly
    475     // enter national prefix after the country code, e.g. +44 (0)20-1234-5678.
    476     int index_of_previous_ndd =
    477         prefix_before_national_number_.find_last_of(national_prefix_extracted_);
    478     prefix_before_national_number_.resize(index_of_previous_ndd);
    479   }
    480   string new_national_prefix;
    481   RemoveNationalPrefixFromNationalNumber(&new_national_prefix);
    482   return national_prefix_extracted_ != new_national_prefix;
    483 }
    484 
    485 void AsYouTypeFormatter::AttemptToFormatAccruedDigits(
    486     string* formatted_result) {
    487   DCHECK(formatted_result);
    488 
    489   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
    490        it != possible_formats_.end(); ++it) {
    491     DCHECK(*it);
    492     const NumberFormat& number_format = **it;
    493     const string& pattern = number_format.pattern();
    494 
    495     if (regexp_cache_.GetRegExp(pattern).FullMatch(national_number_)) {
    496       SetShouldAddSpaceAfterNationalPrefix(number_format);
    497 
    498       string formatted_number(national_number_);
    499       bool status = regexp_cache_.GetRegExp(pattern).GlobalReplace(
    500           &formatted_number, number_format.format());
    501       DCHECK(status);
    502 
    503       AppendNationalNumber(formatted_number, formatted_result);
    504       return;
    505     }
    506   }
    507 }
    508 
    509 int AsYouTypeFormatter::GetRememberedPosition() const {
    510   UnicodeString current_output(current_output_.c_str());
    511   if (!able_to_format_) {
    512     return ConvertUnicodeStringPosition(current_output, original_position_);
    513   }
    514   int accrued_input_index = 0;
    515   int current_output_index = 0;
    516 
    517   while (accrued_input_index < position_to_remember_ &&
    518          current_output_index < current_output.length()) {
    519     if (accrued_input_without_formatting_[accrued_input_index] ==
    520         current_output[current_output_index]) {
    521       ++accrued_input_index;
    522     }
    523     ++current_output_index;
    524   }
    525   return ConvertUnicodeStringPosition(current_output, current_output_index);
    526 }
    527 
    528 void AsYouTypeFormatter::AppendNationalNumber(const string& national_number,
    529                                               string* phone_number) const {
    530   int prefix_before_national_number_length =
    531       prefix_before_national_number_.size();
    532   if (should_add_space_after_national_prefix_ &&
    533       prefix_before_national_number_length > 0 &&
    534       prefix_before_national_number_.at(
    535           prefix_before_national_number_length - 1) !=
    536       kSeparatorBeforeNationalNumber) {
    537     // We want to add a space after the national prefix if the national prefix
    538     // formatting rule indicates that this would normally be done, with the
    539     // exception of the case where we already appended a space because the NDD
    540     // was surprisingly long.
    541     phone_number->assign(prefix_before_national_number_);
    542     phone_number->push_back(kSeparatorBeforeNationalNumber);
    543     StrAppend(phone_number, national_number);
    544   } else {
    545     phone_number->assign(
    546         StrCat(prefix_before_national_number_, national_number));
    547   }
    548 }
    549 
    550 void AsYouTypeFormatter::AttemptToChooseFormattingPattern(
    551     string* formatted_number) {
    552   DCHECK(formatted_number);
    553 
    554   if (national_number_.length() >= kMinLeadingDigitsLength) {
    555     const string leading_digits =
    556         national_number_.substr(0, kMinLeadingDigitsLength);
    557 
    558     GetAvailableFormats(leading_digits);
    559     formatted_number->clear();
    560     AttemptToFormatAccruedDigits(formatted_number);
    561     // See if the accrued digits can be formatted properly already.
    562     if (formatted_number->length() > 0) {
    563       return;
    564     }
    565     if (MaybeCreateNewTemplate()) {
    566       InputAccruedNationalNumber(formatted_number);
    567     } else {
    568       formatted_number->clear();
    569       accrued_input_.toUTF8String(*formatted_number);
    570     }
    571     return;
    572   } else {
    573     AppendNationalNumber(national_number_, formatted_number);
    574   }
    575 }
    576 
    577 void AsYouTypeFormatter::InputAccruedNationalNumber(string* number) {
    578   DCHECK(number);
    579   int length_of_national_number = national_number_.length();
    580 
    581   if (length_of_national_number > 0) {
    582     string temp_national_number;
    583 
    584     for (int i = 0; i < length_of_national_number; ++i) {
    585       temp_national_number.clear();
    586       InputDigitHelper(national_number_[i], &temp_national_number);
    587     }
    588     if (able_to_format_) {
    589       AppendNationalNumber(temp_national_number, number);
    590     } else {
    591       number->clear();
    592       accrued_input_.toUTF8String(*number);
    593     }
    594     return;
    595   } else {
    596     number->assign(prefix_before_national_number_);
    597   }
    598 }
    599 
    600 bool AsYouTypeFormatter::IsNanpaNumberWithNationalPrefix() const {
    601   // For NANPA numbers beginning with 1[2-9], treat the 1 as the national
    602   // prefix. The reason is that national significant numbers in NANPA always
    603   // start with [2-9] after the national prefix.  Numbers beginning with 1[01]
    604   // can only be short/emergency numbers, which don't need the national
    605   // prefix.
    606   return (current_metadata_->country_code() == 1) &&
    607          (national_number_[0] == '1') && (national_number_[1] != '0') &&
    608          (national_number_[1] != '1');
    609 }
    610 
    611 void AsYouTypeFormatter::RemoveNationalPrefixFromNationalNumber(
    612     string* national_prefix) {
    613   int start_of_national_number = 0;
    614 
    615   if (IsNanpaNumberWithNationalPrefix()) {
    616     start_of_national_number = 1;
    617     prefix_before_national_number_.append("1");
    618     prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
    619     is_complete_number_ = true;
    620   } else if (current_metadata_->has_national_prefix_for_parsing()) {
    621     const scoped_ptr<RegExpInput> consumed_input(
    622         regexp_factory_->CreateInput(national_number_));
    623     const RegExp& pattern = regexp_cache_.GetRegExp(
    624         current_metadata_->national_prefix_for_parsing());
    625 
    626     // Since some national prefix patterns are entirely optional, check that a
    627     // national prefix could actually be extracted.
    628     if (pattern.Consume(consumed_input.get())) {
    629       start_of_national_number =
    630           national_number_.length() - consumed_input->ToString().length();
    631       if (start_of_national_number > 0) {
    632         // When the national prefix is detected, we use international formatting
    633         // rules instead of national ones, because national formatting rules
    634         // could countain local formatting rules for numbers entered without
    635         // area code.
    636         is_complete_number_ = true;
    637         prefix_before_national_number_.append(
    638             national_number_.substr(0, start_of_national_number));
    639       }
    640     }
    641   }
    642   national_prefix->assign(national_number_, 0, start_of_national_number);
    643   national_number_.erase(0, start_of_national_number);
    644 }
    645 
    646 bool AsYouTypeFormatter::AttemptToExtractIdd() {
    647   string accrued_input_without_formatting_stdstring;
    648   accrued_input_without_formatting_
    649       .toUTF8String(accrued_input_without_formatting_stdstring);
    650   const scoped_ptr<RegExpInput> consumed_input(
    651       regexp_factory_->CreateInput(accrued_input_without_formatting_stdstring));
    652   const RegExp& international_prefix = regexp_cache_.GetRegExp(
    653       StrCat("\\", string(&kPlusSign, 1), "|",
    654              current_metadata_->international_prefix()));
    655 
    656   if (international_prefix.Consume(consumed_input.get())) {
    657     is_complete_number_ = true;
    658     const int start_of_country_code =
    659         accrued_input_without_formatting_.length() -
    660         consumed_input->ToString().length();
    661 
    662     national_number_.clear();
    663     accrued_input_without_formatting_.tempSubString(start_of_country_code)
    664         .toUTF8String(national_number_);
    665 
    666     string before_country_code;
    667     accrued_input_without_formatting_.tempSubString(0, start_of_country_code)
    668         .toUTF8String(before_country_code);
    669     prefix_before_national_number_.clear();
    670     prefix_before_national_number_.append(before_country_code);
    671 
    672     if (accrued_input_without_formatting_[0] != kPlusSign) {
    673       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
    674     }
    675     return true;
    676   }
    677   return false;
    678 }
    679 
    680 bool AsYouTypeFormatter::AttemptToExtractCountryCode() {
    681   if (national_number_.length() == 0) {
    682     return false;
    683   }
    684   string number_without_country_code(national_number_);
    685   int country_code =
    686     phone_util_.ExtractCountryCode(&number_without_country_code);
    687   if (country_code == 0) {
    688     return false;
    689   }
    690   national_number_.assign(number_without_country_code);
    691   string new_region_code;
    692   phone_util_.GetRegionCodeForCountryCode(country_code, &new_region_code);
    693   if (PhoneNumberUtil::kRegionCodeForNonGeoEntity == new_region_code) {
    694     current_metadata_ =
    695         phone_util_.GetMetadataForNonGeographicalRegion(country_code);
    696   } else if (new_region_code != default_country_) {
    697     current_metadata_ = GetMetadataForRegion(new_region_code);
    698   }
    699   StrAppend(&prefix_before_national_number_, country_code);
    700   prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
    701 
    702   return true;
    703 }
    704 
    705 char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
    706     char32 next_char,
    707     bool remember_position) {
    708   char normalized_char = next_char;
    709 
    710   if (next_char == kPlusSign) {
    711     accrued_input_without_formatting_.append(next_char);
    712   } else {
    713     string number;
    714     UnicodeString(next_char).toUTF8String(number);
    715     phone_util_.NormalizeDigitsOnly(&number);
    716     accrued_input_without_formatting_.append(next_char);
    717     national_number_.append(number);
    718     normalized_char = number[0];
    719   }
    720   if (remember_position) {
    721     position_to_remember_ = accrued_input_without_formatting_.length();
    722   }
    723   return normalized_char;
    724 }
    725 
    726 void AsYouTypeFormatter::InputDigitHelper(char next_char, string* number) {
    727   DCHECK(number);
    728   number->clear();
    729   const char32 placeholder_codepoint = UnicodeString(kDigitPlaceholder)[0];
    730   int placeholder_pos = formatting_template_
    731       .tempSubString(last_match_position_).indexOf(placeholder_codepoint);
    732   if (placeholder_pos != -1) {
    733     UnicodeString temp_template = formatting_template_;
    734     placeholder_pos = temp_template.indexOf(placeholder_codepoint);
    735     temp_template.setCharAt(placeholder_pos, UnicodeString(next_char)[0]);
    736     last_match_position_ = placeholder_pos;
    737     formatting_template_.replace(0, temp_template.length(), temp_template);
    738     formatting_template_.tempSubString(0, last_match_position_ + 1)
    739         .toUTF8String(*number);
    740   } else {
    741     if (possible_formats_.size() == 1) {
    742       // More digits are entered than we could handle, and there are no other
    743       // valid patterns to try.
    744       able_to_format_ = false;
    745     }  // else, we just reset the formatting pattern.
    746     current_formatting_pattern_.clear();
    747     accrued_input_.toUTF8String(*number);
    748   }
    749 }
    750 
    751 // Returns the number of bytes contained in the given UnicodeString up to the
    752 // specified position.
    753 // static
    754 int AsYouTypeFormatter::ConvertUnicodeStringPosition(const UnicodeString& s,
    755                                                      int pos) {
    756   if (pos > s.length()) {
    757     return -1;
    758   }
    759   string substring;
    760   s.tempSubString(0, pos).toUTF8String(substring);
    761   return substring.length();
    762 }
    763 
    764 }  // namespace phonenumbers
    765 }  // namespace i18n
    766