Home | History | Annotate | Download | only in browser
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/autofill/core/browser/phone_field.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/memory/scoped_ptr.h"
      9 #include "base/strings/string16.h"
     10 #include "base/strings/string_util.h"
     11 #include "base/strings/utf_string_conversions.h"
     12 #include "components/autofill/core/browser/autofill_field.h"
     13 #include "components/autofill/core/browser/autofill_regex_constants.h"
     14 #include "components/autofill/core/browser/autofill_scanner.h"
     15 #include "ui/base/l10n/l10n_util.h"
     16 
     17 namespace autofill {
     18 namespace {
     19 
     20 // This string includes all area code separators, including NoText.
     21 base::string16 GetAreaRegex() {
     22   base::string16 area_code = UTF8ToUTF16(autofill::kAreaCodeRe);
     23   area_code.append(ASCIIToUTF16("|"));  // Regexp separator.
     24   area_code.append(UTF8ToUTF16(autofill::kAreaCodeNotextRe));
     25   return area_code;
     26 }
     27 
     28 }  // namespace
     29 
     30 PhoneField::~PhoneField() {}
     31 
// Phone field grammars - first matched grammar will be parsed. Grammars are
// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
// parsed separately unless they are necessary parts of the match.
//
// Each entry is { regex id, phone part, max size }:
//   - the regex id is resolved to a pattern through GetRegExp();
//   - the phone part is the slot in which Parse() stores the matched field;
//   - a non-zero max size makes Parse() reject the entry when the matched
//     field has no max_length or a max_length greater than that size.
//
// The order of the grammars is significant, because Parse() stops at the first
// grammar whose entries all match.
//
// The following notation is used to describe the patterns:
// <cc> - country code field.
// <ac> - area code field.
// <phone> - phone or prefix.
// <suffix> - suffix.
// <ext> - extension.
// :N means field is limited to N characters, otherwise it is unlimited.
// (pattern <field>)? means pattern is optional and matched separately.
const PhoneField::Parser PhoneField::kPhoneFieldGrammars[] = {
  // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
  // (Ext: <ext>)?)?
  { REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0 },
  { REGEX_AREA, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
  { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
  { REGEX_PHONE, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_PHONE, FIELD_AREA_CODE, 3 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
  { REGEX_PHONE, FIELD_AREA_CODE, 3 },
  { REGEX_PHONE, FIELD_PHONE, 3 },
  { REGEX_PHONE, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
  { REGEX_AREA, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 3 },
  { REGEX_PHONE, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
  // NOTE(review): the entries below are identical to the previous grammar and
  // include a country code entry that the comment above does not mention. As
  // written this grammar appears unable to match unless the previous one
  // already did - confirm the intended entries.
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX, FIELD_PHONE, 0 },
  { REGEX_SUFFIX, FIELD_SUFFIX, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> - <phone> (Ext: <ext>)?
  // NOTE(review): these entries are identical to the earlier
  // "Area Code: <ac> Phone: <phone>" grammar, so this grammar appears to be
  // redundant - confirm whether different regexes were intended.
  { REGEX_AREA, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
  { REGEX_PHONE, FIELD_PHONE, 10 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <phone> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
};
    119 
    120 // static
    121 FormField* PhoneField::Parse(AutofillScanner* scanner) {
    122   if (scanner->IsEnd())
    123     return NULL;
    124 
    125   scanner->SaveCursor();
    126 
    127   // The form owns the following variables, so they should not be deleted.
    128   const AutofillField* parsed_fields[FIELD_MAX];
    129 
    130   for (size_t i = 0; i < arraysize(kPhoneFieldGrammars); ++i) {
    131     memset(parsed_fields, 0, sizeof(parsed_fields));
    132     scanner->SaveCursor();
    133 
    134     // Attempt to parse according to the next grammar.
    135     for (; i < arraysize(kPhoneFieldGrammars) &&
    136          kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR; ++i) {
    137       if (!ParseFieldSpecifics(
    138               scanner,
    139               GetRegExp(kPhoneFieldGrammars[i].regex),
    140               MATCH_DEFAULT | MATCH_TELEPHONE,
    141               &parsed_fields[kPhoneFieldGrammars[i].phone_part]))
    142         break;
    143       if (kPhoneFieldGrammars[i].max_size &&
    144           (!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
    145             kPhoneFieldGrammars[i].max_size <
    146             parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length)) {
    147         break;
    148       }
    149     }
    150 
    151     if (i >= arraysize(kPhoneFieldGrammars)) {
    152       scanner->Rewind();
    153       return NULL;  // Parsing failed.
    154     }
    155     if (kPhoneFieldGrammars[i].regex == REGEX_SEPARATOR)
    156       break;  // Parsing succeeded.
    157 
    158     // Proceed to the next grammar.
    159     do {
    160       ++i;
    161     } while (i < arraysize(kPhoneFieldGrammars) &&
    162              kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR);
    163 
    164     if (i + 1 == arraysize(kPhoneFieldGrammars)) {
    165       scanner->Rewind();
    166       return NULL;  // Tried through all the possibilities - did not match.
    167     }
    168 
    169     scanner->Rewind();
    170   }
    171 
    172   if (!parsed_fields[FIELD_PHONE]) {
    173     scanner->Rewind();
    174     return NULL;
    175   }
    176 
    177   scoped_ptr<PhoneField> phone_field(new PhoneField);
    178   for (int i = 0; i < FIELD_MAX; ++i)
    179     phone_field->parsed_phone_fields_[i] = parsed_fields[i];
    180 
    181   // Look for optional fields.
    182 
    183   // Look for a third text box.
    184   if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
    185     if (!ParseField(scanner, UTF8ToUTF16(autofill::kPhoneSuffixRe),
    186                     &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
    187       ParseField(scanner, UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe),
    188                  &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
    189     }
    190   }
    191 
    192   // Now look for an extension.
    193   ParseField(scanner, UTF8ToUTF16(autofill::kPhoneExtensionRe),
    194              &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
    195 
    196   return phone_field.release();
    197 }
    198 
    199 bool PhoneField::ClassifyField(ServerFieldTypeMap* map) const {
    200   bool ok = true;
    201 
    202   DCHECK(parsed_phone_fields_[FIELD_PHONE]);  // Phone was correctly parsed.
    203 
    204   if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
    205       (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
    206       (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
    207     if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
    208       ok = ok && AddClassification(parsed_phone_fields_[FIELD_COUNTRY_CODE],
    209                                    PHONE_HOME_COUNTRY_CODE,
    210                                    map);
    211     }
    212 
    213     ServerFieldType field_number_type = PHONE_HOME_NUMBER;
    214     if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
    215       ok = ok && AddClassification(parsed_phone_fields_[FIELD_AREA_CODE],
    216                                    PHONE_HOME_CITY_CODE,
    217                                    map);
    218     } else if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
    219       // Only if we can find country code without city code, it means the phone
    220       // number include city code.
    221       field_number_type = PHONE_HOME_CITY_AND_NUMBER;
    222     }
    223     // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
    224     // we fill only the prefix depending on the size of the input field.
    225     ok = ok && AddClassification(parsed_phone_fields_[FIELD_PHONE],
    226                                  field_number_type,
    227                                  map);
    228     // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
    229     // we fill only the suffix depending on the size of the input field.
    230     if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
    231       ok = ok && AddClassification(parsed_phone_fields_[FIELD_SUFFIX],
    232                                    PHONE_HOME_NUMBER,
    233                                    map);
    234     }
    235   } else {
    236     ok = AddClassification(parsed_phone_fields_[FIELD_PHONE],
    237                            PHONE_HOME_WHOLE_NUMBER,
    238                            map);
    239   }
    240 
    241   return ok;
    242 }
    243 
    244 PhoneField::PhoneField() {
    245   memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
    246 }
    247 
    248 // static
    249 base::string16 PhoneField::GetRegExp(RegexType regex_id) {
    250   switch (regex_id) {
    251     case REGEX_COUNTRY:
    252       return UTF8ToUTF16(autofill::kCountryCodeRe);
    253     case REGEX_AREA:
    254       return GetAreaRegex();
    255     case REGEX_AREA_NOTEXT:
    256       return UTF8ToUTF16(autofill::kAreaCodeNotextRe);
    257     case REGEX_PHONE:
    258       return UTF8ToUTF16(autofill::kPhoneRe);
    259     case REGEX_PREFIX_SEPARATOR:
    260       return UTF8ToUTF16(autofill::kPhonePrefixSeparatorRe);
    261     case REGEX_PREFIX:
    262       return UTF8ToUTF16(autofill::kPhonePrefixRe);
    263     case REGEX_SUFFIX_SEPARATOR:
    264       return UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe);
    265     case REGEX_SUFFIX:
    266       return UTF8ToUTF16(autofill::kPhoneSuffixRe);
    267     case REGEX_EXTENSION:
    268       return UTF8ToUTF16(autofill::kPhoneExtensionRe);
    269     default:
    270       NOTREACHED();
    271       break;
    272   }
    273   return base::string16();
    274 }
    275 
    276 }  // namespace autofill
    277