Home | History | Annotate | Download | only in autofill
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autofill/address_field.h"
      6 
      7 #include <stddef.h>
      8 
      9 #include "base/logging.h"
     10 #include "base/memory/scoped_ptr.h"
     11 #include "base/string16.h"
     12 #include "base/string_util.h"
     13 #include "base/utf_string_conversions.h"
     14 #include "chrome/browser/autofill/autofill_field.h"
     15 #include "grit/autofill_resources.h"
     16 #include "ui/base/l10n/l10n_util.h"
     17 
     18 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
     19   AutofillFieldType address_company;
     20   AutofillFieldType address_line1;
     21   AutofillFieldType address_line2;
     22   AutofillFieldType address_city;
     23   AutofillFieldType address_state;
     24   AutofillFieldType address_zip;
     25   AutofillFieldType address_country;
     26 
     27   switch (type_) {
     28     case kShippingAddress:
     29      // Fall through. Autofill does not support shipping addresses.
     30     case kGenericAddress:
     31       address_company = COMPANY_NAME;
     32       address_line1 = ADDRESS_HOME_LINE1;
     33       address_line2 = ADDRESS_HOME_LINE2;
     34       address_city = ADDRESS_HOME_CITY;
     35       address_state = ADDRESS_HOME_STATE;
     36       address_zip = ADDRESS_HOME_ZIP;
     37       address_country = ADDRESS_HOME_COUNTRY;
     38       break;
     39 
     40     case kBillingAddress:
     41       address_company = COMPANY_NAME;
     42       address_line1 = ADDRESS_BILLING_LINE1;
     43       address_line2 = ADDRESS_BILLING_LINE2;
     44       address_city = ADDRESS_BILLING_CITY;
     45       address_state = ADDRESS_BILLING_STATE;
     46       address_zip = ADDRESS_BILLING_ZIP;
     47       address_country = ADDRESS_BILLING_COUNTRY;
     48       break;
     49 
     50     default:
     51       NOTREACHED();
     52       return false;
     53   }
     54 
     55   bool ok;
     56   ok = Add(field_type_map, company_, AutofillType(address_company));
     57   DCHECK(ok);
     58   ok = ok && Add(field_type_map, address1_, AutofillType(address_line1));
     59   DCHECK(ok);
     60   ok = ok && Add(field_type_map, address2_, AutofillType(address_line2));
     61   DCHECK(ok);
     62   ok = ok && Add(field_type_map, city_, AutofillType(address_city));
     63   DCHECK(ok);
     64   ok = ok && Add(field_type_map, state_, AutofillType(address_state));
     65   DCHECK(ok);
     66   ok = ok && Add(field_type_map, zip_, AutofillType(address_zip));
     67   DCHECK(ok);
     68   ok = ok && Add(field_type_map, country_, AutofillType(address_country));
     69   DCHECK(ok);
     70 
     71   return ok;
     72 }
     73 
     74 FormFieldType AddressField::GetFormFieldType() const {
     75   return kAddressType;
     76 }
     77 
     78 AddressField* AddressField::Parse(
     79     std::vector<AutofillField*>::const_iterator* iter,
     80     bool is_ecml) {
     81   DCHECK(iter);
     82   if (!iter)
     83     return NULL;
     84 
     85   scoped_ptr<AddressField> address_field(new AddressField);
     86   std::vector<AutofillField*>::const_iterator q = *iter;
     87   string16 pattern;
     88 
     89   // The ECML standard uses 2 letter country codes.  So we will
     90   // have to remember that this is an ECML form, for when we fill
     91   // it out.
     92   address_field->is_ecml_ = is_ecml;
     93 
     94   string16 attention_ignored =
     95       l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE);
     96   string16 region_ignored =
     97       l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE);
     98 
     99   // Allow address fields to appear in any order.
    100   while (true) {
    101     if (ParseCompany(&q, is_ecml, address_field.get()) ||
    102         ParseAddressLines(&q, is_ecml, address_field.get()) ||
    103         ParseCity(&q, is_ecml, address_field.get()) ||
    104         ParseState(&q, is_ecml, address_field.get()) ||
    105         ParseZipCode(&q, is_ecml, address_field.get()) ||
    106         ParseCountry(&q, is_ecml, address_field.get())) {
    107       continue;
    108     } else if (ParseText(&q, attention_ignored) ||
    109                ParseText(&q, region_ignored)) {
    110       // We ignore the following:
    111       // * Attention.
    112       // * Province/Region/Other.
    113       continue;
    114     } else if (*q != **iter && ParseEmpty(&q)) {
    115       // Ignore non-labeled fields within an address; the page
    116       // MapQuest Driving Directions North America.html contains such a field.
    117       // We only ignore such fields after we've parsed at least one other field;
    118       // otherwise we'd effectively parse address fields before other field
    119       // types after any non-labeled fields, and we want email address fields to
    120       // have precedence since some pages contain fields labeled
    121       // "Email address".
    122       continue;
    123     } else {
    124       // No field found.
    125       break;
    126     }
    127   }
    128 
    129   // If we have identified any address fields in this field then it should be
    130   // added to the list of fields.
    131   if (address_field->company_ != NULL ||
    132       address_field->address1_ != NULL || address_field->address2_ != NULL ||
    133       address_field->city_ != NULL || address_field->state_ != NULL ||
    134       address_field->zip_ != NULL || address_field->zip4_ ||
    135       address_field->country_ != NULL) {
    136     *iter = q;
    137     return address_field.release();
    138   }
    139 
    140   return NULL;
    141 }
    142 
    143 AddressType AddressField::FindType() const {
    144   // This is not a full address, so don't even bother trying to figure
    145   // out its type.
    146   if (address1_ == NULL)
    147     return kGenericAddress;
    148 
    149   // First look at the field name, which itself will sometimes contain
    150   // "bill" or "ship".  We could check for the ECML type prefixes
    151   // here, but there's no need to since ECML's prefixes Ecom_BillTo
    152   // and Ecom_ShipTo contain "bill" and "ship" anyway.
    153   string16 name = StringToLowerASCII(address1_->name);
    154   return AddressTypeFromText(name);
    155 }
    156 
    157 bool AddressField::IsFullAddress() {
    158   return address1_ != NULL;
    159 }
    160 
    161 AddressField::AddressField()
    162     : company_(NULL),
    163       address1_(NULL),
    164       address2_(NULL),
    165       city_(NULL),
    166       state_(NULL),
    167       zip_(NULL),
    168       zip4_(NULL),
    169       country_(NULL),
    170       type_(kGenericAddress),
    171       is_ecml_(false) {
    172 }
    173 
    174 // static
    175 bool AddressField::ParseCompany(
    176     std::vector<AutofillField*>::const_iterator* iter,
    177     bool is_ecml, AddressField* address_field) {
    178   if (address_field->company_ && !address_field->company_->IsEmpty())
    179     return false;
    180 
    181   string16 pattern;
    182   if (is_ecml)
    183     pattern = GetEcmlPattern(kEcmlShipToCompanyName,
    184                              kEcmlBillToCompanyName, '|');
    185   else
    186     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE);
    187 
    188   if (!ParseText(iter, pattern, &address_field->company_))
    189     return false;
    190 
    191   return true;
    192 }
    193 
    194 // static
    195 bool AddressField::ParseAddressLines(
    196     std::vector<AutofillField*>::const_iterator* iter,
    197     bool is_ecml, AddressField* address_field) {
    198   // We only match the string "address" in page text, not in element names,
    199   // because sometimes every element in a group of address fields will have
    200   // a name containing the string "address"; for example, on the page
    201   // Kohl's - Register Billing Address.html the text element labeled "city"
    202   // has the name "BILL_TO_ADDRESS<>city".  We do match address labels
    203   // such as "address1", which appear as element names on various pages (eg
    204   // AmericanGirl-Registration.html, BloomingdalesBilling.html,
    205   // EBay Registration Enter Information.html).
    206   if (address_field->address1_)
    207     return false;
    208 
    209   string16 pattern;
    210   if (is_ecml) {
    211     pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|');
    212     if (!ParseText(iter, pattern, &address_field->address1_))
    213       return false;
    214   } else {
    215     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE);
    216     string16 label_pattern =
    217         l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
    218 
    219     if (!ParseText(iter, pattern, &address_field->address1_))
    220       if (!ParseLabelText(iter, label_pattern, &address_field->address1_))
    221         return false;
    222   }
    223 
    224   // Optionally parse more address lines, which may have empty labels.
    225   // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
    226   // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
    227   if (is_ecml) {
    228     pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|');
    229     if (!ParseEmptyText(iter, &address_field->address2_))
    230       ParseText(iter, pattern, &address_field->address2_);
    231   } else {
    232     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE);
    233     string16 label_pattern =
    234         l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
    235     if (!ParseEmptyText(iter, &address_field->address2_))
    236       if (!ParseText(iter, pattern, &address_field->address2_))
    237         ParseLabelText(iter, label_pattern, &address_field->address2_);
    238   }
    239 
    240   // Try for a third line, which we will promptly discard.
    241   if (address_field->address2_ != NULL) {
    242     if (is_ecml) {
    243       pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|');
    244       ParseText(iter, pattern);
    245     } else {
    246       pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE);
    247       if (!ParseEmptyText(iter, NULL))
    248         ParseText(iter, pattern, NULL);
    249     }
    250   }
    251 
    252   return true;
    253 }
    254 
    255 // static
    256 bool AddressField::ParseCountry(
    257     std::vector<AutofillField*>::const_iterator* iter,
    258     bool is_ecml, AddressField* address_field) {
    259   // Parse a country.  The occasional page (e.g.
    260   // Travelocity_New Member Information1.html) calls this a "location".
    261   // Note: ECML standard uses 2 letter country code (ISO 3166)
    262   if (address_field->country_ && !address_field->country_->IsEmpty())
    263     return false;
    264 
    265   string16 pattern;
    266   if (is_ecml)
    267     pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
    268   else
    269     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE);
    270 
    271   if (!ParseText(iter, pattern, &address_field->country_))
    272     return false;
    273 
    274   return true;
    275 }
    276 
    277 // static
    278 bool AddressField::ParseZipCode(
    279     std::vector<AutofillField*>::const_iterator* iter,
    280     bool is_ecml, AddressField* address_field) {
    281   // Parse a zip code.  On some UK pages (e.g. The China Shop2.html) this
    282   // is called a "post code".
    283   //
    284   // HACK: Just for the MapQuest driving directions page we match the
    285   // exact name "1z", which MapQuest uses to label its zip code field.
    286   // Hopefully before long we'll be smart enough to find the zip code
    287   // on that page automatically.
    288   if (address_field->zip_)
    289     return false;
    290 
    291   // We may be out of fields.
    292   if (!**iter)
    293     return false;
    294 
    295   string16 pattern;
    296   if (is_ecml) {
    297     pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|');
    298   } else {
    299     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE);
    300   }
    301 
    302   AddressType tempType;
    303   string16 name = (**iter)->name;
    304 
    305   // Note: comparisons using the ecml compliant name as a prefix must be used in
    306   // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for
    307   // more detail.
    308   string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode));
    309   if (StartsWith(name, bill_to_postal_code_field, false)) {
    310     tempType = kBillingAddress;
    311   } else if (StartsWith(name, bill_to_postal_code_field, false)) {
    312     tempType = kShippingAddress;
    313   } else {
    314     tempType = kGenericAddress;
    315   }
    316 
    317   if (!ParseText(iter, pattern, &address_field->zip_))
    318     return false;
    319 
    320   address_field->type_ = tempType;
    321   if (!is_ecml) {
    322     // Look for a zip+4, whose field name will also often contain
    323     // the substring "zip".
    324     ParseText(iter,
    325               l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE),
    326               &address_field->zip4_);
    327   }
    328 
    329   return true;
    330 }
    331 
    332 // static
    333 bool AddressField::ParseCity(
    334     std::vector<AutofillField*>::const_iterator* iter,
    335     bool is_ecml, AddressField* address_field) {
    336   // Parse a city name.  Some UK pages (e.g. The China Shop2.html) use
    337   // the term "town".
    338   if (address_field->city_)
    339     return false;
    340 
    341   string16 pattern;
    342   if (is_ecml)
    343     pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
    344   else
    345     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE);
    346 
    347   if (!ParseText(iter, pattern, &address_field->city_))
    348     return false;
    349 
    350   return true;
    351 }
    352 
    353 // static
    354 bool AddressField::ParseState(
    355     std::vector<AutofillField*>::const_iterator* iter,
    356     bool is_ecml, AddressField* address_field) {
    357   if (address_field->state_)
    358     return false;
    359 
    360   string16 pattern;
    361   if (is_ecml)
    362     pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
    363   else
    364     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE);
    365 
    366   if (!ParseText(iter, pattern, &address_field->state_))
    367     return false;
    368 
    369   return true;
    370 }
    371 
    372 AddressType AddressField::AddressTypeFromText(const string16 &text) {
    373   if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE))
    374           != string16::npos ||
    375       text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE))
    376           != string16::npos)
    377     // This text could be a checkbox label such as "same as my billing
    378     // address" or "use my shipping address".
    379     // ++ It would help if we generally skipped all text that appears
    380     // after a check box.
    381     return kGenericAddress;
    382 
    383   // Not all pages say "billing address" and "shipping address" explicitly;
    384   // for example, Craft Catalog1.html has "Bill-to Address" and
    385   // "Ship-to Address".
    386   size_t bill = text.rfind(
    387       l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE));
    388   size_t ship = text.rfind(
    389       l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE));
    390 
    391   if (bill == string16::npos && ship == string16::npos)
    392     return kGenericAddress;
    393 
    394   if (bill != string16::npos && ship == string16::npos)
    395     return kBillingAddress;
    396 
    397   if (bill == string16::npos && ship != string16::npos)
    398     return kShippingAddress;
    399 
    400   if (bill > ship)
    401     return kBillingAddress;
    402 
    403   return kShippingAddress;
    404 }
    405