1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autofill/address_field.h" 6 7 #include <stddef.h> 8 9 #include "base/logging.h" 10 #include "base/memory/scoped_ptr.h" 11 #include "base/string16.h" 12 #include "base/string_util.h" 13 #include "base/utf_string_conversions.h" 14 #include "chrome/browser/autofill/autofill_field.h" 15 #include "grit/autofill_resources.h" 16 #include "ui/base/l10n/l10n_util.h" 17 18 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { 19 AutofillFieldType address_company; 20 AutofillFieldType address_line1; 21 AutofillFieldType address_line2; 22 AutofillFieldType address_city; 23 AutofillFieldType address_state; 24 AutofillFieldType address_zip; 25 AutofillFieldType address_country; 26 27 switch (type_) { 28 case kShippingAddress: 29 // Fall through. Autofill does not support shipping addresses. 30 case kGenericAddress: 31 address_company = COMPANY_NAME; 32 address_line1 = ADDRESS_HOME_LINE1; 33 address_line2 = ADDRESS_HOME_LINE2; 34 address_city = ADDRESS_HOME_CITY; 35 address_state = ADDRESS_HOME_STATE; 36 address_zip = ADDRESS_HOME_ZIP; 37 address_country = ADDRESS_HOME_COUNTRY; 38 break; 39 40 case kBillingAddress: 41 address_company = COMPANY_NAME; 42 address_line1 = ADDRESS_BILLING_LINE1; 43 address_line2 = ADDRESS_BILLING_LINE2; 44 address_city = ADDRESS_BILLING_CITY; 45 address_state = ADDRESS_BILLING_STATE; 46 address_zip = ADDRESS_BILLING_ZIP; 47 address_country = ADDRESS_BILLING_COUNTRY; 48 break; 49 50 default: 51 NOTREACHED(); 52 return false; 53 } 54 55 bool ok; 56 ok = Add(field_type_map, company_, AutofillType(address_company)); 57 DCHECK(ok); 58 ok = ok && Add(field_type_map, address1_, AutofillType(address_line1)); 59 DCHECK(ok); 60 ok = ok && Add(field_type_map, address2_, AutofillType(address_line2)); 61 DCHECK(ok); 62 ok = ok && Add(field_type_map, city_, AutofillType(address_city)); 63 DCHECK(ok); 64 ok = ok && Add(field_type_map, state_, AutofillType(address_state)); 65 DCHECK(ok); 66 ok = ok && Add(field_type_map, zip_, AutofillType(address_zip)); 67 DCHECK(ok); 68 ok = ok && Add(field_type_map, country_, AutofillType(address_country)); 69 DCHECK(ok); 70 71 return ok; 72 } 73 74 FormFieldType AddressField::GetFormFieldType() const { 75 return kAddressType; 76 } 77 78 AddressField* AddressField::Parse( 79 std::vector<AutofillField*>::const_iterator* iter, 80 bool is_ecml) { 81 DCHECK(iter); 82 if (!iter) 83 return NULL; 84 85 scoped_ptr<AddressField> address_field(new AddressField); 86 std::vector<AutofillField*>::const_iterator q = *iter; 87 string16 pattern; 88 89 // The ECML standard uses 2 letter country codes. So we will 90 // have to remember that this is an ECML form, for when we fill 91 // it out. 92 address_field->is_ecml_ = is_ecml; 93 94 string16 attention_ignored = 95 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); 96 string16 region_ignored = 97 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); 98 99 // Allow address fields to appear in any order. 100 while (true) { 101 if (ParseCompany(&q, is_ecml, address_field.get()) || 102 ParseAddressLines(&q, is_ecml, address_field.get()) || 103 ParseCity(&q, is_ecml, address_field.get()) || 104 ParseState(&q, is_ecml, address_field.get()) || 105 ParseZipCode(&q, is_ecml, address_field.get()) || 106 ParseCountry(&q, is_ecml, address_field.get())) { 107 continue; 108 } else if (ParseText(&q, attention_ignored) || 109 ParseText(&q, region_ignored)) { 110 // We ignore the following: 111 // * Attention. 112 // * Province/Region/Other. 113 continue; 114 } else if (*q != **iter && ParseEmpty(&q)) { 115 // Ignore non-labeled fields within an address; the page 116 // MapQuest Driving Directions North America.html contains such a field. 117 // We only ignore such fields after we've parsed at least one other field; 118 // otherwise we'd effectively parse address fields before other field 119 // types after any non-labeled fields, and we want email address fields to 120 // have precedence since some pages contain fields labeled 121 // "Email address". 122 continue; 123 } else { 124 // No field found. 125 break; 126 } 127 } 128 129 // If we have identified any address fields in this field then it should be 130 // added to the list of fields. 131 if (address_field->company_ != NULL || 132 address_field->address1_ != NULL || address_field->address2_ != NULL || 133 address_field->city_ != NULL || address_field->state_ != NULL || 134 address_field->zip_ != NULL || address_field->zip4_ || 135 address_field->country_ != NULL) { 136 *iter = q; 137 return address_field.release(); 138 } 139 140 return NULL; 141 } 142 143 AddressType AddressField::FindType() const { 144 // This is not a full address, so don't even bother trying to figure 145 // out its type. 146 if (address1_ == NULL) 147 return kGenericAddress; 148 149 // First look at the field name, which itself will sometimes contain 150 // "bill" or "ship". We could check for the ECML type prefixes 151 // here, but there's no need to since ECML's prefixes Ecom_BillTo 152 // and Ecom_ShipTo contain "bill" and "ship" anyway. 153 string16 name = StringToLowerASCII(address1_->name); 154 return AddressTypeFromText(name); 155 } 156 157 bool AddressField::IsFullAddress() { 158 return address1_ != NULL; 159 } 160 161 AddressField::AddressField() 162 : company_(NULL), 163 address1_(NULL), 164 address2_(NULL), 165 city_(NULL), 166 state_(NULL), 167 zip_(NULL), 168 zip4_(NULL), 169 country_(NULL), 170 type_(kGenericAddress), 171 is_ecml_(false) { 172 } 173 174 // static 175 bool AddressField::ParseCompany( 176 std::vector<AutofillField*>::const_iterator* iter, 177 bool is_ecml, AddressField* address_field) { 178 if (address_field->company_ && !address_field->company_->IsEmpty()) 179 return false; 180 181 string16 pattern; 182 if (is_ecml) 183 pattern = GetEcmlPattern(kEcmlShipToCompanyName, 184 kEcmlBillToCompanyName, '|'); 185 else 186 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); 187 188 if (!ParseText(iter, pattern, &address_field->company_)) 189 return false; 190 191 return true; 192 } 193 194 // static 195 bool AddressField::ParseAddressLines( 196 std::vector<AutofillField*>::const_iterator* iter, 197 bool is_ecml, AddressField* address_field) { 198 // We only match the string "address" in page text, not in element names, 199 // because sometimes every element in a group of address fields will have 200 // a name containing the string "address"; for example, on the page 201 // Kohl's - Register Billing Address.html the text element labeled "city" 202 // has the name "BILL_TO_ADDRESS<>city". We do match address labels 203 // such as "address1", which appear as element names on various pages (eg 204 // AmericanGirl-Registration.html, BloomingdalesBilling.html, 205 // EBay Registration Enter Information.html). 206 if (address_field->address1_) 207 return false; 208 209 string16 pattern; 210 if (is_ecml) { 211 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); 212 if (!ParseText(iter, pattern, &address_field->address1_)) 213 return false; 214 } else { 215 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); 216 string16 label_pattern = 217 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); 218 219 if (!ParseText(iter, pattern, &address_field->address1_)) 220 if (!ParseLabelText(iter, label_pattern, &address_field->address1_)) 221 return false; 222 } 223 224 // Optionally parse more address lines, which may have empty labels. 225 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) 226 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! 227 if (is_ecml) { 228 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); 229 if (!ParseEmptyText(iter, &address_field->address2_)) 230 ParseText(iter, pattern, &address_field->address2_); 231 } else { 232 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); 233 string16 label_pattern = 234 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); 235 if (!ParseEmptyText(iter, &address_field->address2_)) 236 if (!ParseText(iter, pattern, &address_field->address2_)) 237 ParseLabelText(iter, label_pattern, &address_field->address2_); 238 } 239 240 // Try for a third line, which we will promptly discard. 241 if (address_field->address2_ != NULL) { 242 if (is_ecml) { 243 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); 244 ParseText(iter, pattern); 245 } else { 246 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); 247 if (!ParseEmptyText(iter, NULL)) 248 ParseText(iter, pattern, NULL); 249 } 250 } 251 252 return true; 253 } 254 255 // static 256 bool AddressField::ParseCountry( 257 std::vector<AutofillField*>::const_iterator* iter, 258 bool is_ecml, AddressField* address_field) { 259 // Parse a country. The occasional page (e.g. 260 // Travelocity_New Member Information1.html) calls this a "location". 261 // Note: ECML standard uses 2 letter country code (ISO 3166) 262 if (address_field->country_ && !address_field->country_->IsEmpty()) 263 return false; 264 265 string16 pattern; 266 if (is_ecml) 267 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); 268 else 269 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); 270 271 if (!ParseText(iter, pattern, &address_field->country_)) 272 return false; 273 274 return true; 275 } 276 277 // static 278 bool AddressField::ParseZipCode( 279 std::vector<AutofillField*>::const_iterator* iter, 280 bool is_ecml, AddressField* address_field) { 281 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this 282 // is called a "post code". 283 // 284 // HACK: Just for the MapQuest driving directions page we match the 285 // exact name "1z", which MapQuest uses to label its zip code field. 286 // Hopefully before long we'll be smart enough to find the zip code 287 // on that page automatically. 288 if (address_field->zip_) 289 return false; 290 291 // We may be out of fields. 292 if (!**iter) 293 return false; 294 295 string16 pattern; 296 if (is_ecml) { 297 pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|'); 298 } else { 299 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); 300 } 301 302 AddressType tempType; 303 string16 name = (**iter)->name; 304 305 // Note: comparisons using the ecml compliant name as a prefix must be used in 306 // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for 307 // more detail. 308 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); 309 if (StartsWith(name, bill_to_postal_code_field, false)) { 310 tempType = kBillingAddress; 311 } else if (StartsWith(name, bill_to_postal_code_field, false)) { 312 tempType = kShippingAddress; 313 } else { 314 tempType = kGenericAddress; 315 } 316 317 if (!ParseText(iter, pattern, &address_field->zip_)) 318 return false; 319 320 address_field->type_ = tempType; 321 if (!is_ecml) { 322 // Look for a zip+4, whose field name will also often contain 323 // the substring "zip". 324 ParseText(iter, 325 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), 326 &address_field->zip4_); 327 } 328 329 return true; 330 } 331 332 // static 333 bool AddressField::ParseCity( 334 std::vector<AutofillField*>::const_iterator* iter, 335 bool is_ecml, AddressField* address_field) { 336 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use 337 // the term "town". 338 if (address_field->city_) 339 return false; 340 341 string16 pattern; 342 if (is_ecml) 343 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); 344 else 345 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); 346 347 if (!ParseText(iter, pattern, &address_field->city_)) 348 return false; 349 350 return true; 351 } 352 353 // static 354 bool AddressField::ParseState( 355 std::vector<AutofillField*>::const_iterator* iter, 356 bool is_ecml, AddressField* address_field) { 357 if (address_field->state_) 358 return false; 359 360 string16 pattern; 361 if (is_ecml) 362 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); 363 else 364 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); 365 366 if (!ParseText(iter, pattern, &address_field->state_)) 367 return false; 368 369 return true; 370 } 371 372 AddressType AddressField::AddressTypeFromText(const string16 &text) { 373 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) 374 != string16::npos || 375 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) 376 != string16::npos) 377 // This text could be a checkbox label such as "same as my billing 378 // address" or "use my shipping address". 379 // ++ It would help if we generally skipped all text that appears 380 // after a check box. 381 return kGenericAddress; 382 383 // Not all pages say "billing address" and "shipping address" explicitly; 384 // for example, Craft Catalog1.html has "Bill-to Address" and 385 // "Ship-to Address". 386 size_t bill = text.rfind( 387 l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE)); 388 size_t ship = text.rfind( 389 l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE)); 390 391 if (bill == string16::npos && ship == string16::npos) 392 return kGenericAddress; 393 394 if (bill != string16::npos && ship == string16::npos) 395 return kBillingAddress; 396 397 if (bill == string16::npos && ship != string16::npos) 398 return kShippingAddress; 399 400 if (bill > ship) 401 return kBillingAddress; 402 403 return kShippingAddress; 404 } 405