// Copyright (C) 2013 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <libaddressinput/address_validator.h>

#include <libaddressinput/address_data.h>
#include <libaddressinput/downloader.h>
#include <libaddressinput/load_rules_delegate.h>
#include <libaddressinput/storage.h>
#include <libaddressinput/util/basictypes.h>
#include <libaddressinput/util/scoped_ptr.h>

#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include <re2/re2.h>

#include "country_rules_aggregator.h"
#include "grit.h"
#include "grit/libaddressinput_strings.h"
#include "region_data_constants.h"
#include "retriever.h"
#include "rule.h"
#include "ruleset.h"
#include "util/stl_util.h"
#include "util/string_util.h"

namespace i18n {
namespace addressinput {

namespace {

// A set of pointers to Ruleset objects. Being a std::set, it holds each
// pointer at most once.
typedef std::set<const Ruleset*> Rulesets;

// Maps an identity field of a rule (a Rule::IdentityField value) to a set of
// rulesets.
typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets;

// Maps a field in an address (an AddressField value) to the per-identity-field
// rulesets for that address field.
typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets;

// A set of Rule::IdentityField values that match user input.
61 typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields; 62 63 // Returns true if |prefix_regex| matches a prefix of |value|. For example, 64 // "(90|81)" matches a prefix of "90291". 65 bool ValueMatchesPrefixRegex(const std::string& value, 66 const std::string& prefix_regex) { 67 return RE2::FullMatch(value, "^(" + prefix_regex + ").*"); 68 } 69 70 // Returns true if the filter is empty (all problems allowed) or contains the 71 // |field|->|problem| mapping (explicitly allowed). 72 bool FilterAllows(const AddressProblemFilter& filter, 73 AddressField field, 74 AddressProblem::Type problem) { 75 if (filter.empty()) { 76 return true; 77 } 78 79 for (AddressProblemFilter::const_iterator it = filter.begin(); 80 it != filter.end(); ++it) { 81 if (it->first == field && it->second == problem) { 82 return true; 83 } 84 } 85 86 return false; 87 } 88 89 // Returns |true| if the |street_address| is empty or contains only empty 90 // strings. 91 bool IsEmptyStreetAddress(const std::vector<std::string>& street_address) { 92 for (std::vector<std::string>::const_iterator it = street_address.begin(); 93 it != street_address.end(); ++it) { 94 if (!it->empty()) { 95 return false; 96 } 97 } 98 return true; 99 } 100 101 // Returns the ID of the string that should be displayed when the given field 102 // is invalid in the context of |country_rule|. 
103 int GetInvalidFieldMessageId(const Rule& country_rule, AddressField field) { 104 switch (field) { 105 case LOCALITY: 106 return IDS_LIBADDRESSINPUT_I18N_INVALID_LOCALITY_LABEL; 107 case DEPENDENT_LOCALITY: 108 return IDS_LIBADDRESSINPUT_I18N_INVALID_DEPENDENT_LOCALITY_LABEL; 109 110 case ADMIN_AREA: { 111 const std::string& admin_area_name_type = 112 country_rule.GetAdminAreaNameType(); 113 if (admin_area_name_type == "area") { 114 return IDS_LIBADDRESSINPUT_I18N_INVALID_AREA; 115 } 116 if (admin_area_name_type == "county") { 117 return IDS_LIBADDRESSINPUT_I18N_INVALID_COUNTY_LABEL; 118 } 119 if (admin_area_name_type == "department") { 120 return IDS_LIBADDRESSINPUT_I18N_INVALID_DEPARTMENT; 121 } 122 if (admin_area_name_type == "district") { 123 return IDS_LIBADDRESSINPUT_I18N_INVALID_DEPENDENT_LOCALITY_LABEL; 124 } 125 if (admin_area_name_type == "do_si") { 126 return IDS_LIBADDRESSINPUT_I18N_INVALID_DO_SI; 127 } 128 if (admin_area_name_type == "emirate") { 129 return IDS_LIBADDRESSINPUT_I18N_INVALID_EMIRATE; 130 } 131 if (admin_area_name_type == "island") { 132 return IDS_LIBADDRESSINPUT_I18N_INVALID_ISLAND; 133 } 134 if (admin_area_name_type == "parish") { 135 return IDS_LIBADDRESSINPUT_I18N_INVALID_PARISH; 136 } 137 if (admin_area_name_type == "prefecture") { 138 return IDS_LIBADDRESSINPUT_I18N_INVALID_PREFECTURE; 139 } 140 if (admin_area_name_type == "province") { 141 return IDS_LIBADDRESSINPUT_I18N_INVALID_PROVINCE; 142 } 143 if (admin_area_name_type == "state") { 144 return IDS_LIBADDRESSINPUT_I18N_INVALID_STATE_LABEL; 145 } 146 return INVALID_MESSAGE_ID; 147 } 148 149 case POSTAL_CODE: { 150 const std::string& postal_code_name_type = 151 country_rule.GetPostalCodeNameType(); 152 if (postal_code_name_type == "postal") { 153 return IDS_LIBADDRESSINPUT_I18N_INVALID_POSTAL_CODE_LABEL; 154 } 155 if (postal_code_name_type == "zip") { 156 return IDS_LIBADDRESSINPUT_I18N_INVALID_ZIP_CODE_LABEL; 157 } 158 return INVALID_MESSAGE_ID; 159 } 160 161 default: 162 
return IDS_LIBADDRESSINPUT_I18N_INVALID_ENTRY; 163 } 164 } 165 166 // Collects rulesets based on whether they have a parent in the given list. 167 class ParentedRulesetCollector { 168 public: 169 // Retains a reference to both of the parameters. Does not make a copy of 170 // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The 171 // |rulesets_with_parents| parameter should not be NULL. 172 ParentedRulesetCollector(const Rulesets& parent_rulesets, 173 Rulesets* rulesets_with_parents) 174 : parent_rulesets_(parent_rulesets), 175 rulesets_with_parents_(rulesets_with_parents) { 176 assert(rulesets_with_parents_ != NULL); 177 } 178 179 ~ParentedRulesetCollector() {} 180 181 // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the 182 // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test| 183 // parameter should not be NULL. 184 void operator()(const Ruleset* ruleset_to_test) { 185 assert(ruleset_to_test != NULL); 186 if (parent_rulesets_.find(ruleset_to_test->parent()) != 187 parent_rulesets_.end()) { 188 rulesets_with_parents_->insert(ruleset_to_test); 189 } 190 } 191 192 private: 193 const Rulesets& parent_rulesets_; 194 Rulesets* rulesets_with_parents_; 195 }; 196 197 // Validates AddressData structure. 198 class AddressValidatorImpl : public AddressValidator { 199 public: 200 // Takes ownership of |downloader| and |storage|. Does not take ownership of 201 // |load_rules_delegate|. 202 AddressValidatorImpl(const std::string& validation_data_url, 203 scoped_ptr<Downloader> downloader, 204 scoped_ptr<Storage> storage, 205 LoadRulesDelegate* load_rules_delegate) 206 : aggregator_(scoped_ptr<Retriever>(new Retriever( 207 validation_data_url, 208 downloader.Pass(), 209 storage.Pass()))), 210 load_rules_delegate_(load_rules_delegate), 211 loading_rules_(), 212 rules_() {} 213 214 virtual ~AddressValidatorImpl() { 215 STLDeleteValues(&rules_); 216 } 217 218 // AddressValidator implementation. 
219 virtual void LoadRules(const std::string& country_code) { 220 if (rules_.find(country_code) == rules_.end() && 221 loading_rules_.find(country_code) == loading_rules_.end()) { 222 loading_rules_.insert(country_code); 223 aggregator_.AggregateRules( 224 country_code, 225 BuildScopedPtrCallback(this, &AddressValidatorImpl::OnRulesLoaded)); 226 } 227 } 228 229 // AddressValidator implementation. 230 virtual Status ValidateAddress( 231 const AddressData& address, 232 const AddressProblemFilter& filter, 233 AddressProblems* problems) const { 234 std::map<std::string, Ruleset*>::const_iterator ruleset_it = 235 rules_.find(address.country_code); 236 237 // We can still validate the required fields even if the full ruleset isn't 238 // ready. 239 if (ruleset_it == rules_.end()) { 240 if (problems != NULL) { 241 Rule rule; 242 rule.CopyFrom(Rule::GetDefault()); 243 if (rule.ParseSerializedRule( 244 RegionDataConstants::GetRegionData(address.country_code))) { 245 EnforceRequiredFields(rule, address, filter, problems); 246 } 247 } 248 249 return loading_rules_.find(address.country_code) != loading_rules_.end() 250 ? RULES_NOT_READY 251 : RULES_UNAVAILABLE; 252 } 253 254 if (problems == NULL) { 255 return SUCCESS; 256 } 257 258 const Ruleset* ruleset = ruleset_it->second; 259 assert(ruleset != NULL); 260 const Rule& country_rule = 261 ruleset->GetLanguageCodeRule(address.language_code); 262 EnforceRequiredFields(country_rule, address, filter, problems); 263 264 // Validate general postal code format. A country-level rule specifies the 265 // regular expression for the whole postal code. 
266 if (!address.postal_code.empty() && 267 !country_rule.GetPostalCodeFormat().empty() && 268 FilterAllows(filter, 269 POSTAL_CODE, 270 AddressProblem::UNRECOGNIZED_FORMAT) && 271 !RE2::FullMatch( 272 address.postal_code, country_rule.GetPostalCodeFormat())) { 273 problems->push_back(AddressProblem( 274 POSTAL_CODE, 275 AddressProblem::UNRECOGNIZED_FORMAT, 276 GetInvalidFieldMessageId(country_rule, POSTAL_CODE))); 277 } 278 279 while (ruleset != NULL) { 280 const Rule& rule = ruleset->GetLanguageCodeRule(address.language_code); 281 282 // Validate the field values, e.g. state names in US. 283 AddressField sub_field_type = 284 static_cast<AddressField>(ruleset->field() + 1); 285 std::string sub_key; 286 const std::string& user_input = address.GetFieldValue(sub_field_type); 287 if (!user_input.empty() && 288 FilterAllows(filter, sub_field_type, AddressProblem::UNKNOWN_VALUE) && 289 !rule.CanonicalizeSubKey(user_input, false, &sub_key)) { 290 problems->push_back(AddressProblem( 291 sub_field_type, 292 AddressProblem::UNKNOWN_VALUE, 293 GetInvalidFieldMessageId(country_rule, sub_field_type))); 294 } 295 296 // Validate sub-region specific postal code format. A sub-region specifies 297 // the regular expression for a prefix of the postal code. 298 if (ruleset->field() > COUNTRY && 299 !address.postal_code.empty() && 300 !rule.GetPostalCodeFormat().empty() && 301 FilterAllows(filter, 302 POSTAL_CODE, 303 AddressProblem::MISMATCHING_VALUE) && 304 !ValueMatchesPrefixRegex( 305 address.postal_code, rule.GetPostalCodeFormat())) { 306 problems->push_back(AddressProblem( 307 POSTAL_CODE, 308 AddressProblem::MISMATCHING_VALUE, 309 GetInvalidFieldMessageId(country_rule, POSTAL_CODE))); 310 } 311 312 ruleset = ruleset->GetSubRegionRuleset(sub_key); 313 } 314 315 return SUCCESS; 316 } 317 318 // AddressValidator implementation. 
319 virtual Status GetSuggestions(const AddressData& user_input, 320 AddressField focused_field, 321 size_t suggestions_limit, 322 std::vector<AddressData>* suggestions) const { 323 std::map<std::string, Ruleset*>::const_iterator ruleset_it = 324 rules_.find(user_input.country_code); 325 326 if (ruleset_it == rules_.end()) { 327 return 328 loading_rules_.find(user_input.country_code) != loading_rules_.end() 329 ? RULES_NOT_READY 330 : RULES_UNAVAILABLE; 331 } 332 333 if (suggestions == NULL) { 334 return SUCCESS; 335 } 336 suggestions->clear(); 337 338 assert(ruleset_it->second != NULL); 339 340 // Do not suggest anything if the user is typing in the field for which 341 // there's no validation data. 342 if (focused_field != POSTAL_CODE && 343 (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) { 344 return SUCCESS; 345 } 346 347 // Do not suggest anything if the user input is empty. 348 if (user_input.GetFieldValue(focused_field).empty()) { 349 return SUCCESS; 350 } 351 352 const Ruleset& country_ruleset = *ruleset_it->second; 353 const Rule& country_rule = 354 country_ruleset.GetLanguageCodeRule(user_input.language_code); 355 356 // Do not suggest anything if the user is typing the postal code that is not 357 // valid for the country. 358 if (!user_input.postal_code.empty() && 359 focused_field == POSTAL_CODE && 360 !country_rule.GetPostalCodeFormat().empty() && 361 !ValueMatchesPrefixRegex( 362 user_input.postal_code, country_rule.GetPostalCodeFormat())) { 363 return SUCCESS; 364 } 365 366 // Initialize the prefix search index lazily. 367 if (!ruleset_it->second->prefix_search_index_ready()) { 368 ruleset_it->second->BuildPrefixSearchIndex(); 369 } 370 371 if (focused_field != POSTAL_CODE && 372 focused_field > country_ruleset.deepest_ruleset_level()) { 373 return SUCCESS; 374 } 375 376 // Determine the most specific address field that can be suggested. 377 AddressField suggestion_field = focused_field != POSTAL_CODE 378 ? 
focused_field : DEPENDENT_LOCALITY; 379 if (suggestion_field > country_ruleset.deepest_ruleset_level()) { 380 suggestion_field = country_ruleset.deepest_ruleset_level(); 381 } 382 if (focused_field != POSTAL_CODE) { 383 while (user_input.GetFieldValue(suggestion_field).empty() && 384 suggestion_field > ADMIN_AREA) { 385 suggestion_field = static_cast<AddressField>(suggestion_field - 1); 386 } 387 } 388 389 // Find all rulesets that match user input. 390 AddressFieldRulesets rulesets; 391 for (int i = ADMIN_AREA; i <= suggestion_field; ++i) { 392 for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) { 393 AddressField address_field = static_cast<AddressField>(i); 394 Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j); 395 396 // Find all rulesets at |address_field| level whose |rule_field| starts 397 // with user input value. 398 country_ruleset.FindRulesetsByPrefix( 399 user_input.language_code, address_field, rule_field, 400 user_input.GetFieldValue(address_field), 401 &rulesets[address_field][rule_field]); 402 403 // Filter out the rulesets whose parents do not match the user input. 404 if (address_field > ADMIN_AREA) { 405 AddressField parent_field = 406 static_cast<AddressField>(address_field - 1); 407 Rulesets rulesets_with_parents; 408 std::for_each( 409 rulesets[address_field][rule_field].begin(), 410 rulesets[address_field][rule_field].end(), 411 ParentedRulesetCollector(rulesets[parent_field][rule_field], 412 &rulesets_with_parents)); 413 rulesets[address_field][rule_field].swap(rulesets_with_parents); 414 } 415 } 416 } 417 418 // Determine the fields in the rules that match the user input. This 419 // operation converts a map of Rule::IdentityField value -> Ruleset into a 420 // map of Ruleset -> Rule::IdentityField bitset. 
421 std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets; 422 for (IdentityFieldRulesets::const_iterator rule_field_it = 423 rulesets[suggestion_field].begin(); 424 rule_field_it != rulesets[suggestion_field].end(); 425 ++rule_field_it) { 426 const Rule::IdentityField rule_identity_field = rule_field_it->first; 427 for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin(); 428 ruleset_it != rule_field_it->second.end(); 429 ++ruleset_it) { 430 suggestion_rulesets[*ruleset_it].set(rule_identity_field); 431 } 432 } 433 434 // Generate suggestions based on the rulesets. Use a Rule::IdentityField 435 // from the bitset to generate address field values. 436 for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator 437 suggestion_it = suggestion_rulesets.begin(); 438 suggestion_it != suggestion_rulesets.end(); 439 ++suggestion_it) { 440 const Ruleset& ruleset = *suggestion_it->first; 441 const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code); 442 const MatchingRuleFields& matching_rule_fields = suggestion_it->second; 443 444 // Do not suggest this region if the postal code in user input does not 445 // match it. 446 if (!user_input.postal_code.empty() && 447 !rule.GetPostalCodeFormat().empty() && 448 !ValueMatchesPrefixRegex( 449 user_input.postal_code, rule.GetPostalCodeFormat())) { 450 continue; 451 } 452 453 // Do not add more suggestions than |suggestions_limit|. 454 if (suggestions->size() >= suggestions_limit) { 455 suggestions->clear(); 456 return SUCCESS; 457 } 458 459 // If the user's language is not one of the supported languages of a 460 // country that has latinized names for its regions, then prefer to 461 // suggest the latinized region names. If the user types in local script 462 // instead, then the local script names will be suggested. 
463 Rule::IdentityField rule_field = Rule::KEY; 464 if (!country_rule.GetLanguage().empty() && 465 country_rule.GetLanguage() != user_input.language_code && 466 !rule.GetLatinName().empty() && 467 matching_rule_fields.test(Rule::LATIN_NAME)) { 468 rule_field = Rule::LATIN_NAME; 469 } else if (matching_rule_fields.test(Rule::KEY)) { 470 rule_field = Rule::KEY; 471 } else if (matching_rule_fields.test(Rule::NAME)) { 472 rule_field = Rule::NAME; 473 } else if (matching_rule_fields.test(Rule::LATIN_NAME)) { 474 rule_field = Rule::LATIN_NAME; 475 } else { 476 assert(false); 477 } 478 479 AddressData suggestion; 480 suggestion.country_code = user_input.country_code; 481 suggestion.postal_code = user_input.postal_code; 482 483 // Traverse the tree of rulesets from the most specific |ruleset| to the 484 // country-wide "root" of the tree. Use the region names found at each of 485 // the levels of the ruleset tree to build the |suggestion|. 486 for (const Ruleset* suggestion_ruleset = &ruleset; 487 suggestion_ruleset->parent() != NULL; 488 suggestion_ruleset = suggestion_ruleset->parent()) { 489 const Rule& suggestion_rule = 490 suggestion_ruleset->GetLanguageCodeRule(user_input.language_code); 491 suggestion.SetFieldValue(suggestion_ruleset->field(), 492 suggestion_rule.GetIdentityField(rule_field)); 493 } 494 495 suggestions->push_back(suggestion); 496 } 497 498 return SUCCESS; 499 } 500 501 // AddressValidator implementation. 502 virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const { 503 std::map<std::string, Ruleset*>::const_iterator ruleset_it = 504 rules_.find(address_data->country_code); 505 if (ruleset_it == rules_.end()) { 506 return false; 507 } 508 const Rule& rule = 509 ruleset_it->second->GetLanguageCodeRule(address_data->language_code); 510 511 return rule.CanonicalizeSubKey(address_data->administrative_area, 512 true, // Keep input latin. 
513 &address_data->administrative_area); 514 } 515 516 private: 517 // Called when CountryRulesAggregator::AggregateRules loads the |ruleset| for 518 // the |country_code|. 519 void OnRulesLoaded(bool success, 520 const std::string& country_code, 521 scoped_ptr<Ruleset> ruleset) { 522 assert(rules_.find(country_code) == rules_.end()); 523 loading_rules_.erase(country_code); 524 if (success) { 525 assert(ruleset != NULL); 526 assert(ruleset->field() == COUNTRY); 527 rules_[country_code] = ruleset.release(); 528 } 529 if (load_rules_delegate_ != NULL) { 530 load_rules_delegate_->OnAddressValidationRulesLoaded( 531 country_code, success); 532 } 533 } 534 535 // Adds problems for just the required fields portion of |country_rule|. 536 void EnforceRequiredFields(const Rule& country_rule, 537 const AddressData& address, 538 const AddressProblemFilter& filter, 539 AddressProblems* problems) const { 540 assert(problems != NULL); 541 for (std::vector<AddressField>::const_iterator 542 field_it = country_rule.GetRequired().begin(); 543 field_it != country_rule.GetRequired().end(); 544 ++field_it) { 545 bool field_empty = *field_it != STREET_ADDRESS 546 ? address.GetFieldValue(*field_it).empty() 547 : IsEmptyStreetAddress(address.address_lines); 548 if (field_empty && 549 FilterAllows( 550 filter, *field_it, AddressProblem::MISSING_REQUIRED_FIELD)) { 551 problems->push_back(AddressProblem( 552 *field_it, 553 AddressProblem::MISSING_REQUIRED_FIELD, 554 IDS_LIBADDRESSINPUT_I18N_MISSING_REQUIRED_FIELD)); 555 } 556 } 557 } 558 559 // Loads the ruleset for a country code. 560 CountryRulesAggregator aggregator_; 561 562 // An optional delegate to be invoked when a ruleset finishes loading. 563 LoadRulesDelegate* load_rules_delegate_; 564 565 // A set of country codes for which a ruleset is being loaded. 566 std::set<std::string> loading_rules_; 567 568 // A mapping of a country code to the owned ruleset for that country code. 
569 std::map<std::string, Ruleset*> rules_; 570 571 DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl); 572 }; 573 574 } // namespace 575 576 AddressValidator::~AddressValidator() {} 577 578 // static 579 scoped_ptr<AddressValidator> AddressValidator::Build( 580 scoped_ptr<Downloader> downloader, 581 scoped_ptr<Storage> storage, 582 LoadRulesDelegate* load_rules_delegate) { 583 return scoped_ptr<AddressValidator>(new AddressValidatorImpl( 584 VALIDATION_DATA_URL, downloader.Pass(), storage.Pass(), 585 load_rules_delegate)); 586 } 587 588 scoped_ptr<AddressValidator> BuildAddressValidatorForTesting( 589 const std::string& validation_data_url, 590 scoped_ptr<Downloader> downloader, 591 scoped_ptr<Storage> storage, 592 LoadRulesDelegate* load_rules_delegate) { 593 return scoped_ptr<AddressValidator>(new AddressValidatorImpl( 594 validation_data_url, downloader.Pass(), storage.Pass(), 595 load_rules_delegate)); 596 } 597 598 } // namespace addressinput 599 } // namespace i18n 600