1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "third_party/libaddressinput/chromium/input_suggester.h" 6 7 #include <cstddef> 8 #include <set> 9 #include <utility> 10 11 #include "base/logging.h" 12 #include "third_party/libaddressinput/chromium/trie.h" 13 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h" 14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h" 15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h" 16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h" 17 18 namespace autofill { 19 20 using ::i18n::addressinput::AddressData; 21 using ::i18n::addressinput::AddressField; 22 using ::i18n::addressinput::BuildCallback; 23 using ::i18n::addressinput::FieldProblemMap; 24 using ::i18n::addressinput::PreloadSupplier; 25 using ::i18n::addressinput::RegionData; 26 using ::i18n::addressinput::RegionDataBuilder; 27 28 using ::i18n::addressinput::ADMIN_AREA; 29 using ::i18n::addressinput::COUNTRY; 30 using ::i18n::addressinput::DEPENDENT_LOCALITY; 31 using ::i18n::addressinput::LOCALITY; 32 using ::i18n::addressinput::POSTAL_CODE; 33 34 using ::i18n::addressinput::INVALID_FORMAT; 35 using ::i18n::addressinput::MISMATCHING_VALUE; 36 37 namespace { 38 39 // Initial size for the buffer used in the canonicalizer. 40 static const size_t kInitialBufferSize = 32; 41 42 // A region and its metadata useful for constructing a suggestion. 43 struct Suggestion { 44 public: 45 // Builds a suggestion of |region_to_suggest|. Does not take ownership of 46 // |region_to_suggest|, which should not be NULL. 47 Suggestion(const RegionData* region_to_suggest, 48 AddressField matching_address_field, 49 bool region_key_matches) 50 : region_to_suggest(region_to_suggest), 51 matching_address_field(matching_address_field), 52 region_key_matches(region_key_matches) { 53 DCHECK(region_to_suggest); 54 } 55 56 ~Suggestion() {} 57 58 // The region that should be suggested. For example, if the region is ("CA", 59 // "California"), then either "CA" or "California" should be suggested. 60 const RegionData* region_to_suggest; 61 62 // The field in the address for which the suggestion should be made. For 63 // example, ADMIN_AREA in US means the suggestion should be made for the field 64 // labeled "State". 65 AddressField matching_address_field; 66 67 // True if the key of the region matches user input (the name may or may not 68 // match). "CA" should be suggested for a ("CA", "California") region. 69 // 70 // False if only the name of the region matches user input (the key does not 71 // match). "California" should be suggested for a ("CA", "California") region. 72 bool region_key_matches; 73 }; 74 75 // Suggestions for an address. Contains lists of suggestions for administrative 76 // area, locality, and dependent locality fields of an address. 77 class AddressSuggestions { 78 public: 79 AddressSuggestions() {} 80 ~AddressSuggestions() {} 81 82 // Marks all regions at |address_field| level as matching user input. 83 void AllRegionsMatchForField(AddressField address_field) { 84 all_regions_match_input_.insert(address_field); 85 } 86 87 // Marks given regions at |address_field| level as matching user input. The 88 // |regions_match_key| parameter contains the regions that match user input by 89 // their keys. The |regions_match_name| parameter contains the regions that 90 // match user input by their names. 91 // 92 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or 93 // DEPENDENT_LOCALITY. 94 bool AddRegions(AddressField address_field, 95 const std::set<const RegionData*>& regions_match_key, 96 const std::set<const RegionData*>& regions_match_name) { 97 DCHECK(address_field >= ADMIN_AREA); 98 DCHECK(address_field <= DEPENDENT_LOCALITY); 99 100 AddressField parent_address_field = 101 static_cast<AddressField>(address_field - 1); 102 103 bool all_parents_match = 104 parent_address_field == COUNTRY || 105 all_regions_match_input_.find(parent_address_field) != 106 all_regions_match_input_.end(); 107 108 // Cannot build |address_field| level suggestions if there are no matches in 109 // |parent_address_field| level regions. 110 const RegionsMatchInput* parents = NULL; 111 if (address_field > ADMIN_AREA && !all_parents_match) { 112 parents = ®ions_match_input_[parent_address_field]; 113 if (parents->keys.empty() && parents->names.empty()) 114 return false; 115 } 116 117 RegionsMatchInput* regions = NULL; 118 if (address_field < DEPENDENT_LOCALITY) 119 regions = ®ions_match_input_[address_field]; 120 121 std::vector<Suggestion>* suggestions = &suggestions_[address_field]; 122 bool added_suggestions = false; 123 124 // Iterate over both |regions_match_key| and |regions_match_name| and build 125 // Suggestion objects based on the given RegionData objects. Advance either 126 // one iterator at a time (if they point to different data) or both 127 // iterators at once (if they point to the same data). 128 for (std::set<const RegionData*>::const_iterator 129 key_it = regions_match_key.begin(), 130 name_it = regions_match_name.begin(); 131 key_it != regions_match_key.end() || 132 name_it != regions_match_name.end();) { 133 const RegionData* key_region = 134 key_it != regions_match_key.end() ? *key_it : NULL; 135 const RegionData* name_region = 136 name_it != regions_match_name.end() ? *name_it : NULL; 137 138 // Regions that do not have a parent that also matches input will not 139 // become suggestions. 140 bool key_region_has_parent = 141 all_parents_match || 142 (parents && !parents->keys.empty() && key_region && 143 parents->keys.find(&key_region->parent()) != parents->keys.end()); 144 bool name_region_has_parent = 145 all_parents_match || 146 (parents && !parents->names.empty() && name_region && 147 parents->names.find(&name_region->parent()) != parents->names.end()); 148 149 if (name_region && (!key_region || name_region < key_region)) { 150 if (name_region_has_parent) { 151 suggestions->push_back(Suggestion(name_region, address_field, false)); 152 added_suggestions = true; 153 if (regions) 154 regions->names.insert(name_region); 155 } 156 157 ++name_it; 158 } else if (key_region && (!name_region || key_region < name_region)) { 159 if (key_region_has_parent) { 160 suggestions->push_back(Suggestion(key_region, address_field, true)); 161 added_suggestions = true; 162 if (regions) 163 regions->keys.insert(key_region); 164 } 165 166 ++key_it; 167 } else { 168 if (key_region_has_parent) { 169 suggestions->push_back(Suggestion(key_region, address_field, true)); 170 added_suggestions = true; 171 if (regions) { 172 regions->keys.insert(key_region); 173 regions->names.insert(name_region); 174 } 175 } 176 177 ++key_it; 178 ++name_it; 179 } 180 } 181 182 return added_suggestions; 183 } 184 185 // Swaps the suggestions for the smallest sub-region into |suggestions|. 186 // |this| is not usable after this call due to using the swap() operation. 187 // 188 // The |suggestions| parameter should not be NULL. 189 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) { 190 DCHECK(suggestions); 191 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) { 192 std::vector<Suggestion>* result = 193 &suggestions_[static_cast<AddressField>(i)]; 194 if (!result->empty()) { 195 suggestions->swap(*result); 196 return; 197 } 198 } 199 } 200 201 private: 202 // The sets of non-owned regions used for looking up regions that match user 203 // input by keys and names. 204 struct RegionsMatchInput { 205 std::set<const RegionData*> keys; 206 std::set<const RegionData*> names; 207 }; 208 209 // The regions that match user input at ADMIN_AREA and LOCALITY levels. 210 std::map<AddressField, RegionsMatchInput> regions_match_input_; 211 212 // The set of fields for which all regions match user input. Used to avoid 213 // storing a long list in |regions_match_input_| and later looking it up 214 // there. 215 std::set<AddressField> all_regions_match_input_; 216 217 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels. 218 std::map<AddressField, std::vector<Suggestion> > suggestions_; 219 220 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions); 221 }; 222 223 } // namespace 224 225 InputSuggester::StringCanonicalizer::StringCanonicalizer() 226 : buffer_(kInitialBufferSize, 0) { 227 UErrorCode error_code = U_ZERO_ERROR; 228 collator_.reset( 229 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); 230 DCHECK(U_SUCCESS(error_code)); 231 collator_->setStrength(icu::Collator::PRIMARY); 232 } 233 234 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {} 235 236 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize( 237 const std::string& original) const { 238 DCHECK(!original.empty()); 239 240 icu::UnicodeString icu_str(original.c_str(), 241 static_cast<int32_t>(original.length())); 242 int32_t sort_key_size = 243 collator_->getSortKey(icu_str, &buffer_[0], buffer_size()); 244 DCHECK_LT(0, sort_key_size); 245 246 if (sort_key_size > buffer_size()) { 247 buffer_.resize(sort_key_size * 2, 0); 248 sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size()); 249 DCHECK_LT(0, sort_key_size); 250 DCHECK_GT(buffer_size(), sort_key_size); 251 } 252 253 return buffer_; 254 } 255 256 int32_t InputSuggester::StringCanonicalizer::buffer_size() const { 257 return static_cast<int32_t>(buffer_.size()); 258 } 259 260 // All sub-regions of a COUNTRY level region, organized into tries for lookup by 261 // region name or key. 262 class InputSuggester::SubRegionData { 263 public: 264 SubRegionData() 265 : initialized_(false), 266 smallest_region_size_(COUNTRY), 267 canonicalizer_(NULL) {} 268 269 ~SubRegionData() {} 270 271 bool is_initialized() const { return initialized_; } 272 273 // Adds the sub-regions of |country_region| into tries. Uses 274 // |shared_canonicalizer| for case and diacritic insensitive lookup of the 275 // sub-regions. Should be called at most once. 276 void Initialize(const RegionData& country_region, 277 const StringCanonicalizer& shared_canonicalizer) { 278 DCHECK(!initialized_); 279 DCHECK(!country_region.has_parent()); 280 281 initialized_ = true; 282 canonicalizer_ = &shared_canonicalizer; 283 284 if (!country_region.sub_regions().empty()) 285 AddSubRegionsOf(country_region, COUNTRY); 286 } 287 288 // Adds the suggestions for |user_input| into |suggestions| when user is 289 // typing in |focused_field|. 290 void BuildSuggestions(const AddressData& user_input, 291 AddressField focused_field, 292 std::vector<Suggestion>* suggestions) { 293 DCHECK(initialized_); 294 295 // Do not suggest anything if there's no suggestion data for the focused 296 // field. 297 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field) 298 return; 299 300 // Non-owned regions that match a field value by region key. 301 std::set<const RegionData*> regions_match_key; 302 303 // Non-owned regions that match a field value by region name. 304 std::set<const RegionData*> regions_match_name; 305 306 AddressSuggestions address_suggestions; 307 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY; 308 ++i) { 309 AddressField address_field = static_cast<AddressField>(i); 310 AddressField parent_address_field = static_cast<AddressField>(i - 1); 311 312 const std::string& field_value = user_input.GetFieldValue(address_field); 313 const std::string& parent_field_value = 314 user_input.GetFieldValue(parent_address_field); 315 316 if (field_value.empty() && 317 (address_field == ADMIN_AREA || parent_field_value.empty())) { 318 address_suggestions.AllRegionsMatchForField(address_field); 319 continue; 320 } 321 322 if (field_value.empty()) { 323 DCHECK_NE(address_field, focused_field); 324 continue; 325 } 326 327 regions_match_key.clear(); 328 regions_match_name.clear(); 329 330 const FieldTries& field_tries = field_tries_[address_field]; 331 332 const std::vector<uint8_t>& canonicalized_value = 333 canonicalizer_->Canonicalize(field_value); 334 335 field_tries.keys.FindDataForKeyPrefix(canonicalized_value, 336 ®ions_match_key); 337 field_tries.names.FindDataForKeyPrefix(canonicalized_value, 338 ®ions_match_name); 339 340 bool added_suggestions = address_suggestions.AddRegions( 341 address_field, regions_match_key, regions_match_name); 342 343 // Do not suggest anything if the focused field does not have suggestions. 344 if (address_field == focused_field && !added_suggestions) 345 return; 346 } 347 348 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions); 349 } 350 351 private: 352 // The tries to lookup regions for a specific field by keys and names. For 353 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK", 354 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The 355 // struct is uncopyable due to Trie objects being uncopyable. 356 struct FieldTries { 357 Trie<const RegionData*> keys; 358 Trie<const RegionData*> names; 359 }; 360 361 // Adds the sub-regions of |parent_region| into tries. 362 void AddSubRegionsOf(const RegionData& parent_region, 363 AddressField parent_field) { 364 DCHECK(!parent_region.sub_regions().empty()); 365 366 AddressField address_field = static_cast<AddressField>(parent_field + 1); 367 DCHECK(address_field >= ADMIN_AREA); 368 DCHECK(address_field <= DEPENDENT_LOCALITY); 369 370 FieldTries* field_tries = &field_tries_[address_field]; 371 for (std::vector<const RegionData*>::const_iterator it = 372 parent_region.sub_regions().begin(); 373 it != parent_region.sub_regions().end(); 374 ++it) { 375 const RegionData* region = *it; 376 DCHECK(region); 377 DCHECK(!region->key().empty()); 378 DCHECK(!region->name().empty()); 379 380 field_tries->keys.AddDataForKey( 381 canonicalizer_->Canonicalize(region->key()), region); 382 383 field_tries->names.AddDataForKey( 384 canonicalizer_->Canonicalize(region->name()), region); 385 386 if (smallest_region_size_ < address_field) 387 smallest_region_size_ = address_field; 388 389 if (!region->sub_regions().empty()) 390 AddSubRegionsOf(*region, address_field); 391 } 392 } 393 394 // True after Initialize() has been called. 395 bool initialized_; 396 397 // The tries to lookup regions for ADMIN_AREA, LOCALITY, and 398 // DEPENDENT_LOCALITY. 399 std::map<AddressField, FieldTries> field_tries_; 400 401 // The smallest size of a sub-region that has data. For example, this is 402 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN. 403 AddressField smallest_region_size_; 404 405 // A shared instance of string canonicalizer for case and diacritic comparison 406 // of region keys and names. 407 const StringCanonicalizer* canonicalizer_; 408 }; 409 410 InputSuggester::InputSuggester(PreloadSupplier* supplier) 411 : region_data_builder_(supplier), 412 input_helper_(supplier), 413 validator_(supplier), 414 validated_(BuildCallback(this, &InputSuggester::Validated)) {} 415 416 InputSuggester::~InputSuggester() {} 417 418 void InputSuggester::GetSuggestions(const AddressData& user_input, 419 AddressField focused_field, 420 size_t suggestions_limit, 421 std::vector<AddressData>* suggestions) { 422 DCHECK(suggestions); 423 DCHECK(focused_field == POSTAL_CODE || 424 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); 425 426 AddressData address_copy = user_input; 427 428 // Do not suggest anything if the user input is empty. 429 if (address_copy.IsFieldEmpty(focused_field)) 430 return; 431 432 if (focused_field == POSTAL_CODE) { 433 // Do not suggest anything if the user is typing an invalid postal code. 434 FieldProblemMap problems; 435 FieldProblemMap filter; 436 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT)); 437 validator_.Validate(address_copy, 438 true, // Allow postal office boxes. 439 false, // Do not require recipient name. 440 &filter, 441 &problems, 442 *validated_); 443 if (!problems.empty()) 444 return; 445 446 // Fill in the sub-regions based on the postal code. 447 input_helper_.FillAddress(&address_copy); 448 } 449 450 // Lazily initialize the mapping from COUNTRY level regions to all of their 451 // sub-regions with metadata for generating suggestions. 452 std::string unused_best_language; 453 const RegionData& region_data = 454 region_data_builder_.Build(address_copy.region_code, 455 address_copy.language_code, 456 &unused_best_language); 457 SubRegionData* sub_region_data = &sub_regions_[®ion_data]; 458 if (!sub_region_data->is_initialized()) 459 sub_region_data->Initialize(region_data, canonicalizer_); 460 461 // Build the list of regions that match |address_copy| when the user is typing 462 // in the |focused_field|. 463 std::vector<Suggestion> suggested_regions; 464 sub_region_data->BuildSuggestions( 465 address_copy, focused_field, &suggested_regions); 466 467 FieldProblemMap problems; 468 FieldProblemMap filter; 469 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE)); 470 471 // Generate suggestions based on the regions. 472 for (std::vector<Suggestion>::const_iterator suggested_region_it = 473 suggested_regions.begin(); 474 suggested_region_it != suggested_regions.end(); 475 ++suggested_region_it) { 476 AddressData address; 477 address.region_code = address_copy.region_code; 478 address.postal_code = address_copy.postal_code; 479 480 // Traverse the tree of regions from the smallest |region_to_suggest| to the 481 // country-wide "root" of the tree. Use the region names or keys found at 482 // each of the levels of the tree to build the |address| to suggest. 483 AddressField address_field = suggested_region_it->matching_address_field; 484 for (const RegionData* region = suggested_region_it->region_to_suggest; 485 region->has_parent(); 486 region = ®ion->parent()) { 487 address.SetFieldValue(address_field, 488 suggested_region_it->region_key_matches 489 ? region->key() 490 : region->name()); 491 address_field = static_cast<AddressField>(address_field - 1); 492 } 493 494 // Do not suggest an address with a mismatching postal code. 495 problems.clear(); 496 validator_.Validate(address, 497 true, // Allow postal office boxes. 498 false, // Do not require recipient name. 499 &filter, 500 &problems, 501 *validated_); 502 if (!problems.empty()) 503 continue; 504 505 // Do not add more suggestions than |suggestions_limit|. 506 if (suggestions->size() >= suggestions_limit) { 507 suggestions->clear(); 508 return; 509 } 510 511 suggestions->push_back(address); 512 } 513 } 514 515 void InputSuggester::Validated(bool success, 516 const AddressData&, 517 const FieldProblemMap&) { 518 DCHECK(success); 519 } 520 521 } // namespace autofill 522