Home | History | Annotate | Download | only in chromium
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "third_party/libaddressinput/chromium/input_suggester.h"
      6 
      7 #include <cstddef>
      8 #include <set>
      9 #include <utility>
     10 
     11 #include "base/logging.h"
     12 #include "third_party/libaddressinput/chromium/trie.h"
     13 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
     14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
     15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
     16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
     17 
     18 namespace autofill {
     19 
     20 using ::i18n::addressinput::AddressData;
     21 using ::i18n::addressinput::AddressField;
     22 using ::i18n::addressinput::BuildCallback;
     23 using ::i18n::addressinput::FieldProblemMap;
     24 using ::i18n::addressinput::PreloadSupplier;
     25 using ::i18n::addressinput::RegionData;
     26 using ::i18n::addressinput::RegionDataBuilder;
     27 
     28 using ::i18n::addressinput::ADMIN_AREA;
     29 using ::i18n::addressinput::COUNTRY;
     30 using ::i18n::addressinput::DEPENDENT_LOCALITY;
     31 using ::i18n::addressinput::LOCALITY;
     32 using ::i18n::addressinput::POSTAL_CODE;
     33 
     34 using ::i18n::addressinput::INVALID_FORMAT;
     35 using ::i18n::addressinput::MISMATCHING_VALUE;
     36 
     37 namespace {
     38 
     39 // Initial size for the buffer used in the canonicalizer.
     40 static const size_t kInitialBufferSize = 32;
     41 
     42 // A region and its metadata useful for constructing a suggestion.
     43 struct Suggestion {
     44  public:
     45   // Builds a suggestion of |region_to_suggest|. Does not take ownership of
     46   // |region_to_suggest|, which should not be NULL.
     47   Suggestion(const RegionData* region_to_suggest,
     48              AddressField matching_address_field,
     49              bool region_key_matches)
     50       : region_to_suggest(region_to_suggest),
     51         matching_address_field(matching_address_field),
     52         region_key_matches(region_key_matches) {
     53     DCHECK(region_to_suggest);
     54   }
     55 
     56   ~Suggestion() {}
     57 
     58   // The region that should be suggested. For example, if the region is ("CA",
     59   // "California"), then either "CA" or "California" should be suggested.
     60   const RegionData* region_to_suggest;
     61 
     62   // The field in the address for which the suggestion should be made. For
     63   // example, ADMIN_AREA in US means the suggestion should be made for the field
     64   // labeled "State".
     65   AddressField matching_address_field;
     66 
     67   // True if the key of the region matches user input (the name may or may not
     68   // match). "CA" should be suggested for a ("CA", "California") region.
     69   //
     70   // False if only the name of the region matches user input (the key does not
     71   // match). "California" should be suggested for a ("CA", "California") region.
     72   bool region_key_matches;
     73 };
     74 
     75 // Suggestions for an address. Contains lists of suggestions for administrative
     76 // area, locality, and dependent locality fields of an address.
     77 class AddressSuggestions {
     78  public:
     79   AddressSuggestions() {}
     80   ~AddressSuggestions() {}
     81 
     82   // Marks all regions at |address_field| level as matching user input.
     83   void AllRegionsMatchForField(AddressField address_field) {
     84     all_regions_match_input_.insert(address_field);
     85   }
     86 
     87   // Marks given regions at |address_field| level as matching user input. The
     88   // |regions_match_key| parameter contains the regions that match user input by
     89   // their keys. The |regions_match_name| parameter contains the regions that
     90   // match user input by their names.
     91   //
     92   // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
     93   // DEPENDENT_LOCALITY.
     94   bool AddRegions(AddressField address_field,
     95                   const std::set<const RegionData*>& regions_match_key,
     96                   const std::set<const RegionData*>& regions_match_name) {
     97     DCHECK(address_field >= ADMIN_AREA);
     98     DCHECK(address_field <= DEPENDENT_LOCALITY);
     99 
    100     AddressField parent_address_field =
    101         static_cast<AddressField>(address_field - 1);
    102 
    103     bool all_parents_match =
    104         parent_address_field == COUNTRY ||
    105         all_regions_match_input_.find(parent_address_field) !=
    106             all_regions_match_input_.end();
    107 
    108     // Cannot build |address_field| level suggestions if there are no matches in
    109     // |parent_address_field| level regions.
    110     const RegionsMatchInput* parents = NULL;
    111     if (address_field > ADMIN_AREA && !all_parents_match) {
    112       parents = &regions_match_input_[parent_address_field];
    113       if (parents->keys.empty() && parents->names.empty())
    114         return false;
    115     }
    116 
    117     RegionsMatchInput* regions = NULL;
    118     if (address_field < DEPENDENT_LOCALITY)
    119       regions = &regions_match_input_[address_field];
    120 
    121     std::vector<Suggestion>* suggestions = &suggestions_[address_field];
    122     bool added_suggestions = false;
    123 
    124     // Iterate over both |regions_match_key| and |regions_match_name| and build
    125     // Suggestion objects based on the given RegionData objects. Advance either
    126     // one iterator at a time (if they point to different data) or both
    127     // iterators at once (if they point to the same data).
    128     for (std::set<const RegionData*>::const_iterator
    129              key_it = regions_match_key.begin(),
    130              name_it = regions_match_name.begin();
    131          key_it != regions_match_key.end() ||
    132              name_it != regions_match_name.end();) {
    133       const RegionData* key_region =
    134           key_it != regions_match_key.end() ? *key_it : NULL;
    135       const RegionData* name_region =
    136           name_it != regions_match_name.end() ? *name_it : NULL;
    137 
    138       // Regions that do not have a parent that also matches input will not
    139       // become suggestions.
    140       bool key_region_has_parent =
    141           all_parents_match ||
    142           (parents && !parents->keys.empty() && key_region &&
    143            parents->keys.find(&key_region->parent()) != parents->keys.end());
    144       bool name_region_has_parent =
    145           all_parents_match ||
    146           (parents && !parents->names.empty() && name_region &&
    147            parents->names.find(&name_region->parent()) != parents->names.end());
    148 
    149       if (name_region && (!key_region || name_region < key_region)) {
    150         if (name_region_has_parent) {
    151           suggestions->push_back(Suggestion(name_region, address_field, false));
    152           added_suggestions = true;
    153           if (regions)
    154             regions->names.insert(name_region);
    155         }
    156 
    157         ++name_it;
    158       } else if (key_region && (!name_region || key_region < name_region)) {
    159         if (key_region_has_parent) {
    160           suggestions->push_back(Suggestion(key_region, address_field, true));
    161           added_suggestions = true;
    162           if (regions)
    163             regions->keys.insert(key_region);
    164         }
    165 
    166         ++key_it;
    167       } else {
    168         if (key_region_has_parent) {
    169           suggestions->push_back(Suggestion(key_region, address_field, true));
    170           added_suggestions = true;
    171           if (regions) {
    172             regions->keys.insert(key_region);
    173             regions->names.insert(name_region);
    174           }
    175         }
    176 
    177         ++key_it;
    178         ++name_it;
    179       }
    180     }
    181 
    182     return added_suggestions;
    183   }
    184 
    185   // Swaps the suggestions for the smallest sub-region into |suggestions|.
    186   // |this| is not usable after this call due to using the swap() operation.
    187   //
    188   // The |suggestions| parameter should not be NULL.
    189   void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
    190     DCHECK(suggestions);
    191     for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
    192       std::vector<Suggestion>* result =
    193           &suggestions_[static_cast<AddressField>(i)];
    194       if (!result->empty()) {
    195         suggestions->swap(*result);
    196         return;
    197       }
    198     }
    199   }
    200 
    201  private:
    202   // The sets of non-owned regions used for looking up regions that match user
    203   // input by keys and names.
    204   struct RegionsMatchInput {
    205     std::set<const RegionData*> keys;
    206     std::set<const RegionData*> names;
    207   };
    208 
    209   // The regions that match user input at ADMIN_AREA and LOCALITY levels.
    210   std::map<AddressField, RegionsMatchInput> regions_match_input_;
    211 
    212   // The set of fields for which all regions match user input. Used to avoid
    213   // storing a long list in |regions_match_input_| and later looking it up
    214   // there.
    215   std::set<AddressField> all_regions_match_input_;
    216 
    217   // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
    218   std::map<AddressField, std::vector<Suggestion> > suggestions_;
    219 
    220   DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
    221 };
    222 
    223 }  // namespace
    224 
    225 InputSuggester::StringCanonicalizer::StringCanonicalizer()
    226     : buffer_(kInitialBufferSize, 0) {
    227   UErrorCode error_code = U_ZERO_ERROR;
    228   collator_.reset(
    229       icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
    230   DCHECK(U_SUCCESS(error_code));
    231   collator_->setStrength(icu::Collator::PRIMARY);
    232 }
    233 
    234 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
    235 
    236 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
    237     const std::string& original) const {
    238   DCHECK(!original.empty());
    239 
    240   icu::UnicodeString icu_str(original.c_str(),
    241                              static_cast<int32_t>(original.length()));
    242   int32_t sort_key_size =
    243       collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
    244   DCHECK_LT(0, sort_key_size);
    245 
    246   if (sort_key_size > buffer_size()) {
    247     buffer_.resize(sort_key_size * 2, 0);
    248     sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
    249     DCHECK_LT(0, sort_key_size);
    250     DCHECK_GT(buffer_size(), sort_key_size);
    251   }
    252 
    253   return buffer_;
    254 }
    255 
    256 int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
    257   return static_cast<int32_t>(buffer_.size());
    258 }
    259 
    260 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
    261 // region name or key.
    262 class InputSuggester::SubRegionData {
    263  public:
    264   SubRegionData()
    265       : initialized_(false),
    266         smallest_region_size_(COUNTRY),
    267         canonicalizer_(NULL) {}
    268 
    269   ~SubRegionData() {}
    270 
    271   bool is_initialized() const { return initialized_; }
    272 
    273   // Adds the sub-regions of |country_region| into tries. Uses
    274   // |shared_canonicalizer| for case and diacritic insensitive lookup of the
    275   // sub-regions. Should be called at most once.
    276   void Initialize(const RegionData& country_region,
    277                   const StringCanonicalizer& shared_canonicalizer) {
    278     DCHECK(!initialized_);
    279     DCHECK(!country_region.has_parent());
    280 
    281     initialized_ = true;
    282     canonicalizer_ = &shared_canonicalizer;
    283 
    284     if (!country_region.sub_regions().empty())
    285       AddSubRegionsOf(country_region, COUNTRY);
    286   }
    287 
    288   // Adds the suggestions for |user_input| into |suggestions| when user is
    289   // typing in |focused_field|.
    290   void BuildSuggestions(const AddressData& user_input,
    291                         AddressField focused_field,
    292                         std::vector<Suggestion>* suggestions) {
    293     DCHECK(initialized_);
    294 
    295     // Do not suggest anything if there's no suggestion data for the focused
    296     // field.
    297     if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
    298       return;
    299 
    300     // Non-owned regions that match a field value by region key.
    301     std::set<const RegionData*> regions_match_key;
    302 
    303     // Non-owned regions that match a field value by region name.
    304     std::set<const RegionData*> regions_match_name;
    305 
    306     AddressSuggestions address_suggestions;
    307     for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
    308          ++i) {
    309       AddressField address_field = static_cast<AddressField>(i);
    310       AddressField parent_address_field = static_cast<AddressField>(i - 1);
    311 
    312       const std::string& field_value = user_input.GetFieldValue(address_field);
    313       const std::string& parent_field_value =
    314           user_input.GetFieldValue(parent_address_field);
    315 
    316       if (field_value.empty() &&
    317           (address_field == ADMIN_AREA || parent_field_value.empty())) {
    318         address_suggestions.AllRegionsMatchForField(address_field);
    319         continue;
    320       }
    321 
    322       if (field_value.empty()) {
    323         DCHECK_NE(address_field, focused_field);
    324         continue;
    325       }
    326 
    327       regions_match_key.clear();
    328       regions_match_name.clear();
    329 
    330       const FieldTries& field_tries = field_tries_[address_field];
    331 
    332       const std::vector<uint8_t>& canonicalized_value =
    333           canonicalizer_->Canonicalize(field_value);
    334 
    335       field_tries.keys.FindDataForKeyPrefix(canonicalized_value,
    336                                             &regions_match_key);
    337       field_tries.names.FindDataForKeyPrefix(canonicalized_value,
    338                                              &regions_match_name);
    339 
    340       bool added_suggestions = address_suggestions.AddRegions(
    341           address_field, regions_match_key, regions_match_name);
    342 
    343       // Do not suggest anything if the focused field does not have suggestions.
    344       if (address_field == focused_field && !added_suggestions)
    345         return;
    346     }
    347 
    348     address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
    349   }
    350 
    351  private:
    352   // The tries to lookup regions for a specific field by keys and names. For
    353   // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
    354   // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
    355   // struct is uncopyable due to Trie objects being uncopyable.
    356   struct FieldTries {
    357     Trie<const RegionData*> keys;
    358     Trie<const RegionData*> names;
    359   };
    360 
    361   // Adds the sub-regions of |parent_region| into tries.
    362   void AddSubRegionsOf(const RegionData& parent_region,
    363                        AddressField parent_field) {
    364     DCHECK(!parent_region.sub_regions().empty());
    365 
    366     AddressField address_field = static_cast<AddressField>(parent_field + 1);
    367     DCHECK(address_field >= ADMIN_AREA);
    368     DCHECK(address_field <= DEPENDENT_LOCALITY);
    369 
    370     FieldTries* field_tries = &field_tries_[address_field];
    371     for (std::vector<const RegionData*>::const_iterator it =
    372              parent_region.sub_regions().begin();
    373          it != parent_region.sub_regions().end();
    374          ++it) {
    375       const RegionData* region = *it;
    376       DCHECK(region);
    377       DCHECK(!region->key().empty());
    378       DCHECK(!region->name().empty());
    379 
    380       field_tries->keys.AddDataForKey(
    381           canonicalizer_->Canonicalize(region->key()), region);
    382 
    383       field_tries->names.AddDataForKey(
    384           canonicalizer_->Canonicalize(region->name()), region);
    385 
    386       if (smallest_region_size_ < address_field)
    387         smallest_region_size_ = address_field;
    388 
    389       if (!region->sub_regions().empty())
    390         AddSubRegionsOf(*region, address_field);
    391     }
    392   }
    393 
    394   // True after Initialize() has been called.
    395   bool initialized_;
    396 
    397   // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
    398   // DEPENDENT_LOCALITY.
    399   std::map<AddressField, FieldTries> field_tries_;
    400 
    401   // The smallest size of a sub-region that has data. For example, this is
    402   // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
    403   AddressField smallest_region_size_;
    404 
    405   // A shared instance of string canonicalizer for case and diacritic comparison
    406   // of region keys and names.
    407   const StringCanonicalizer* canonicalizer_;
    408 };
    409 
    410 InputSuggester::InputSuggester(PreloadSupplier* supplier)
    411     : region_data_builder_(supplier),
    412       input_helper_(supplier),
    413       validator_(supplier),
    414       validated_(BuildCallback(this, &InputSuggester::Validated)) {}
    415 
    416 InputSuggester::~InputSuggester() {}
    417 
    418 void InputSuggester::GetSuggestions(const AddressData& user_input,
    419                                     AddressField focused_field,
    420                                     size_t suggestions_limit,
    421                                     std::vector<AddressData>* suggestions) {
    422   DCHECK(suggestions);
    423   DCHECK(focused_field == POSTAL_CODE ||
    424          (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
    425 
    426   AddressData address_copy = user_input;
    427 
    428   // Do not suggest anything if the user input is empty.
    429   if (address_copy.IsFieldEmpty(focused_field))
    430     return;
    431 
    432   if (focused_field == POSTAL_CODE) {
    433     // Do not suggest anything if the user is typing an invalid postal code.
    434     FieldProblemMap problems;
    435     FieldProblemMap filter;
    436     filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
    437     validator_.Validate(address_copy,
    438                         true,   // Allow postal office boxes.
    439                         false,  // Do not require recipient name.
    440                         &filter,
    441                         &problems,
    442                         *validated_);
    443     if (!problems.empty())
    444       return;
    445 
    446     // Fill in the sub-regions based on the postal code.
    447     input_helper_.FillAddress(&address_copy);
    448   }
    449 
    450   // Lazily initialize the mapping from COUNTRY level regions to all of their
    451   // sub-regions with metadata for generating suggestions.
    452   std::string unused_best_language;
    453   const RegionData& region_data =
    454       region_data_builder_.Build(address_copy.region_code,
    455                                  address_copy.language_code,
    456                                  &unused_best_language);
    457   SubRegionData* sub_region_data = &sub_regions_[&region_data];
    458   if (!sub_region_data->is_initialized())
    459     sub_region_data->Initialize(region_data, canonicalizer_);
    460 
    461   // Build the list of regions that match |address_copy| when the user is typing
    462   // in the |focused_field|.
    463   std::vector<Suggestion> suggested_regions;
    464   sub_region_data->BuildSuggestions(
    465       address_copy, focused_field, &suggested_regions);
    466 
    467   FieldProblemMap problems;
    468   FieldProblemMap filter;
    469   filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
    470 
    471   // Generate suggestions based on the regions.
    472   for (std::vector<Suggestion>::const_iterator suggested_region_it =
    473            suggested_regions.begin();
    474        suggested_region_it != suggested_regions.end();
    475        ++suggested_region_it) {
    476     AddressData address;
    477     address.region_code = address_copy.region_code;
    478     address.postal_code = address_copy.postal_code;
    479 
    480     // Traverse the tree of regions from the smallest |region_to_suggest| to the
    481     // country-wide "root" of the tree. Use the region names or keys found at
    482     // each of the levels of the tree to build the |address| to suggest.
    483     AddressField address_field = suggested_region_it->matching_address_field;
    484     for (const RegionData* region = suggested_region_it->region_to_suggest;
    485          region->has_parent();
    486          region = &region->parent()) {
    487       address.SetFieldValue(address_field,
    488                             suggested_region_it->region_key_matches
    489                                 ? region->key()
    490                                 : region->name());
    491       address_field = static_cast<AddressField>(address_field - 1);
    492     }
    493 
    494     // Do not suggest an address with a mismatching postal code.
    495     problems.clear();
    496     validator_.Validate(address,
    497                         true,   // Allow postal office boxes.
    498                         false,  // Do not require recipient name.
    499                         &filter,
    500                         &problems,
    501                         *validated_);
    502     if (!problems.empty())
    503       continue;
    504 
    505     // Do not add more suggestions than |suggestions_limit|.
    506     if (suggestions->size() >= suggestions_limit) {
    507       suggestions->clear();
    508       return;
    509     }
    510 
    511     suggestions->push_back(address);
    512   }
    513 }
    514 
    515 void InputSuggester::Validated(bool success,
    516                                const AddressData&,
    517                                const FieldProblemMap&) {
    518   DCHECK(success);
    519 }
    520 
    521 }  // namespace autofill
    522