Home | History | Annotate | Download | only in src
      1 // Copyright (C) 2014 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include <libaddressinput/address_input_helper.h>
     16 
     17 #include <libaddressinput/address_data.h>
     18 #include <libaddressinput/address_field.h>
     19 #include <libaddressinput/preload_supplier.h>
     20 #include <libaddressinput/util/basictypes.h>
     21 
     22 #include <cassert>
     23 #include <cstddef>
     24 #include <string>
     25 #include <vector>
     26 
     27 #include <re2/re2.h>
     28 
     29 #include "language.h"
     30 #include "lookup_key.h"
     31 #include "region_data_constants.h"
     32 #include "rule.h"
     33 #include "util/re2ptr.h"
     34 
     35 namespace i18n {
     36 namespace addressinput {
     37 
     38 // Used for building a hierarchy of rules, each one connected to its parent.
        // Nodes are stored by value in one vector per hierarchy depth (see
        // FillAddress), so |parent| is a raw, non-owning pointer.
     39 struct Node {
     40   const Node* parent;  // Node of the enclosing rule; NULL for the first rule checked.
     41   const Rule* rule;    // The rule this node stands for; not owned by the node.
     42 };
     43 
     44 namespace {
     45 
     46 const char kLookupKeySeparator = '/';  // Separates the components of a rule ID such as "data/US/CA".
     47 
     48 const size_t kHierarchyDepth = arraysize(LookupKey::kHierarchy);  // Number of levels in LookupKey::kHierarchy.
     49 
     50 // Gets the best name for the entity represented by the current rule, using the
     51 // language provided. The language is currently used to distinguish whether a
     52 // Latin-script name should be fetched; if it is not explicitly Latin-script, we
     53 // prefer IDs over names (so return CA instead of California for an English
     54 // user.) If there is no Latin-script name, we fall back to the ID.
     55 std::string GetBestName(const Language& language, const Rule& rule) {
     56   if (language.has_latin_script) {
     57     const std::string& name = rule.GetLatinName();
     58     if (!name.empty()) {
     59       return name;
     60     }
     61   }
     62   // The ID is stored as data/US/CA for "CA", for example, and we only want the
     63   // last part.
     64   const std::string& id = rule.GetId();
     65   std::string::size_type pos = id.rfind(kLookupKeySeparator);
     66   assert(pos != std::string::npos);
     67   return id.substr(pos + 1);
     68 }
     69 
     70 void FillAddressFromMatchedRules(
     71     const std::vector<Node>* hierarchy,
     72     AddressData* address) {
     73   assert(hierarchy != NULL);
     74   assert(address != NULL);
     75   // We skip region code, because we never try and fill that in if it isn't
     76   // already set.
     77   Language language(address->language_code);
     78   for (size_t depth = kHierarchyDepth - 1; depth > 0; --depth) {
     79     // If there is only one match at this depth, then we should populate the
     80     // address, using this rule and its parents.
     81     if (hierarchy[depth].size() == 1) {
     82       for (const Node* node = &hierarchy[depth].front();
     83            node != NULL; node = node->parent, --depth) {
     84         const Rule* rule = node->rule;
     85         assert(rule != NULL);
     86 
     87         AddressField field = LookupKey::kHierarchy[depth];
     88         // Note only empty fields are permitted to be overwritten.
     89         if (address->IsFieldEmpty(field)) {
     90           address->SetFieldValue(field, GetBestName(language, *rule));
     91         }
     92       }
     93       break;
     94     }
     95   }
     96 }
     97 
     98 }  // namespace;
     99 
    100 AddressInputHelper::AddressInputHelper(PreloadSupplier* supplier)
    101     : supplier_(supplier) {
    102   assert(supplier_ != NULL);
    103 }
    104 
    105 AddressInputHelper::~AddressInputHelper() {
    106 }
    107 
    108 void AddressInputHelper::FillAddress(AddressData* address) const {
    109   assert(address != NULL);
    110   const std::string& region_code = address->region_code;
    111   if (!RegionDataConstants::IsSupported(region_code)) {
    112     // If we don't have a region code, we can't do anything reliably to fill
    113     // this address.
    114     return;
    115   }
    116 
    117   AddressData lookup_key_address;
    118   lookup_key_address.region_code = region_code;
    119   // First try and fill in the postal code if it is missing.
    120   LookupKey lookup_key;
    121   lookup_key.FromAddress(lookup_key_address);
    122   const Rule* region_rule = supplier_->GetRule(lookup_key);
    123   // We have already checked that the region is supported; and users of this
    124   // method must have called LoadRules() first, so we check this here.
    125   assert(region_rule != NULL);
    126 
    127   const RE2ptr* postal_code_reg_exp = region_rule->GetPostalCodeMatcher();
    128   if (postal_code_reg_exp != NULL) {
    129     if (address->postal_code.empty()) {
    130       address->postal_code = region_rule->GetSolePostalCode();
    131     }
    132 
    133     // If we have a valid postal code, try and work out the most specific
    134     // hierarchy that matches the postal code. Note that the postal code might
    135     // have been added in the previous check.
    136     if (!address->postal_code.empty() &&
    137         RE2::FullMatch(address->postal_code, *postal_code_reg_exp->ptr)) {
    138 
    139       // This hierarchy is used to store rules that represent possible matches
    140       // at each level of the hierarchy.
    141       std::vector<Node> hierarchy[kHierarchyDepth];
    142       CheckChildrenForPostCodeMatches(*address, lookup_key, NULL, hierarchy);
    143 
    144       FillAddressFromMatchedRules(hierarchy, address);
    145     }
    146   }
    147 
    148   // TODO: When we have the data, we should fill in the state for countries with
    149   // state required and only one possible value, e.g. American Samoa.
    150 }
    151 
    152 void AddressInputHelper::CheckChildrenForPostCodeMatches(
    153     const AddressData& address,
    154     const LookupKey& lookup_key,
    155     const Node* parent,
    156     // An array of vectors.
    157     std::vector<Node>* hierarchy) const {
    158   const Rule* rule = supplier_->GetRule(lookup_key);
    159   assert(rule != NULL);
    160 
    161   const RE2ptr* postal_code_prefix = rule->GetPostalCodeMatcher();
    162   if (postal_code_prefix == NULL ||
    163       RE2::PartialMatch(address.postal_code, *postal_code_prefix->ptr)) {
    164     // This was a match, so store it and its parent in the hierarchy.
    165     hierarchy[lookup_key.GetDepth()].push_back(Node());
    166     Node* node = &hierarchy[lookup_key.GetDepth()].back();
    167     node->parent = parent;
    168     node->rule = rule;
    169 
    170     // If there are children, check them too.
    171     for (std::vector<std::string>::const_iterator child_it =
    172              rule->GetSubKeys().begin();
    173          child_it != rule->GetSubKeys().end(); ++child_it) {
    174       LookupKey child_key;
    175       child_key.FromLookupKey(lookup_key, *child_it);
    176       CheckChildrenForPostCodeMatches(address, child_key, node, hierarchy);
    177     }
    178   }
    179 }
    180 
    181 }  // namespace addressinput
    182 }  // namespace i18n
    183