Home | History | Annotate | Download | only in src
      1 // Copyright (C) 2014 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #ifndef I18N_ADDRESSINPUT_RULESET_H_
     16 #define I18N_ADDRESSINPUT_RULESET_H_
     17 
     18 #include <libaddressinput/address_field.h>
     19 #include <libaddressinput/util/basictypes.h>
     20 #include <libaddressinput/util/scoped_ptr.h>
     21 
     22 #include <map>
     23 #include <set>
     24 #include <string>
     25 
     26 #include "rule.h"
     27 #include "util/canonicalize_string.h"
     28 #include "util/trie.h"
     29 
     30 namespace i18n {
     31 namespace addressinput {
     32 
     33 // A recursive data structure that stores a set of rules for a region. Can store
     34 // the rules for a country, its administrative areas, localities, and dependent
     35 // localities, in addition to the language-specific rules.
     36 //
     37 // Example for Canada and some of its provinces:
     38 //                   CA-->fr
     39 //                   |
     40 // -------------------------------------
     41 // |        |        |        |        |
     42 // v        v        v        v        v
     43 // AB-->fr  BC-->fr  MB-->fr  NB-->fr  NL-->fr
     44 //
     45 // The rules in Canada are in English by default. Each rule also has a French
     46 // language version.
     47 class Ruleset {
     48  public:
     49   // Builds a ruleset for |field| with a region-wide |rule| in the default
     50   // language of the country. The |field| should be between COUNTRY and
     51   // DEPENDENT_LOCALITY (inclusively). The |rule| should not be NULL.
     52   Ruleset(AddressField field, scoped_ptr<Rule> rule);
     53 
     54   ~Ruleset();
     55 
     56   // Returns the parent ruleset. This is NULL until this ruleset has been passed
     57   // to AddSubRegionRuleset() of another ruleset. Consequently, this is always
     58   // NULL for a country-level ruleset.
     59   Ruleset* parent() const { return parent_; }
     60 
     61   // Returns the field type for this ruleset.
     62   AddressField field() const { return field_; }
     63 
     64   // Returns the region-wide rule for this ruleset in the default language of
     65   // the country.
     66   const Rule& rule() const { return *rule_; }
     67 
     68   // Adds the |ruleset| for |sub_region| and sets this to be its parent. A
     69   // |sub_region| should be added at most once. The |ruleset| should not be
     70   // NULL.
     71   //
     72   // The field of the |ruleset| parameter must be exactly one level below the
     73   // field of this ruleset. For example, a COUNTRY ruleset can contain
     74   // ADMIN_AREA rulesets, but should not contain COUNTRY or LOCALITY rulesets.
     75   void AddSubRegionRuleset(const std::string& sub_region,
     76                            scoped_ptr<Ruleset> ruleset);
     77 
     78   // Adds a language-specific |rule| for |language_code| for this region. A
     79   // |language_code| should be added at most once. The |rule| should not be
     80   // NULL.
     81   void AddLanguageCodeRule(const std::string& language_code,
     82                            scoped_ptr<Rule> rule);
     83 
     84   // Returns the set of rules for |sub_region|. The result is NULL if there's no
     85   // such |sub_region|. The caller does not own the result.
     86   Ruleset* GetSubRegionRuleset(const std::string& sub_region) const;
     87 
     88   // If there's a language-specific rule for |language_code|, then returns this
     89   // rule. Otherwise returns the rule in the default language of the country.
     90   const Rule& GetLanguageCodeRule(const std::string& language_code) const;
     91 
     92   // Returns a mapping of sub-region keys to rulesets. The caller does not own
     93   // the result. The values are not NULL.
     94   const std::map<std::string, Ruleset*>& GetSubRegionRulesets() const {
     95     return sub_regions_;
     96   }
     97 
     98   // Enables using the FindRulesetsByPrefix() method. Should be called only once
     99   // and on a COUNTRY level ruleset.
    100   void BuildPrefixSearchIndex();
    101 
    102   // Returns true if BuildPrefixSearchIndex() has built a non-empty |tries_|.
    103   bool prefix_search_index_ready() const { return !tries_.empty(); }
    104 
    105   // Returns the deepest possible ruleset level for this country. Must be called
    106   // on a COUNTRY level ruleset. Must be called after BuildPrefixSearchIndex()
    107   // has been called.
    108   AddressField deepest_ruleset_level() const { return deepest_ruleset_level_; }
    109 
    110   // Finds all rulesets at |ruleset_level| where the rule for |language_code|
    111   // has the |identity_field| that starts with |prefix|. Ignores diacritics and
    112   // capitalization differences between the rule data and |prefix|.
    113   //
    114   // If there are no rules for |language_code| (or |language_code| is an empty
    115   // string), then the default language code is used.
    116   //
    117   // Should be called only on a COUNTRY level ruleset. Should be called only
    118   // after BuildPrefixSearchIndex() has been called.
    119   //
    120   // The |ruleset_level| parameter should be only ADMIN_AREA, LOCALITY, or
    121   // DEPENDENT_LOCALITY. The |result| parameter should not be NULL.
    122   void FindRulesetsByPrefix(const std::string& language_code,
    123                             AddressField ruleset_level,
    124                             Rule::IdentityField identity_field,
    125                             const std::string& prefix,
    126                             std::set<const Ruleset*>* result) const;
    127 
    128  private:
    129   // The type that maps rule identity field to tries of rulesets.
    130   typedef std::map<Rule::IdentityField, Trie<const Ruleset*>*>
    131       IdentityFieldTries;
    132 
    133   // The type that maps address field to IdentityFieldTries.
    134   typedef std::map<AddressField, IdentityFieldTries*> AddressFieldTries;
    135 
    136   // The type that maps language code to AddressFieldTries.
    137   typedef std::map<std::string, AddressFieldTries*> LanguageCodeTries;
    138 
    139   // Adds all children of |parent_ruleset| into |tries_| of this ruleset. Should
    140   // be called only on a COUNTRY level ruleset.
    141   void AddSubRegionRulesetsToTrie(const Ruleset& parent_ruleset);
    142 
    143   // The tries to look up rulesets by a prefix of key, name, or latin-name in a
    144   // rule. Has data only in a COUNTRY level ruleset. Owns the map and trie
    145   // objects. Does not own the ruleset objects.
    146   LanguageCodeTries tries_;
    147 
    148   // Canonicalizes region keys, names, and latin names when building a trie.
    149   scoped_ptr<StringCanonicalizer> canonicalizer_;
    150 
    151   // The parent ruleset of this object. The parent ruleset owns this object.
    152   Ruleset* parent_;
    153 
    154   // The field of this ruleset.
    155   const AddressField field_;
    156 
    157   // The deepest possible ruleset level for this country. Set in
    158   // BuildPrefixSearchIndex() method and, therefore, meaningful only on a
    159   // COUNTRY level ruleset.
    160   AddressField deepest_ruleset_level_;
    161 
    162   // The region-wide rule in the default language of the country.
    163   const scoped_ptr<const Rule> rule_;
    164 
    165   // Owned rulesets for sub-regions.
    166   std::map<std::string, Ruleset*> sub_regions_;
    167 
    168   // Owned language-specific rules for the region.
    169   std::map<std::string, const Rule*> language_codes_;
    170 
    171   DISALLOW_COPY_AND_ASSIGN(Ruleset);
    172 };
    173 
    174 }  // namespace addressinput
    175 }  // namespace i18n
    176 
    177 #endif  // I18N_ADDRESSINPUT_RULESET_H_
    178