Home | History | Annotate | Download | only in addressinput
      1 /*
      2  * Copyright (C) 2010 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.i18n.addressinput;
     18 
     19 import com.android.i18n.addressinput.LookupKey.ScriptType;
     20 
     21 import java.util.EnumSet;
     22 import java.util.HashSet;
     23 import java.util.Map;
     24 import java.util.Set;
     25 import java.util.regex.Pattern;
     26 
     27 /**
     28  * Accesses address verification data used to verify components of an address.
     29  * <p> Not all fields require all types of validation, although this could be done. In particular,
     30  * the current implementation only provides known value verification for the hierarchical fields,
     31  * and only provides format and match verification for the postal code field.
     32  */
     33 public class FieldVerifier {
     34     // Node data values are delimited by this symbol.
     35     private static final String DATA_DELIMITER = "~";
     36     // Keys are built up using this delimiter: eg data/US, data/US/CA.
     37     private static final String KEY_DELIMITER = "/";
     38 
     39     private String mId;
     40     private DataSource mDataSource;
     41 
     42     private Set<AddressField> mPossibleFields;
     43     private Set<AddressField> mRequired;
     44     // Known values. Can be either a key, a name in Latin, or a name in native script.
     45     private Map<String, String> mCandidateValues;
     46 
     47     // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since
     48     // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the
     49     // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations
     50     // exist.
     51     private String[] mKeys;
     52     // Names in Latin. These are only populated if the native/local names are in a script other than
     53     // latin.
     54     private String[] mLatinNames;
     55     // Names in native script.
     56     private String[] mLocalNames;
     57 
     58     // Pattern representing the format of a postal code number.
     59     private Pattern mFormat;
     60     // Defines the valid range of a postal code number.
     61     private Pattern mMatch;
     62 
     63     /**
     64      * Creates the root field verifier for a particular data source.
     65      */
     66     public FieldVerifier(DataSource dataSource) {
     67         mDataSource = dataSource;
     68         populateRootVerifier();
     69     }
     70 
     71     /**
     72      * Creates a field verifier based on its parent and on the new data for this node supplied by
     73      * nodeData (which may be null).
     74      */
     75     private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) {
     76         // Most information is inherited from the parent.
     77         mPossibleFields = parent.mPossibleFields;
     78         mRequired = parent.mRequired;
     79         mDataSource = parent.mDataSource;
     80         mFormat = parent.mFormat;
     81         mMatch = parent.mMatch;
     82         // Here we add in any overrides from this particular node as well as information such as
     83         // localNames, latinNames and keys.
     84         populate(nodeData);
     85         // candidateValues should never be inherited from the parent, but built up from the
     86         // localNames in this node.
     87         mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames);
     88     }
     89 
     90     /**
     91      * Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This
     92      * is a little messy at the moment since not all the appropriate information is actually under
     93      * the root "data" node in the metadata. For example, "possibleFields" and "required" are not
     94      * present there.
     95      */
     96     private void populateRootVerifier() {
     97         mId = "data";
     98         // Keys come from the countries under "data".
     99         AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data");
    100         if (rootNode.containsKey(AddressDataKey.COUNTRIES)) {
    101             mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER);
    102         }
    103         // candidateValues is just the set of keys.
    104         mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null);
    105 
    106         // Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping.
    107         // TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this
    108         // information instead.
    109         AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ");
    110         mPossibleFields = new HashSet<AddressField>();
    111         if (defaultZZ.containsKey(AddressDataKey.FMT)) {
    112             mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT));
    113         }
    114         mRequired = new HashSet<AddressField>();
    115         if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) {
    116             mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE));
    117         }
    118     }
    119 
    120     /**
    121      * Populates this verifier with data from the node data passed in. This may be null.
    122      */
    123     private void populate(AddressVerificationNodeData nodeData) {
    124         if (nodeData == null) {
    125             return;
    126         }
    127         if (nodeData.containsKey(AddressDataKey.ID)) {
    128             mId = nodeData.get(AddressDataKey.ID);
    129         }
    130         if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) {
    131             mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER);
    132         }
    133         if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) {
    134             mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER);
    135         }
    136         if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) {
    137             mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER);
    138         }
    139         if (nodeData.containsKey(AddressDataKey.FMT)) {
    140             mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT));
    141         }
    142         if (nodeData.containsKey(AddressDataKey.REQUIRE)) {
    143             mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE));
    144         }
    145         if (nodeData.containsKey(AddressDataKey.XZIP)) {
    146             mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE);
    147         }
    148         if (nodeData.containsKey(AddressDataKey.ZIP)) {
    149             // This key has two different meanings, depending on whether this is a country-level key
    150             // or not.
    151             if (isCountryKey()) {
    152                 mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
    153                                           Pattern.CASE_INSENSITIVE);
    154             } else {
    155                 mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
    156                                          Pattern.CASE_INSENSITIVE);
    157             }
    158         }
    159         // If there are latin names but no local names, and there are the same number of latin names
    160         // as there are keys, then we assume the local names are the same as the keys.
    161         if (mKeys != null && mLocalNames == null && mLatinNames != null &&
    162             mKeys.length == mLatinNames.length) {
    163             mLocalNames = mKeys;
    164         }
    165     }
    166 
    167     FieldVerifier refineVerifier(String sublevel) {
    168         if (Util.trimToNull(sublevel) == null) {
    169             return new FieldVerifier(this, null);
    170         }
    171         // If the parent node didn't exist, then the subLevelName will start with "null".
    172         String subLevelName = mId + KEY_DELIMITER + sublevel;
    173         // For names with no Latin equivalent, we can look up the sublevel name directly.
    174         AddressVerificationNodeData nodeData = mDataSource.get(subLevelName);
    175         if (nodeData != null) {
    176             return new FieldVerifier(this, nodeData);
    177         }
    178         // If that failed, then we try to look up the local name equivalent of this latin name.
    179         // First check these exist.
    180         if (mLatinNames == null) {
    181             return new FieldVerifier(this, null);
    182         }
    183         for (int n = 0; n < mLatinNames.length; n++) {
    184             if (mLatinNames[n].equalsIgnoreCase(sublevel)) {
    185                 // We found a match - we should try looking up a key with the local name at the same
    186                 // index.
    187                 subLevelName = mId + KEY_DELIMITER + mLocalNames[n];
    188                 nodeData = mDataSource.get(subLevelName);
    189                 if (nodeData != null) {
    190                     return new FieldVerifier(this, nodeData);
    191                 }
    192             }
    193         }
    194         // No sub-verifiers were found.
    195         return new FieldVerifier(this, null);
    196     }
    197 
    198     /**
    199      * Returns the ID of this verifier.
    200      */
    201     @Override
    202     public String toString() {
    203         return mId;
    204     }
    205 
    206     /**
    207      * Checks a value in a particular script for a particular field to see if it causes the problem
    208      * specified. If so, this problem is added to the AddressProblems collection passed in. Returns
    209      * true if no problem was found.
    210      */
    211     protected boolean check(ScriptType script, AddressProblemType problem, AddressField field,
    212             String value, AddressProblems problems) {
    213         boolean problemFound = false;
    214 
    215         String trimmedValue = Util.trimToNull(value);
    216         switch (problem) {
    217             case USING_UNUSED_FIELD:
    218                 if (trimmedValue != null && !mPossibleFields.contains(field)) {
    219                     problemFound = true;
    220                 }
    221                 break;
    222             case MISSING_REQUIRED_FIELD:
    223                 if (mRequired.contains(field) && trimmedValue == null) {
    224                     problemFound = true;
    225                 }
    226                 break;
    227             case UNKNOWN_VALUE:
    228                 // An empty string will never be an UNKNOWN_VALUE. It is invalid
    229                 // only when it appears in a required field (In that case it will
    230                 // be reported as MISSING_REQUIRED_FIELD).
    231                 if (trimmedValue == null) {
    232                     break;
    233                 }
    234                 problemFound = !isKnownInScript(script, trimmedValue);
    235                 break;
    236             case UNRECOGNIZED_FORMAT:
    237                 if (trimmedValue != null && mFormat != null &&
    238                         !mFormat.matcher(trimmedValue).matches()) {
    239                     problemFound = true;
    240                 }
    241                 break;
    242             case MISMATCHING_VALUE:
    243                 if (trimmedValue != null && mMatch != null &&
    244                         !mMatch.matcher(trimmedValue).lookingAt()) {
    245                     problemFound = true;
    246                 }
    247                 break;
    248             default:
    249                 throw new RuntimeException("Unknown problem: " + problem);
    250         }
    251         if (problemFound) {
    252             problems.add(field, problem);
    253         }
    254         return !problemFound;
    255     }
    256 
    257     /**
    258      * Checks the value of a particular field in a particular script against the known values for
    259      * this field. If script is null, it checks both the local and the latin values. Otherwise it
    260      * checks only the values in the script specified.
    261      */
    262     private boolean isKnownInScript(ScriptType script, String value) {
    263         String trimmedValue = Util.trimToNull(value);
    264         Util.checkNotNull(trimmedValue);
    265         if (script == null) {
    266             return (mCandidateValues == null ||
    267                     mCandidateValues.containsKey(trimmedValue.toLowerCase()));
    268         }
    269         // Otherwise, if we know the script, we want to restrict the candidates to only names in
    270         // that script.
    271         String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames;
    272         Set<String> candidates = new HashSet<String>();
    273         if (namesToConsider != null) {
    274             for (String name : namesToConsider) {
    275                 candidates.add(name.toLowerCase());
    276             }
    277         }
    278         if (mKeys != null) {
    279             for (String name : mKeys) {
    280                 candidates.add(name.toLowerCase());
    281             }
    282         }
    283 
    284         if (candidates.size() == 0 || trimmedValue == null) {
    285             return true;
    286         }
    287 
    288         return candidates.contains(value.toLowerCase());
    289     }
    290 
    291     /**
    292      * Parses the value of the "fmt" key in the data to see which fields are used for a particular
    293      * country. Returns a list of all fields found. Country is always assumed to be present. Skips
    294      * characters that indicate new-lines in the format information, as well as any characters not
    295      * escaped with "%".
    296      */
    297     private static Set<AddressField> parseAddressFields(String value) {
    298         EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
    299         boolean escaped = false;
    300         for (char c : value.toCharArray()) {
    301             if (escaped) {
    302                 escaped = false;
    303                 if (c == 'n') {
    304                     continue;
    305                 }
    306                 AddressField f = AddressField.of(c);
    307                 if (f == null) {
    308                     throw new RuntimeException(
    309                             "Unrecognized character '" + c + "' in format pattern: " + value);
    310                 }
    311                 result.add(f);
    312             } else if (c == '%') {
    313                 escaped = true;
    314             }
    315         }
    316         // These fields are not mentioned in the metadata at the moment since there is an effort to
    317         // move away from STREET_ADDRESS and use these fields instead. This means they have to be
    318         // removed here.
    319         result.remove(AddressField.ADDRESS_LINE_1);
    320         result.remove(AddressField.ADDRESS_LINE_2);
    321 
    322         return result;
    323     }
    324 
    325     /**
    326      * Parses the value of the "required" key in the data. Adds country as well as any other field
    327      * mentioned in the string.
    328      */
    329     private static Set<AddressField> parseRequireString(String value) {
    330         // Country is always required
    331         EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
    332 
    333         for (char c : value.toCharArray()) {
    334             AddressField f = AddressField.of(c);
    335             if (f == null) {
    336                 throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: "
    337                         + value);
    338             }
    339             result.add(f);
    340         }
    341         // These fields are not mentioned in the metadata at the moment since there is an effort to
    342         // move away from STREET_ADDRESS and use these fields instead. This means they have to be
    343         // removed here.
    344         result.remove(AddressField.ADDRESS_LINE_1);
    345         result.remove(AddressField.ADDRESS_LINE_2);
    346 
    347         return result;
    348     }
    349 
    350     /**
    351      * Returns true if this key represents a country. We assume all keys with only one delimiter are
    352      * at the country level (such as "data/US").
    353      */
    354     private boolean isCountryKey() {
    355         Util.checkNotNull(mId, "Cannot use null as key");
    356         return mId.split(KEY_DELIMITER).length == 2;
    357     }
    358 }
    359