1 /* 2 * Copyright (C) 2010 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.i18n.addressinput; 18 19 import com.android.i18n.addressinput.LookupKey.ScriptType; 20 21 import java.util.EnumSet; 22 import java.util.HashSet; 23 import java.util.Map; 24 import java.util.Set; 25 import java.util.regex.Pattern; 26 27 /** 28 * Accesses address verification data used to verify components of an address. 29 * <p> Not all fields require all types of validation, although this could be done. In particular, 30 * the current implementation only provides known value verification for the hierarchical fields, 31 * and only provides format and match verification for the postal code field. 32 */ 33 public class FieldVerifier { 34 // Node data values are delimited by this symbol. 35 private static final String DATA_DELIMITER = "~"; 36 // Keys are built up using this delimiter: eg data/US, data/US/CA. 37 private static final String KEY_DELIMITER = "/"; 38 39 private String mId; 40 private DataSource mDataSource; 41 42 private Set<AddressField> mPossibleFields; 43 private Set<AddressField> mRequired; 44 // Known values. Can be either a key, a name in Latin, or a name in native script. 45 private Map<String, String> mCandidateValues; 46 47 // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since 48 // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the 49 // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations 50 // exist. 51 private String[] mKeys; 52 // Names in Latin. These are only populated if the native/local names are in a script other than 53 // latin. 54 private String[] mLatinNames; 55 // Names in native script. 56 private String[] mLocalNames; 57 58 // Pattern representing the format of a postal code number. 59 private Pattern mFormat; 60 // Defines the valid range of a postal code number. 61 private Pattern mMatch; 62 63 /** 64 * Creates the root field verifier for a particular data source. 65 */ 66 public FieldVerifier(DataSource dataSource) { 67 mDataSource = dataSource; 68 populateRootVerifier(); 69 } 70 71 /** 72 * Creates a field verifier based on its parent and on the new data for this node supplied by 73 * nodeData (which may be null). 74 */ 75 private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) { 76 // Most information is inherited from the parent. 77 mPossibleFields = parent.mPossibleFields; 78 mRequired = parent.mRequired; 79 mDataSource = parent.mDataSource; 80 mFormat = parent.mFormat; 81 mMatch = parent.mMatch; 82 // Here we add in any overrides from this particular node as well as information such as 83 // localNames, latinNames and keys. 84 populate(nodeData); 85 // candidateValues should never be inherited from the parent, but built up from the 86 // localNames in this node. 87 mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames); 88 } 89 90 /** 91 * Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This 92 * is a little messy at the moment since not all the appropriate information is actually under 93 * the root "data" node in the metadata. For example, "possibleFields" and "required" are not 94 * present there. 95 */ 96 private void populateRootVerifier() { 97 mId = "data"; 98 // Keys come from the countries under "data". 99 AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data"); 100 if (rootNode.containsKey(AddressDataKey.COUNTRIES)) { 101 mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER); 102 } 103 // candidateValues is just the set of keys. 104 mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null); 105 106 // Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping. 107 // TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this 108 // information instead. 109 AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ"); 110 mPossibleFields = new HashSet<AddressField>(); 111 if (defaultZZ.containsKey(AddressDataKey.FMT)) { 112 mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT)); 113 } 114 mRequired = new HashSet<AddressField>(); 115 if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) { 116 mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE)); 117 } 118 } 119 120 /** 121 * Populates this verifier with data from the node data passed in. This may be null. 122 */ 123 private void populate(AddressVerificationNodeData nodeData) { 124 if (nodeData == null) { 125 return; 126 } 127 if (nodeData.containsKey(AddressDataKey.ID)) { 128 mId = nodeData.get(AddressDataKey.ID); 129 } 130 if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) { 131 mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER); 132 } 133 if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) { 134 mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER); 135 } 136 if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) { 137 mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER); 138 } 139 if (nodeData.containsKey(AddressDataKey.FMT)) { 140 mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT)); 141 } 142 if (nodeData.containsKey(AddressDataKey.REQUIRE)) { 143 mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE)); 144 } 145 if (nodeData.containsKey(AddressDataKey.XZIP)) { 146 mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE); 147 } 148 if (nodeData.containsKey(AddressDataKey.ZIP)) { 149 // This key has two different meanings, depending on whether this is a country-level key 150 // or not. 151 if (isCountryKey()) { 152 mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP), 153 Pattern.CASE_INSENSITIVE); 154 } else { 155 mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP), 156 Pattern.CASE_INSENSITIVE); 157 } 158 } 159 // If there are latin names but no local names, and there are the same number of latin names 160 // as there are keys, then we assume the local names are the same as the keys. 161 if (mKeys != null && mLocalNames == null && mLatinNames != null && 162 mKeys.length == mLatinNames.length) { 163 mLocalNames = mKeys; 164 } 165 } 166 167 FieldVerifier refineVerifier(String sublevel) { 168 if (Util.trimToNull(sublevel) == null) { 169 return new FieldVerifier(this, null); 170 } 171 // If the parent node didn't exist, then the subLevelName will start with "null". 172 String subLevelName = mId + KEY_DELIMITER + sublevel; 173 // For names with no Latin equivalent, we can look up the sublevel name directly. 174 AddressVerificationNodeData nodeData = mDataSource.get(subLevelName); 175 if (nodeData != null) { 176 return new FieldVerifier(this, nodeData); 177 } 178 // If that failed, then we try to look up the local name equivalent of this latin name. 179 // First check these exist. 180 if (mLatinNames == null) { 181 return new FieldVerifier(this, null); 182 } 183 for (int n = 0; n < mLatinNames.length; n++) { 184 if (mLatinNames[n].equalsIgnoreCase(sublevel)) { 185 // We found a match - we should try looking up a key with the local name at the same 186 // index. 187 subLevelName = mId + KEY_DELIMITER + mLocalNames[n]; 188 nodeData = mDataSource.get(subLevelName); 189 if (nodeData != null) { 190 return new FieldVerifier(this, nodeData); 191 } 192 } 193 } 194 // No sub-verifiers were found. 195 return new FieldVerifier(this, null); 196 } 197 198 /** 199 * Returns the ID of this verifier. 200 */ 201 @Override 202 public String toString() { 203 return mId; 204 } 205 206 /** 207 * Checks a value in a particular script for a particular field to see if it causes the problem 208 * specified. If so, this problem is added to the AddressProblems collection passed in. Returns 209 * true if no problem was found. 210 */ 211 protected boolean check(ScriptType script, AddressProblemType problem, AddressField field, 212 String value, AddressProblems problems) { 213 boolean problemFound = false; 214 215 String trimmedValue = Util.trimToNull(value); 216 switch (problem) { 217 case USING_UNUSED_FIELD: 218 if (trimmedValue != null && !mPossibleFields.contains(field)) { 219 problemFound = true; 220 } 221 break; 222 case MISSING_REQUIRED_FIELD: 223 if (mRequired.contains(field) && trimmedValue == null) { 224 problemFound = true; 225 } 226 break; 227 case UNKNOWN_VALUE: 228 // An empty string will never be an UNKNOWN_VALUE. It is invalid 229 // only when it appears in a required field (In that case it will 230 // be reported as MISSING_REQUIRED_FIELD). 231 if (trimmedValue == null) { 232 break; 233 } 234 problemFound = !isKnownInScript(script, trimmedValue); 235 break; 236 case UNRECOGNIZED_FORMAT: 237 if (trimmedValue != null && mFormat != null && 238 !mFormat.matcher(trimmedValue).matches()) { 239 problemFound = true; 240 } 241 break; 242 case MISMATCHING_VALUE: 243 if (trimmedValue != null && mMatch != null && 244 !mMatch.matcher(trimmedValue).lookingAt()) { 245 problemFound = true; 246 } 247 break; 248 default: 249 throw new RuntimeException("Unknown problem: " + problem); 250 } 251 if (problemFound) { 252 problems.add(field, problem); 253 } 254 return !problemFound; 255 } 256 257 /** 258 * Checks the value of a particular field in a particular script against the known values for 259 * this field. If script is null, it checks both the local and the latin values. Otherwise it 260 * checks only the values in the script specified. 261 */ 262 private boolean isKnownInScript(ScriptType script, String value) { 263 String trimmedValue = Util.trimToNull(value); 264 Util.checkNotNull(trimmedValue); 265 if (script == null) { 266 return (mCandidateValues == null || 267 mCandidateValues.containsKey(trimmedValue.toLowerCase())); 268 } 269 // Otherwise, if we know the script, we want to restrict the candidates to only names in 270 // that script. 271 String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames; 272 Set<String> candidates = new HashSet<String>(); 273 if (namesToConsider != null) { 274 for (String name : namesToConsider) { 275 candidates.add(name.toLowerCase()); 276 } 277 } 278 if (mKeys != null) { 279 for (String name : mKeys) { 280 candidates.add(name.toLowerCase()); 281 } 282 } 283 284 if (candidates.size() == 0 || trimmedValue == null) { 285 return true; 286 } 287 288 return candidates.contains(value.toLowerCase()); 289 } 290 291 /** 292 * Parses the value of the "fmt" key in the data to see which fields are used for a particular 293 * country. Returns a list of all fields found. Country is always assumed to be present. Skips 294 * characters that indicate new-lines in the format information, as well as any characters not 295 * escaped with "%". 296 */ 297 private static Set<AddressField> parseAddressFields(String value) { 298 EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY); 299 boolean escaped = false; 300 for (char c : value.toCharArray()) { 301 if (escaped) { 302 escaped = false; 303 if (c == 'n') { 304 continue; 305 } 306 AddressField f = AddressField.of(c); 307 if (f == null) { 308 throw new RuntimeException( 309 "Unrecognized character '" + c + "' in format pattern: " + value); 310 } 311 result.add(f); 312 } else if (c == '%') { 313 escaped = true; 314 } 315 } 316 // These fields are not mentioned in the metadata at the moment since there is an effort to 317 // move away from STREET_ADDRESS and use these fields instead. This means they have to be 318 // removed here. 319 result.remove(AddressField.ADDRESS_LINE_1); 320 result.remove(AddressField.ADDRESS_LINE_2); 321 322 return result; 323 } 324 325 /** 326 * Parses the value of the "required" key in the data. Adds country as well as any other field 327 * mentioned in the string. 328 */ 329 private static Set<AddressField> parseRequireString(String value) { 330 // Country is always required 331 EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY); 332 333 for (char c : value.toCharArray()) { 334 AddressField f = AddressField.of(c); 335 if (f == null) { 336 throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: " 337 + value); 338 } 339 result.add(f); 340 } 341 // These fields are not mentioned in the metadata at the moment since there is an effort to 342 // move away from STREET_ADDRESS and use these fields instead. This means they have to be 343 // removed here. 344 result.remove(AddressField.ADDRESS_LINE_1); 345 result.remove(AddressField.ADDRESS_LINE_2); 346 347 return result; 348 } 349 350 /** 351 * Returns true if this key represents a country. We assume all keys with only one delimiter are 352 * at the country level (such as "data/US"). 353 */ 354 private boolean isCountryKey() { 355 Util.checkNotNull(mId, "Cannot use null as key"); 356 return mId.split(KEY_DELIMITER).length == 2; 357 } 358 } 359