1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.utils; 18 19 import android.content.ContentValues; 20 import android.content.Context; 21 import android.content.res.AssetManager; 22 import android.content.res.Resources; 23 import android.text.TextUtils; 24 import android.util.Log; 25 26 import com.android.inputmethod.latin.AssetFileAddress; 27 import com.android.inputmethod.latin.BinaryDictionaryGetter; 28 import com.android.inputmethod.latin.Constants; 29 import com.android.inputmethod.latin.R; 30 import com.android.inputmethod.latin.makedict.DictionaryHeader; 31 import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 32 import com.android.inputmethod.latin.settings.SpacingAndPunctuations; 33 34 import java.io.File; 35 import java.io.IOException; 36 import java.util.ArrayList; 37 import java.util.Iterator; 38 import java.util.Locale; 39 import java.util.concurrent.TimeUnit; 40 41 /** 42 * This class encapsulates the logic for the Latin-IME side of dictionary information management. 43 */ 44 public class DictionaryInfoUtils { 45 private static final String TAG = DictionaryInfoUtils.class.getSimpleName(); 46 private static final String RESOURCE_PACKAGE_NAME = R.class.getPackage().getName(); 47 private static final String DEFAULT_MAIN_DICT = "main"; 48 private static final String MAIN_DICT_PREFIX = "main_"; 49 // 6 digits - unicode is limited to 21 bits 50 private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6; 51 52 public static class DictionaryInfo { 53 private static final String LOCALE_COLUMN = "locale"; 54 private static final String WORDLISTID_COLUMN = "id"; 55 private static final String LOCAL_FILENAME_COLUMN = "filename"; 56 private static final String DESCRIPTION_COLUMN = "description"; 57 private static final String DATE_COLUMN = "date"; 58 private static final String FILESIZE_COLUMN = "filesize"; 59 private static final String VERSION_COLUMN = "version"; 60 public final String mId; 61 public final Locale mLocale; 62 public final String mDescription; 63 public final AssetFileAddress mFileAddress; 64 public final int mVersion; 65 public DictionaryInfo(final String id, final Locale locale, final String description, 66 final AssetFileAddress fileAddress, final int version) { 67 mId = id; 68 mLocale = locale; 69 mDescription = description; 70 mFileAddress = fileAddress; 71 mVersion = version; 72 } 73 public ContentValues toContentValues() { 74 final ContentValues values = new ContentValues(); 75 values.put(WORDLISTID_COLUMN, mId); 76 values.put(LOCALE_COLUMN, mLocale.toString()); 77 values.put(DESCRIPTION_COLUMN, mDescription); 78 values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename); 79 values.put(DATE_COLUMN, TimeUnit.MILLISECONDS.toSeconds( 80 new File(mFileAddress.mFilename).lastModified())); 81 values.put(FILESIZE_COLUMN, mFileAddress.mLength); 82 values.put(VERSION_COLUMN, mVersion); 83 return values; 84 } 85 } 86 87 private DictionaryInfoUtils() { 88 // Private constructor to forbid instantation of this helper class. 89 } 90 91 /** 92 * Returns whether we may want to use this character as part of a file name. 93 * 94 * This basically only accepts ascii letters and numbers, and rejects everything else. 95 */ 96 private static boolean isFileNameCharacter(int codePoint) { 97 if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit 98 if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase 99 if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase 100 return codePoint == '_'; // Underscore 101 } 102 103 /** 104 * Escapes a string for any characters that may be suspicious for a file or directory name. 105 * 106 * Concretely this does a sort of URL-encoding except it will encode everything that's not 107 * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which 108 * we cannot allow here) 109 */ 110 // TODO: create a unit test for this method 111 public static String replaceFileNameDangerousCharacters(final String name) { 112 // This assumes '%' is fully available as a non-separator, normal 113 // character in a file name. This is probably true for all file systems. 114 final StringBuilder sb = new StringBuilder(); 115 final int nameLength = name.length(); 116 for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) { 117 final int codePoint = name.codePointAt(i); 118 if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) { 119 sb.appendCodePoint(codePoint); 120 } else { 121 sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", 122 codePoint)); 123 } 124 } 125 return sb.toString(); 126 } 127 128 /** 129 * Helper method to get the top level cache directory. 130 */ 131 private static String getWordListCacheDirectory(final Context context) { 132 return context.getFilesDir() + File.separator + "dicts"; 133 } 134 135 /** 136 * Helper method to get the top level temp directory. 137 */ 138 public static String getWordListTempDirectory(final Context context) { 139 return context.getFilesDir() + File.separator + "tmp"; 140 } 141 142 /** 143 * Reverse escaping done by replaceFileNameDangerousCharacters. 144 */ 145 public static String getWordListIdFromFileName(final String fname) { 146 final StringBuilder sb = new StringBuilder(); 147 final int fnameLength = fname.length(); 148 for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) { 149 final int codePoint = fname.codePointAt(i); 150 if ('%' != codePoint) { 151 sb.appendCodePoint(codePoint); 152 } else { 153 // + 1 to pass the % sign 154 final int encodedCodePoint = Integer.parseInt( 155 fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16); 156 i += MAX_HEX_DIGITS_FOR_CODEPOINT; 157 sb.appendCodePoint(encodedCodePoint); 158 } 159 } 160 return sb.toString(); 161 } 162 163 /** 164 * Helper method to the list of cache directories, one for each distinct locale. 165 */ 166 public static File[] getCachedDirectoryList(final Context context) { 167 return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles(); 168 } 169 170 /** 171 * Returns the category for a given file name. 172 * 173 * This parses the file name, extracts the category, and returns it. See 174 * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}. 175 * @return The category as a string or null if it can't be found in the file name. 176 */ 177 public static String getCategoryFromFileName(final String fileName) { 178 final String id = getWordListIdFromFileName(fileName); 179 final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR); 180 // An id is supposed to be in format category:locale, so splitting on the separator 181 // should yield a 2-elements array 182 if (2 != idArray.length) return null; 183 return idArray[0]; 184 } 185 186 /** 187 * Find out the cache directory associated with a specific locale. 188 */ 189 private static String getCacheDirectoryForLocale(final String locale, final Context context) { 190 final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale); 191 final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator 192 + relativeDirectoryName; 193 final File directory = new File(absoluteDirectoryName); 194 if (!directory.exists()) { 195 if (!directory.mkdirs()) { 196 Log.e(TAG, "Could not create the directory for locale" + locale); 197 } 198 } 199 return absoluteDirectoryName; 200 } 201 202 /** 203 * Generates a file name for the id and locale passed as an argument. 204 * 205 * In the current implementation the file name returned will always be unique for 206 * any id/locale pair, but please do not expect that the id can be the same for 207 * different dictionaries with different locales. An id should be unique for any 208 * dictionary. 209 * The file name is pretty much an URL-encoded version of the id inside a directory 210 * named like the locale, except it will also escape characters that look dangerous 211 * to some file systems. 212 * @param id the id of the dictionary for which to get a file name 213 * @param locale the locale for which to get the file name as a string 214 * @param context the context to use for getting the directory 215 * @return the name of the file to be created 216 */ 217 public static String getCacheFileName(String id, String locale, Context context) { 218 final String fileName = replaceFileNameDangerousCharacters(id); 219 return getCacheDirectoryForLocale(locale, context) + File.separator + fileName; 220 } 221 222 public static boolean isMainWordListId(final String id) { 223 final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR); 224 // An id is supposed to be in format category:locale, so splitting on the separator 225 // should yield a 2-elements array 226 if (2 != idArray.length) return false; 227 return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]); 228 } 229 230 /** 231 * Helper method to return a dictionary res id for a locale, or 0 if none. 232 * @param locale dictionary locale 233 * @return main dictionary resource id 234 */ 235 public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res, 236 final Locale locale) { 237 int resId; 238 // Try to find main_language_country dictionary. 239 if (!locale.getCountry().isEmpty()) { 240 final String dictLanguageCountry = 241 MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT); 242 if ((resId = res.getIdentifier( 243 dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME)) != 0) { 244 return resId; 245 } 246 } 247 248 // Try to find main_language dictionary. 249 final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage(); 250 if ((resId = res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME)) != 0) { 251 return resId; 252 } 253 254 // Not found, return 0 255 return 0; 256 } 257 258 /** 259 * Returns a main dictionary resource id 260 * @param locale dictionary locale 261 * @return main dictionary resource id 262 */ 263 public static int getMainDictionaryResourceId(final Resources res, final Locale locale) { 264 int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale); 265 if (0 != resourceId) return resourceId; 266 return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME); 267 } 268 269 /** 270 * Returns the id associated with the main word list for a specified locale. 271 * 272 * Word lists stored in Android Keyboard's resources are referred to as the "main" 273 * word lists. Since they can be updated like any other list, we need to assign a 274 * unique ID to them. This ID is just the name of the language (locale-wise) they 275 * are for, and this method returns this ID. 276 */ 277 public static String getMainDictId(final Locale locale) { 278 // This works because we don't include by default different dictionaries for 279 // different countries. This actually needs to return the id that we would 280 // like to use for word lists included in resources, and the following is okay. 281 return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY + 282 BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString(); 283 } 284 285 public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) { 286 return getDictionaryFileHeaderOrNull(file, 0, file.length()); 287 } 288 289 private static DictionaryHeader getDictionaryFileHeaderOrNull(final File file, 290 final long offset, final long length) { 291 try { 292 final DictionaryHeader header = 293 BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length); 294 return header; 295 } catch (UnsupportedFormatException e) { 296 return null; 297 } catch (IOException e) { 298 return null; 299 } 300 } 301 302 /** 303 * Returns information of the dictionary. 304 * 305 * @param fileAddress the asset dictionary file address. 306 * @return information of the specified dictionary. 307 */ 308 private static DictionaryInfo createDictionaryInfoFromFileAddress( 309 final AssetFileAddress fileAddress) { 310 final DictionaryHeader header = getDictionaryFileHeaderOrNull( 311 new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength); 312 if (header == null) { 313 return null; 314 } 315 final String id = header.getId(); 316 final Locale locale = LocaleUtils.constructLocaleFromString(header.getLocaleString()); 317 final String description = header.getDescription(); 318 final String version = header.getVersion(); 319 return new DictionaryInfo(id, locale, description, fileAddress, Integer.parseInt(version)); 320 } 321 322 private static void addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList, 323 final DictionaryInfo newElement) { 324 final Iterator<DictionaryInfo> iter = dictList.iterator(); 325 while (iter.hasNext()) { 326 final DictionaryInfo thisDictInfo = iter.next(); 327 if (thisDictInfo.mLocale.equals(newElement.mLocale)) { 328 if (newElement.mVersion <= thisDictInfo.mVersion) { 329 return; 330 } 331 iter.remove(); 332 } 333 } 334 dictList.add(newElement); 335 } 336 337 public static ArrayList<DictionaryInfo> getCurrentDictionaryFileNameAndVersionInfo( 338 final Context context) { 339 final ArrayList<DictionaryInfo> dictList = new ArrayList<>(); 340 341 // Retrieve downloaded dictionaries 342 final File[] directoryList = getCachedDirectoryList(context); 343 if (null != directoryList) { 344 for (final File directory : directoryList) { 345 final String localeString = getWordListIdFromFileName(directory.getName()); 346 File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context); 347 for (final File dict : dicts) { 348 final String wordListId = getWordListIdFromFileName(dict.getName()); 349 if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue; 350 final Locale locale = LocaleUtils.constructLocaleFromString(localeString); 351 final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict); 352 final DictionaryInfo dictionaryInfo = 353 createDictionaryInfoFromFileAddress(fileAddress); 354 // Protect against cases of a less-specific dictionary being found, like an 355 // en dictionary being used for an en_US locale. In this case, the en dictionary 356 // should be used for en_US but discounted for listing purposes. 357 if (dictionaryInfo == null || !dictionaryInfo.mLocale.equals(locale)) continue; 358 addOrUpdateDictInfo(dictList, dictionaryInfo); 359 } 360 } 361 } 362 363 // Retrieve files from assets 364 final Resources resources = context.getResources(); 365 final AssetManager assets = resources.getAssets(); 366 for (final String localeString : assets.getLocales()) { 367 final Locale locale = LocaleUtils.constructLocaleFromString(localeString); 368 final int resourceId = 369 DictionaryInfoUtils.getMainDictionaryResourceIdIfAvailableForLocale( 370 context.getResources(), locale); 371 if (0 == resourceId) continue; 372 final AssetFileAddress fileAddress = 373 BinaryDictionaryGetter.loadFallbackResource(context, resourceId); 374 final DictionaryInfo dictionaryInfo = createDictionaryInfoFromFileAddress(fileAddress); 375 // Protect against cases of a less-specific dictionary being found, like an 376 // en dictionary being used for an en_US locale. In this case, the en dictionary 377 // should be used for en_US but discounted for listing purposes. 378 if (!dictionaryInfo.mLocale.equals(locale)) continue; 379 addOrUpdateDictInfo(dictList, dictionaryInfo); 380 } 381 382 return dictList; 383 } 384 385 public static boolean looksValidForDictionaryInsertion(final CharSequence text, 386 final SpacingAndPunctuations spacingAndPunctuations) { 387 if (TextUtils.isEmpty(text)) return false; 388 final int length = text.length(); 389 if (length > Constants.DICTIONARY_MAX_WORD_LENGTH) { 390 return false; 391 } 392 int i = 0; 393 int digitCount = 0; 394 while (i < length) { 395 final int codePoint = Character.codePointAt(text, i); 396 final int charCount = Character.charCount(codePoint); 397 i += charCount; 398 if (Character.isDigit(codePoint)) { 399 // Count digits: see below 400 digitCount += charCount; 401 continue; 402 } 403 if (!spacingAndPunctuations.isWordCodePoint(codePoint)) return false; 404 } 405 // We reject strings entirely comprised of digits to avoid using PIN codes or credit 406 // card numbers. It would come in handy for word prediction though; a good example is 407 // when writing one's address where the street number is usually quite discriminative, 408 // as well as the postal code. 409 return digitCount < length; 410 } 411 } 412