/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.utils;

import android.content.ContentValues;
import android.content.Context;
import android.content.res.AssetManager;
import android.content.res.Resources;
import android.util.Log;

import com.android.inputmethod.latin.AssetFileAddress;
import com.android.inputmethod.latin.BinaryDictionaryGetter;
import com.android.inputmethod.latin.R;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.concurrent.TimeUnit;

/**
 * This class encapsulates the logic for the Latin-IME side of dictionary information management.
 */
public class DictionaryInfoUtils {
    private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
    private static final String RESOURCE_PACKAGE_NAME = R.class.getPackage().getName();
    private static final String DEFAULT_MAIN_DICT = "main";
    private static final String MAIN_DICT_PREFIX = "main_";
    // 6 digits - unicode is limited to 21 bits
    private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;

    /**
     * Immutable description of one dictionary: its id, locale, description, the address of
     * the file that backs it, and its version.
     */
    public static class DictionaryInfo {
        // Keys used in the ContentValues built by toContentValues().
        private static final String LOCALE_COLUMN = "locale";
        private static final String WORDLISTID_COLUMN = "id";
        private static final String LOCAL_FILENAME_COLUMN = "filename";
        private static final String DESCRIPTION_COLUMN = "description";
        private static final String DATE_COLUMN = "date";
        private static final String FILESIZE_COLUMN = "filesize";
        private static final String VERSION_COLUMN = "version";
        public final String mId;
        public final Locale mLocale;
        public final String mDescription;
        public final AssetFileAddress mFileAddress;
        public final int mVersion;

        public DictionaryInfo(final String id, final Locale locale, final String description,
                final AssetFileAddress fileAddress, final int version) {
            mId = id;
            mLocale = locale;
            mDescription = description;
            mFileAddress = fileAddress;
            mVersion = version;
        }

        /**
         * Converts this dictionary info to a {@link ContentValues} map.
         *
         * The date entry is the last modification time of the backing file, converted from
         * milliseconds to seconds.
         * @return a newly created ContentValues holding all the fields of this object
         */
        public ContentValues toContentValues() {
            final ContentValues values = new ContentValues();
            values.put(WORDLISTID_COLUMN, mId);
            values.put(LOCALE_COLUMN, mLocale.toString());
            values.put(DESCRIPTION_COLUMN, mDescription);
            values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename);
            values.put(DATE_COLUMN, TimeUnit.MILLISECONDS.toSeconds(
                    new File(mFileAddress.mFilename).lastModified()));
            values.put(FILESIZE_COLUMN, mFileAddress.mLength);
            values.put(VERSION_COLUMN, mVersion);
            return values;
        }
    }

    private DictionaryInfoUtils() {
        // Private constructor to forbid instantiation of this helper class.
    }

    /**
     * Returns whether we may want to use this character as part of a file name.
     *
     * This basically only accepts ascii letters, numbers and underscore, and rejects
     * everything else.
     */
    private static boolean isFileNameCharacter(int codePoint) {
        if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
        if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
        if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
        return codePoint == '_'; // Underscore
    }

    /**
     * Escapes a string for any characters that may be suspicious for a file or directory name.
     *
     * Concretely this does a sort of URL-encoding except it will encode everything that's not
     * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
     * we cannot allow here). Each escaped code point is written as '%' followed by exactly
     * {@link #MAX_HEX_DIGITS_FOR_CODEPOINT} lowercase hexadecimal digits.
     * @param name the string to escape
     * @return the escaped string
     */
    // TODO: create a unit test for this method
    public static String replaceFileNameDangerousCharacters(final String name) {
        // This assumes '%' is fully available as a non-separator, normal
        // character in a file name. This is probably true for all file systems.
        // The format only depends on a constant, so build it once rather than per character.
        final String escapeFormat = "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x";
        final StringBuilder sb = new StringBuilder();
        final int nameLength = name.length();
        for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
            final int codePoint = name.codePointAt(i);
            if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            } else {
                // A null locale means no localization is applied to the digits.
                sb.append(String.format((Locale)null, escapeFormat, codePoint));
            }
        }
        return sb.toString();
    }

    /**
     * Helper method to get the top level cache directory.
     */
    private static String getWordListCacheDirectory(final Context context) {
        return context.getFilesDir() + File.separator + "dicts";
    }

    /**
     * Helper method to get the top level temp directory.
     */
    public static String getWordListTempDirectory(final Context context) {
        return context.getFilesDir() + File.separator + "tmp";
    }

    /**
     * Reverse escaping done by replaceFileNameDangerousCharacters.
     *
     * @param fname the escaped file name
     * @return the unescaped word list id
     * @throws StringIndexOutOfBoundsException if a '%' is followed by fewer than
     *         {@link #MAX_HEX_DIGITS_FOR_CODEPOINT} characters
     * @throws NumberFormatException if the characters following a '%' are not hexadecimal
     * @throws IllegalArgumentException if an escape sequence is not a valid code point
     */
    public static String getWordListIdFromFileName(final String fname) {
        final StringBuilder sb = new StringBuilder();
        final int fnameLength = fname.length();
        for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
            final int codePoint = fname.codePointAt(i);
            if ('%' != codePoint) {
                sb.appendCodePoint(codePoint);
            } else {
                // + 1 to pass the % sign
                final int encodedCodePoint = Integer.parseInt(
                        fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16);
                // Leave i on the last hex digit; the loop increment then steps past it.
                i += MAX_HEX_DIGITS_FOR_CODEPOINT;
                sb.appendCodePoint(encodedCodePoint);
            }
        }
        return sb.toString();
    }

    /**
     * Helper method to get the list of cache directories, one for each distinct locale.
     *
     * @return the entries of the cache directory, or null as returned by
     *         {@link File#listFiles()} when the directory cannot be listed
     */
    public static File[] getCachedDirectoryList(final Context context) {
        return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles();
    }

    /**
     * Returns the category for a given file name.
     *
     * This parses the file name, extracts the category, and returns it. See
     * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
     * @return The category as a string or null if it can't be found in the file name.
     */
    public static String getCategoryFromFileName(final String fileName) {
        final String id = getWordListIdFromFileName(fileName);
        final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
        // An id is supposed to be in format category:locale, so splitting on the separator
        // should yield a 2-elements array
        if (2 != idArray.length) return null;
        return idArray[0];
    }

    /**
     * Find out the cache directory associated with a specific locale.
     *
     * The directory is created if it does not exist yet. A failure to create it is logged
     * but the name is returned regardless.
     */
    private static String getCacheDirectoryForLocale(final String locale, final Context context) {
        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
        final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
                + relativeDirectoryName;
        final File directory = new File(absoluteDirectoryName);
        if (!directory.exists()) {
            if (!directory.mkdirs()) {
                Log.e(TAG, "Could not create the directory for locale " + locale);
            }
        }
        return absoluteDirectoryName;
    }

    /**
     * Generates a file name for the id and locale passed as an argument.
     *
     * In the current implementation the file name returned will always be unique for
     * any id/locale pair, but please do not expect that the id can be the same for
     * different dictionaries with different locales. An id should be unique for any
     * dictionary.
     * The file name is pretty much an URL-encoded version of the id inside a directory
     * named like the locale, except it will also escape characters that look dangerous
     * to some file systems.
     * @param id the id of the dictionary for which to get a file name
     * @param locale the locale for which to get the file name as a string
     * @param context the context to use for getting the directory
     * @return the name of the file to be created
     */
    public static String getCacheFileName(String id, String locale, Context context) {
        final String fileName = replaceFileNameDangerousCharacters(id);
        return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
    }

    /**
     * Returns whether the passed id is in format category:locale with the main dictionary
     * category.
     */
    public static boolean isMainWordListId(final String id) {
        final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
        // An id is supposed to be in format category:locale, so splitting on the separator
        // should yield a 2-elements array
        if (2 != idArray.length) return false;
        return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
    }

    /**
     * Helper method to return a dictionary res id for a locale, or 0 if none.
     * @param res the resources to look the dictionary up in
     * @param locale dictionary locale
     * @return main dictionary resource id
     */
    public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res,
            final Locale locale) {
        // Try to find main_language_country dictionary.
        if (!locale.getCountry().isEmpty()) {
            final String dictLanguageCountry =
                    MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT);
            final int languageCountryResId =
                    res.getIdentifier(dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME);
            if (0 != languageCountryResId) {
                return languageCountryResId;
            }
        }

        // Try to find main_language dictionary. getIdentifier returns 0 when there is no
        // such resource, which is also what this method returns for "not found".
        final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage();
        return res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME);
    }

    /**
     * Returns a main dictionary resource id, falling back to the default main dictionary
     * when there is none for the locale.
     * @param res the resources to look the dictionary up in
     * @param locale dictionary locale
     * @return main dictionary resource id
     */
    public static int getMainDictionaryResourceId(final Resources res, final Locale locale) {
        final int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale);
        if (0 != resourceId) return resourceId;
        return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME);
    }

    /**
     * Returns the id associated with the main word list for a specified locale.
     *
     * Word lists stored in Android Keyboard's resources are referred to as the "main"
     * word lists. Since they can be updated like any other list, we need to assign a
     * unique ID to them. This ID is just the name of the language (locale-wise) they
     * are for, and this method returns this ID.
     */
    public static String getMainDictId(final Locale locale) {
        // This works because we don't include by default different dictionaries for
        // different countries. This actually needs to return the id that we would
        // like to use for word lists included in resources, and the following is okay.
        return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY +
                BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage();
    }

    /**
     * Reads the header of a dictionary file, considering the whole file as the dictionary.
     * @param file the dictionary file
     * @return the result of {@link BinaryDictIOUtils#getDictionaryFileHeaderOrNull}
     */
    public static FileHeader getDictionaryFileHeaderOrNull(final File file) {
        return BinaryDictIOUtils.getDictionaryFileHeaderOrNull(file, 0, file.length());
    }

    /**
     * Builds a DictionaryInfo from the header of the dictionary at the passed address.
     *
     * @param fileAddress the address of the dictionary
     * @return the dictionary info, or null if the address is null or no header could be read
     */
    private static DictionaryInfo createDictionaryInfoFromFileAddress(
            final AssetFileAddress fileAddress) {
        // NOTE(review): whether the helpers producing the address can return null is not
        // visible from this file; guard so that a failed lookup does not crash the listing.
        if (null == fileAddress) {
            return null;
        }
        final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull(
                new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength);
        if (null == header) {
            Log.e(TAG, "Could not read the dictionary header of " + fileAddress.mFilename);
            return null;
        }
        final String id = header.getId();
        final Locale locale = LocaleUtils.constructLocaleFromString(header.getLocaleString());
        final String description = header.getDescription();
        final String version = header.getVersion();
        return new DictionaryInfo(id, locale, description, fileAddress, Integer.parseInt(version));
    }

    /**
     * Adds a dictionary to the list, keeping a single entry per locale.
     *
     * If the list already has an entry for the same locale with a version at least as
     * recent, the list is left untouched. Otherwise older entries for this locale are
     * removed and the new element is appended.
     */
    private static void addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList,
            final DictionaryInfo newElement) {
        final Iterator<DictionaryInfo> iter = dictList.iterator();
        while (iter.hasNext()) {
            final DictionaryInfo thisDictInfo = iter.next();
            if (thisDictInfo.mLocale.equals(newElement.mLocale)) {
                if (newElement.mVersion <= thisDictInfo.mVersion) {
                    return;
                }
                iter.remove();
            }
        }
        dictList.add(newElement);
    }

    /**
     * Lists the main dictionaries currently available, both from the cache directories and
     * from the resources, with at most one entry per locale.
     *
     * Dictionaries whose header cannot be read are skipped.
     * @param context the context to read files and resources from
     * @return the list of available main dictionaries
     */
    public static ArrayList<DictionaryInfo> getCurrentDictionaryFileNameAndVersionInfo(
            final Context context) {
        final ArrayList<DictionaryInfo> dictList = CollectionUtils.newArrayList();

        // Retrieve downloaded dictionaries
        final File[] directoryList = getCachedDirectoryList(context);
        if (null != directoryList) {
            for (final File directory : directoryList) {
                final String localeString = getWordListIdFromFileName(directory.getName());
                File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context);
                for (final File dict : dicts) {
                    final String wordListId = getWordListIdFromFileName(dict.getName());
                    if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue;
                    final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
                    final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict);
                    final DictionaryInfo dictionaryInfo =
                            createDictionaryInfoFromFileAddress(fileAddress);
                    // Skip dictionaries whose header could not be read.
                    if (null == dictionaryInfo) continue;
                    // Protect against cases of a less-specific dictionary being found, like an
                    // en dictionary being used for an en_US locale. In this case, the en dictionary
                    // should be used for en_US but discounted for listing purposes.
                    if (!dictionaryInfo.mLocale.equals(locale)) continue;
                    addOrUpdateDictInfo(dictList, dictionaryInfo);
                }
            }
        }

        // Retrieve files from assets
        final Resources resources = context.getResources();
        final AssetManager assets = resources.getAssets();
        for (final String localeString : assets.getLocales()) {
            final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
            final int resourceId =
                    DictionaryInfoUtils.getMainDictionaryResourceIdIfAvailableForLocale(
                            resources, locale);
            if (0 == resourceId) continue;
            final AssetFileAddress fileAddress =
                    BinaryDictionaryGetter.loadFallbackResource(context, resourceId);
            final DictionaryInfo dictionaryInfo = createDictionaryInfoFromFileAddress(fileAddress);
            // Skip dictionaries whose header could not be read.
            if (null == dictionaryInfo) continue;
            // Protect against cases of a less-specific dictionary being found, like an
            // en dictionary being used for an en_US locale. In this case, the en dictionary
            // should be used for en_US but discounted for listing purposes.
            if (!dictionaryInfo.mLocale.equals(locale)) continue;
            addOrUpdateDictInfo(dictList, dictionaryInfo);
        }

        return dictList;
    }
}