Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 import android.content.SharedPreferences;
     21 import android.content.pm.PackageManager.NameNotFoundException;
     22 import android.content.res.AssetFileDescriptor;
     23 import android.util.Log;
     24 
     25 import java.io.File;
     26 import java.util.ArrayList;
     27 import java.util.HashMap;
     28 import java.util.Locale;
     29 
     30 /**
     31  * Helper class to get the address of a mmap'able dictionary file.
     32  */
     33 class BinaryDictionaryGetter {
     34 
    /**
     * Tag used for the log messages emitted by this class.
     */
    private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();

    /**
     * Shared empty array, returned when there is no file to report.
     */
    private static final File[] EMPTY_FILE_ARRAY = new File[0];

    /**
     * Name of the shared preferences file, opened on the dictionary pack context, that
     * records which word lists are on and which are off.
     */
    private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs";

    // Name of the category for the main dictionary
    private static final String MAIN_DICTIONARY_CATEGORY = "main";
    // Separator placed between the category and the rest of a word list id
    public static final String ID_CATEGORY_SEPARATOR = ":";

    // Prevents this from being instantiated
    private BinaryDictionaryGetter() {}
     56 
     57     /**
     58      * Returns whether we may want to use this character as part of a file name.
     59      *
     60      * This basically only accepts ascii letters and numbers, and rejects everything else.
     61      */
     62     private static boolean isFileNameCharacter(int codePoint) {
     63         if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
     64         if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
     65         if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
     66         return codePoint == '_'; // Underscore
     67     }
     68 
     69     /**
     70      * Escapes a string for any characters that may be suspicious for a file or directory name.
     71      *
     72      * Concretely this does a sort of URL-encoding except it will encode everything that's not
     73      * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
     74      * we cannot allow here)
     75      */
     76     // TODO: create a unit test for this method
     77     private static String replaceFileNameDangerousCharacters(final String name) {
     78         // This assumes '%' is fully available as a non-separator, normal
     79         // character in a file name. This is probably true for all file systems.
     80         final StringBuilder sb = new StringBuilder();
     81         final int nameLength = name.length();
     82         for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
     83             final int codePoint = name.codePointAt(i);
     84             if (isFileNameCharacter(codePoint)) {
     85                 sb.appendCodePoint(codePoint);
     86             } else {
     87                 // 6 digits - unicode is limited to 21 bits
     88                 sb.append(String.format((Locale)null, "%%%1$06x", codePoint));
     89             }
     90         }
     91         return sb.toString();
     92     }
     93 
     94     /**
     95      * Reverse escaping done by replaceFileNameDangerousCharacters.
     96      */
     97     private static String getWordListIdFromFileName(final String fname) {
     98         final StringBuilder sb = new StringBuilder();
     99         final int fnameLength = fname.length();
    100         for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
    101             final int codePoint = fname.codePointAt(i);
    102             if ('%' != codePoint) {
    103                 sb.appendCodePoint(codePoint);
    104             } else {
    105                 final int encodedCodePoint = Integer.parseInt(fname.substring(i + 1, i + 7), 16);
    106                 i += 6;
    107                 sb.appendCodePoint(encodedCodePoint);
    108             }
    109         }
    110         return sb.toString();
    111     }
    112 
    113     /**
    114      * Helper method to get the top level cache directory.
    115      */
    116     private static String getWordListCacheDirectory(final Context context) {
    117         return context.getFilesDir() + File.separator + "dicts";
    118     }
    119 
    120     /**
    121      * Find out the cache directory associated with a specific locale.
    122      */
    123     private static String getCacheDirectoryForLocale(final String locale, final Context context) {
    124         final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
    125         final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
    126                 + relativeDirectoryName;
    127         final File directory = new File(absoluteDirectoryName);
    128         if (!directory.exists()) {
    129             if (!directory.mkdirs()) {
    130                 Log.e(TAG, "Could not create the directory for locale" + locale);
    131             }
    132         }
    133         return absoluteDirectoryName;
    134     }
    135 
    136     /**
    137      * Generates a file name for the id and locale passed as an argument.
    138      *
    139      * In the current implementation the file name returned will always be unique for
    140      * any id/locale pair, but please do not expect that the id can be the same for
    141      * different dictionaries with different locales. An id should be unique for any
    142      * dictionary.
    143      * The file name is pretty much an URL-encoded version of the id inside a directory
    144      * named like the locale, except it will also escape characters that look dangerous
    145      * to some file systems.
    146      * @param id the id of the dictionary for which to get a file name
    147      * @param locale the locale for which to get the file name as a string
    148      * @param context the context to use for getting the directory
    149      * @return the name of the file to be created
    150      */
    151     public static String getCacheFileName(String id, String locale, Context context) {
    152         final String fileName = replaceFileNameDangerousCharacters(id);
    153         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
    154     }
    155 
    156     /**
    157      * Returns a file address from a resource, or null if it cannot be opened.
    158      */
    159     private static AssetFileAddress loadFallbackResource(final Context context,
    160             final int fallbackResId) {
    161         final AssetFileDescriptor afd = context.getResources().openRawResourceFd(fallbackResId);
    162         if (afd == null) {
    163             Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
    164                     + fallbackResId);
    165             return null;
    166         }
    167         return AssetFileAddress.makeFromFileNameAndOffset(
    168                 context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
    169     }
    170 
    171     static private class DictPackSettings {
    172         final SharedPreferences mDictPreferences;
    173         public DictPackSettings(final Context context) {
    174             Context dictPackContext = null;
    175             try {
    176                 final String dictPackName =
    177                         context.getString(R.string.dictionary_pack_package_name);
    178                 dictPackContext = context.createPackageContext(dictPackName, 0);
    179             } catch (NameNotFoundException e) {
    180                 // The dictionary pack is not installed...
    181                 // TODO: fallback on the built-in dict, see the TODO above
    182                 Log.e(TAG, "Could not find a dictionary pack");
    183             }
    184             mDictPreferences = null == dictPackContext ? null
    185                     : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME,
    186                             Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS);
    187         }
    188         public boolean isWordListActive(final String dictId) {
    189             if (null == mDictPreferences) {
    190                 // If we don't have preferences it basically means we can't find the dictionary
    191                 // pack - either it's not installed, or it's disabled, or there is some strange
    192                 // bug. Either way, a word list with no settings should be on by default: default
    193                 // dictionaries in LatinIME are on if there is no settings at all, and if for some
    194                 // reason some dictionaries have been installed BUT the dictionary pack can't be
    195                 // found anymore it's safer to actually supply installed dictionaries.
    196                 return true;
    197             } else {
    198                 // The default is true here for the same reasons as above. We got the dictionary
    199                 // pack but if we don't have any settings for it it means the user has never been
    200                 // to the settings yet. So by default, the main dictionaries should be on.
    201                 return mDictPreferences.getBoolean(dictId, true);
    202             }
    203         }
    204     }
    205 
    206     /**
    207      * Helper method to the list of cache directories, one for each distinct locale.
    208      */
    209     private static File[] getCachedDirectoryList(final Context context) {
    210         return new File(getWordListCacheDirectory(context)).listFiles();
    211     }
    212 
    213     /**
    214      * Returns the category for a given file name.
    215      *
    216      * This parses the file name, extracts the category, and returns it. See
    217      * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
    218      * @return The category as a string or null if it can't be found in the file name.
    219      */
    220     private static String getCategoryFromFileName(final String fileName) {
    221         final String id = getWordListIdFromFileName(fileName);
    222         final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
    223         if (2 != idArray.length) return null;
    224         return idArray[0];
    225     }
    226 
    227     /**
    228      * Utility class for the {@link #getCachedWordLists} method
    229      */
    230     private static class FileAndMatchLevel {
    231         final File mFile;
    232         final int mMatchLevel;
    233         public FileAndMatchLevel(final File file, final int matchLevel) {
    234             mFile = file;
    235             mMatchLevel = matchLevel;
    236         }
    237     }
    238 
    239     /**
    240      * Returns the list of cached files for a specific locale, one for each category.
    241      *
    242      * This will return exactly one file for each word list category that matches
    243      * the passed locale. If several files match the locale for any given category,
    244      * this returns the file with the closest match to the locale. For example, if
    245      * the passed word list is en_US, and for a category we have an en and an en_US
    246      * word list available, we'll return only the en_US one.
    247      * Thus, the list will contain as many files as there are categories.
    248      *
    249      * @param locale the locale to find the dictionary files for, as a string.
    250      * @param context the context on which to open the files upon.
    251      * @return an array of binary dictionary files, which may be empty but may not be null.
    252      */
    253     private static File[] getCachedWordLists(final String locale,
    254             final Context context) {
    255         final File[] directoryList = getCachedDirectoryList(context);
    256         if (null == directoryList) return EMPTY_FILE_ARRAY;
    257         final HashMap<String, FileAndMatchLevel> cacheFiles =
    258                 new HashMap<String, FileAndMatchLevel>();
    259         for (File directory : directoryList) {
    260             if (!directory.isDirectory()) continue;
    261             final String dirLocale = getWordListIdFromFileName(directory.getName());
    262             final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale);
    263             if (LocaleUtils.isMatch(matchLevel)) {
    264                 final File[] wordLists = directory.listFiles();
    265                 if (null != wordLists) {
    266                     for (File wordList : wordLists) {
    267                         final String category = getCategoryFromFileName(wordList.getName());
    268                         final FileAndMatchLevel currentBestMatch = cacheFiles.get(category);
    269                         if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) {
    270                             cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel));
    271                         }
    272                     }
    273                 }
    274             }
    275         }
    276         if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
    277         final File[] result = new File[cacheFiles.size()];
    278         int index = 0;
    279         for (final FileAndMatchLevel entry : cacheFiles.values()) {
    280             result[index++] = entry.mFile;
    281         }
    282         return result;
    283     }
    284 
    285     /**
    286      * Remove all files with the passed id, except the passed file.
    287      *
    288      * If a dictionary with a given ID has a metadata change that causes it to change
    289      * path, we need to remove the old version. The only way to do this is to check all
    290      * installed files for a matching ID in a different directory.
    291      */
    292     public static void removeFilesWithIdExcept(final Context context, final String id,
    293             final File fileToKeep) {
    294         try {
    295             final File canonicalFileToKeep = fileToKeep.getCanonicalFile();
    296             final File[] directoryList = getCachedDirectoryList(context);
    297             if (null == directoryList) return;
    298             for (File directory : directoryList) {
    299                 // There is one directory per locale. See #getCachedDirectoryList
    300                 if (!directory.isDirectory()) continue;
    301                 final File[] wordLists = directory.listFiles();
    302                 if (null == wordLists) continue;
    303                 for (File wordList : wordLists) {
    304                     final String fileId = getWordListIdFromFileName(wordList.getName());
    305                     if (fileId.equals(id)) {
    306                         if (!canonicalFileToKeep.equals(wordList.getCanonicalFile())) {
    307                             wordList.delete();
    308                         }
    309                     }
    310                 }
    311             }
    312         } catch (java.io.IOException e) {
    313             Log.e(TAG, "IOException trying to cleanup files : " + e);
    314         }
    315     }
    316 
    317 
    318     /**
    319      * Returns the id associated with the main word list for a specified locale.
    320      *
    321      * Word lists stored in Android Keyboard's resources are referred to as the "main"
    322      * word lists. Since they can be updated like any other list, we need to assign a
    323      * unique ID to them. This ID is just the name of the language (locale-wise) they
    324      * are for, and this method returns this ID.
    325      */
    326     private static String getMainDictId(final Locale locale) {
    327         // This works because we don't include by default different dictionaries for
    328         // different countries. This actually needs to return the id that we would
    329         // like to use for word lists included in resources, and the following is okay.
    330         return MAIN_DICTIONARY_CATEGORY + ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
    331     }
    332 
    333     private static boolean isMainWordListId(final String id) {
    334         final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
    335         if (2 != idArray.length) return false;
    336         return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
    337     }
    338 
    339     /**
    340      * Returns a list of file addresses for a given locale, trying relevant methods in order.
    341      *
    342      * Tries to get binary dictionaries from various sources, in order:
    343      * - Uses a content provider to get a public dictionary set, as per the protocol described
    344      *   in BinaryDictionaryFileDumper.
    345      * If that fails:
    346      * - Gets a file name from the built-in dictionary for this locale, if any.
    347      * If that fails:
    348      * - Returns null.
    349      * @return The list of addresses of valid dictionary files, or null.
    350      */
    351     public static ArrayList<AssetFileAddress> getDictionaryFiles(final Locale locale,
    352             final Context context) {
    353 
    354         final boolean hasDefaultWordList = DictionaryFactory.isDictionaryAvailable(context, locale);
    355         // cacheWordListsFromContentProvider returns the list of files it copied to local
    356         // storage, but we don't really care about what was copied NOW: what we want is the
    357         // list of everything we ever cached, so we ignore the return value.
    358         BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context,
    359                 hasDefaultWordList);
    360         final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
    361         final String mainDictId = getMainDictId(locale);
    362         final DictPackSettings dictPackSettings = new DictPackSettings(context);
    363 
    364         boolean foundMainDict = false;
    365         final ArrayList<AssetFileAddress> fileList = new ArrayList<AssetFileAddress>();
    366         // cachedWordLists may not be null, see doc for getCachedDictionaryList
    367         for (final File f : cachedWordLists) {
    368             final String wordListId = getWordListIdFromFileName(f.getName());
    369             if (isMainWordListId(wordListId)) {
    370                 foundMainDict = true;
    371             }
    372             if (!dictPackSettings.isWordListActive(wordListId)) continue;
    373             if (f.canRead()) {
    374                 fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
    375             } else {
    376                 Log.e(TAG, "Found a cached dictionary file but cannot read it");
    377             }
    378         }
    379 
    380         if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) {
    381             final int fallbackResId =
    382                     DictionaryFactory.getMainDictionaryResourceId(context.getResources(), locale);
    383             final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId);
    384             if (null != fallbackAsset) {
    385                 fileList.add(fallbackAsset);
    386             }
    387         }
    388 
    389         return fileList;
    390     }
    391 }
    392