Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.ContentValues;
     20 import android.content.Context;
     21 import android.content.res.AssetManager;
     22 import android.content.res.Resources;
     23 import android.text.format.DateUtils;
     24 import android.util.Log;
     25 
     26 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
     27 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     28 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     29 
     30 import java.io.File;
     31 import java.io.IOException;
     32 import java.util.ArrayList;
     33 import java.util.Locale;
     34 
     35 /**
     36  * This class encapsulates the logic for the Latin-IME side of dictionary information management.
     37  */
     38 public class DictionaryInfoUtils {
     39     private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
     40     // This class must be located in the same package as LatinIME.java.
     41     private static final String RESOURCE_PACKAGE_NAME =
     42             DictionaryInfoUtils.class.getPackage().getName();
     43     private static final String DEFAULT_MAIN_DICT = "main";
     44     private static final String MAIN_DICT_PREFIX = "main_";
     45     // 6 digits - unicode is limited to 21 bits
     46     private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
     47 
     48     public static class DictionaryInfo {
     49         private static final String LOCALE_COLUMN = "locale";
     50         private static final String WORDLISTID_COLUMN = "id";
     51         private static final String LOCAL_FILENAME_COLUMN = "filename";
     52         private static final String DESCRIPTION_COLUMN = "description";
     53         private static final String DATE_COLUMN = "date";
     54         private static final String FILESIZE_COLUMN = "filesize";
     55         private static final String VERSION_COLUMN = "version";
     56         public final String mId;
     57         public final Locale mLocale;
     58         public final String mDescription;
     59         public final AssetFileAddress mFileAddress;
     60         public final int mVersion;
     61         public DictionaryInfo(final String id, final Locale locale, final String description,
     62                 final AssetFileAddress fileAddress, final int version) {
     63             mId = id;
     64             mLocale = locale;
     65             mDescription = description;
     66             mFileAddress = fileAddress;
     67             mVersion = version;
     68         }
     69         public ContentValues toContentValues() {
     70             final ContentValues values = new ContentValues();
     71             values.put(WORDLISTID_COLUMN, mId);
     72             values.put(LOCALE_COLUMN, mLocale.toString());
     73             values.put(DESCRIPTION_COLUMN, mDescription);
     74             values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename);
     75             values.put(DATE_COLUMN,
     76                     new File(mFileAddress.mFilename).lastModified() / DateUtils.SECOND_IN_MILLIS);
     77             values.put(FILESIZE_COLUMN, mFileAddress.mLength);
     78             values.put(VERSION_COLUMN, mVersion);
     79             return values;
     80         }
     81     }
     82 
     83     private DictionaryInfoUtils() {
     84         // Private constructor to forbid instantation of this helper class.
     85     }
     86 
     87     /**
     88      * Returns whether we may want to use this character as part of a file name.
     89      *
     90      * This basically only accepts ascii letters and numbers, and rejects everything else.
     91      */
     92     private static boolean isFileNameCharacter(int codePoint) {
     93         if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
     94         if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
     95         if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
     96         return codePoint == '_'; // Underscore
     97     }
     98 
     99     /**
    100      * Escapes a string for any characters that may be suspicious for a file or directory name.
    101      *
    102      * Concretely this does a sort of URL-encoding except it will encode everything that's not
    103      * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
    104      * we cannot allow here)
    105      */
    106     // TODO: create a unit test for this method
    107     public static String replaceFileNameDangerousCharacters(final String name) {
    108         // This assumes '%' is fully available as a non-separator, normal
    109         // character in a file name. This is probably true for all file systems.
    110         final StringBuilder sb = new StringBuilder();
    111         final int nameLength = name.length();
    112         for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
    113             final int codePoint = name.codePointAt(i);
    114             if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) {
    115                 sb.appendCodePoint(codePoint);
    116             } else {
    117                 sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x",
    118                         codePoint));
    119             }
    120         }
    121         return sb.toString();
    122     }
    123 
    124     /**
    125      * Helper method to get the top level cache directory.
    126      */
    127     private static String getWordListCacheDirectory(final Context context) {
    128         return context.getFilesDir() + File.separator + "dicts";
    129     }
    130 
    131     /**
    132      * Helper method to get the top level temp directory.
    133      */
    134     public static String getWordListTempDirectory(final Context context) {
    135         return context.getFilesDir() + File.separator + "tmp";
    136     }
    137 
    138     /**
    139      * Reverse escaping done by replaceFileNameDangerousCharacters.
    140      */
    141     public static String getWordListIdFromFileName(final String fname) {
    142         final StringBuilder sb = new StringBuilder();
    143         final int fnameLength = fname.length();
    144         for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
    145             final int codePoint = fname.codePointAt(i);
    146             if ('%' != codePoint) {
    147                 sb.appendCodePoint(codePoint);
    148             } else {
    149                 // + 1 to pass the % sign
    150                 final int encodedCodePoint = Integer.parseInt(
    151                         fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16);
    152                 i += MAX_HEX_DIGITS_FOR_CODEPOINT;
    153                 sb.appendCodePoint(encodedCodePoint);
    154             }
    155         }
    156         return sb.toString();
    157     }
    158 
    159     /**
    160      * Helper method to the list of cache directories, one for each distinct locale.
    161      */
    162     public static File[] getCachedDirectoryList(final Context context) {
    163         return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles();
    164     }
    165 
    166     /**
    167      * Returns the category for a given file name.
    168      *
    169      * This parses the file name, extracts the category, and returns it. See
    170      * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
    171      * @return The category as a string or null if it can't be found in the file name.
    172      */
    173     public static String getCategoryFromFileName(final String fileName) {
    174         final String id = getWordListIdFromFileName(fileName);
    175         final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
    176         // An id is supposed to be in format category:locale, so splitting on the separator
    177         // should yield a 2-elements array
    178         if (2 != idArray.length) return null;
    179         return idArray[0];
    180     }
    181 
    182     /**
    183      * Find out the cache directory associated with a specific locale.
    184      */
    185     private static String getCacheDirectoryForLocale(final String locale, final Context context) {
    186         final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
    187         final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
    188                 + relativeDirectoryName;
    189         final File directory = new File(absoluteDirectoryName);
    190         if (!directory.exists()) {
    191             if (!directory.mkdirs()) {
    192                 Log.e(TAG, "Could not create the directory for locale" + locale);
    193             }
    194         }
    195         return absoluteDirectoryName;
    196     }
    197 
    198     /**
    199      * Generates a file name for the id and locale passed as an argument.
    200      *
    201      * In the current implementation the file name returned will always be unique for
    202      * any id/locale pair, but please do not expect that the id can be the same for
    203      * different dictionaries with different locales. An id should be unique for any
    204      * dictionary.
    205      * The file name is pretty much an URL-encoded version of the id inside a directory
    206      * named like the locale, except it will also escape characters that look dangerous
    207      * to some file systems.
    208      * @param id the id of the dictionary for which to get a file name
    209      * @param locale the locale for which to get the file name as a string
    210      * @param context the context to use for getting the directory
    211      * @return the name of the file to be created
    212      */
    213     public static String getCacheFileName(String id, String locale, Context context) {
    214         final String fileName = replaceFileNameDangerousCharacters(id);
    215         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
    216     }
    217 
    218     public static boolean isMainWordListId(final String id) {
    219         final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
    220         // An id is supposed to be in format category:locale, so splitting on the separator
    221         // should yield a 2-elements array
    222         if (2 != idArray.length) return false;
    223         return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
    224     }
    225 
    226     /**
    227      * Helper method to return a dictionary res id for a locale, or 0 if none.
    228      * @param locale dictionary locale
    229      * @return main dictionary resource id
    230      */
    231     public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res,
    232             final Locale locale) {
    233         int resId;
    234         // Try to find main_language_country dictionary.
    235         if (!locale.getCountry().isEmpty()) {
    236             final String dictLanguageCountry =
    237                     MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT);
    238             if ((resId = res.getIdentifier(
    239                     dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME)) != 0) {
    240                 return resId;
    241             }
    242         }
    243 
    244         // Try to find main_language dictionary.
    245         final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage();
    246         if ((resId = res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME)) != 0) {
    247             return resId;
    248         }
    249 
    250         // Not found, return 0
    251         return 0;
    252     }
    253 
    254     /**
    255      * Returns a main dictionary resource id
    256      * @param locale dictionary locale
    257      * @return main dictionary resource id
    258      */
    259     public static int getMainDictionaryResourceId(final Resources res, final Locale locale) {
    260         int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale);
    261         if (0 != resourceId) return resourceId;
    262         return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME);
    263     }
    264 
    265     /**
    266      * Returns the id associated with the main word list for a specified locale.
    267      *
    268      * Word lists stored in Android Keyboard's resources are referred to as the "main"
    269      * word lists. Since they can be updated like any other list, we need to assign a
    270      * unique ID to them. This ID is just the name of the language (locale-wise) they
    271      * are for, and this method returns this ID.
    272      */
    273     public static String getMainDictId(final Locale locale) {
    274         // This works because we don't include by default different dictionaries for
    275         // different countries. This actually needs to return the id that we would
    276         // like to use for word lists included in resources, and the following is okay.
    277         return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY +
    278                 BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
    279     }
    280 
    281     public static FileHeader getDictionaryFileHeaderOrNull(final File file) {
    282         try {
    283             return BinaryDictIOUtils.getDictionaryFileHeader(file, 0, file.length());
    284         } catch (UnsupportedFormatException e) {
    285             return null;
    286         } catch (IOException e) {
    287             return null;
    288         }
    289     }
    290 
    291     private static DictionaryInfo createDictionaryInfoFromFileAddress(
    292             final AssetFileAddress fileAddress) {
    293         final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull(
    294                 new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength);
    295         final String id = header.getId();
    296         final Locale locale = LocaleUtils.constructLocaleFromString(header.getLocaleString());
    297         final String description = header.getDescription();
    298         final String version = header.getVersion();
    299         return new DictionaryInfo(id, locale, description, fileAddress, Integer.parseInt(version));
    300     }
    301 
    302     private static void addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList,
    303             final DictionaryInfo newElement) {
    304         for (final DictionaryInfo info : dictList) {
    305             if (info.mLocale.equals(newElement.mLocale)) {
    306                 if (newElement.mVersion <= info.mVersion) {
    307                     return;
    308                 }
    309                 dictList.remove(info);
    310             }
    311         }
    312         dictList.add(newElement);
    313     }
    314 
    315     public static ArrayList<DictionaryInfo> getCurrentDictionaryFileNameAndVersionInfo(
    316             final Context context) {
    317         final ArrayList<DictionaryInfo> dictList = CollectionUtils.newArrayList();
    318 
    319         // Retrieve downloaded dictionaries
    320         final File[] directoryList = getCachedDirectoryList(context);
    321         if (null != directoryList) {
    322             for (final File directory : directoryList) {
    323                 final String localeString = getWordListIdFromFileName(directory.getName());
    324                 File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context);
    325                 for (final File dict : dicts) {
    326                     final String wordListId = getWordListIdFromFileName(dict.getName());
    327                     if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue;
    328                     final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
    329                     final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict);
    330                     final DictionaryInfo dictionaryInfo =
    331                             createDictionaryInfoFromFileAddress(fileAddress);
    332                     // Protect against cases of a less-specific dictionary being found, like an
    333                     // en dictionary being used for an en_US locale. In this case, the en dictionary
    334                     // should be used for en_US but discounted for listing purposes.
    335                     if (!dictionaryInfo.mLocale.equals(locale)) continue;
    336                     addOrUpdateDictInfo(dictList, dictionaryInfo);
    337                 }
    338             }
    339         }
    340 
    341         // Retrieve files from assets
    342         final Resources resources = context.getResources();
    343         final AssetManager assets = resources.getAssets();
    344         for (final String localeString : assets.getLocales()) {
    345             final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
    346             final int resourceId =
    347                     DictionaryInfoUtils.getMainDictionaryResourceIdIfAvailableForLocale(
    348                             context.getResources(), locale);
    349             if (0 == resourceId) continue;
    350             final AssetFileAddress fileAddress =
    351                     BinaryDictionaryGetter.loadFallbackResource(context, resourceId);
    352             final DictionaryInfo dictionaryInfo = createDictionaryInfoFromFileAddress(fileAddress);
    353             // Protect against cases of a less-specific dictionary being found, like an
    354             // en dictionary being used for an en_US locale. In this case, the en dictionary
    355             // should be used for en_US but discounted for listing purposes.
    356             if (!dictionaryInfo.mLocale.equals(locale)) continue;
    357             addOrUpdateDictInfo(dictList, dictionaryInfo);
    358         }
    359 
    360         return dictList;
    361     }
    362 }
    363