Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 import android.content.SharedPreferences;
     21 import android.os.AsyncTask;
     22 import android.util.Log;
     23 
     24 import com.android.inputmethod.annotations.UsedForTesting;
     25 import com.android.inputmethod.keyboard.ProximityInfo;
     26 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     27 import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
     28 import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
     29 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
     30 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     31 
     32 import java.io.File;
     33 import java.io.FileInputStream;
     34 import java.io.FileNotFoundException;
     35 import java.io.FileOutputStream;
     36 import java.io.IOException;
     37 import java.lang.ref.SoftReference;
     38 import java.util.ArrayList;
     39 import java.util.concurrent.ConcurrentHashMap;
     40 import java.util.concurrent.locks.ReentrantLock;
     41 
     42 /**
     43  * Locally gathers stats about the words user types and various other signals like auto-correction
     44  * cancellation or manual picks. This allows the keyboard to adapt to the typist over time.
     45  */
     46 public final class UserHistoryDictionary extends ExpandableDictionary {
     47     private static final String TAG = UserHistoryDictionary.class.getSimpleName();
     48     private static final String NAME = UserHistoryDictionary.class.getSimpleName();
     49     public static final boolean DBG_SAVE_RESTORE = false;
     50     public static final boolean DBG_STRESS_TEST = false;
     51     public static final boolean DBG_ALWAYS_WRITE = false;
     52     public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG;
     53 
     54     private static final FormatOptions VERSION3 = new FormatOptions(3,
     55             true /* supportsDynamicUpdate */);
     56 
     57     /** Any pair being typed or picked */
     58     private static final int FREQUENCY_FOR_TYPED = 2;
     59 
     60     /** Maximum number of pairs. Pruning will start when databases goes above this number. */
     61     public static final int MAX_HISTORY_BIGRAMS = 10000;
     62 
     63     /**
     64      * When it hits maximum bigram pair, it will delete until you are left with
     65      * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs.
     66      * Do not keep this number small to avoid deleting too often.
     67      */
     68     public static final int DELETE_HISTORY_BIGRAMS = 1000;
     69 
     70     /** Locale for which this user history dictionary is storing words */
     71     private final String mLocale;
     72 
     73     private final UserHistoryDictionaryBigramList mBigramList =
     74             new UserHistoryDictionaryBigramList();
     75     private final ReentrantLock mBigramListLock = new ReentrantLock();
     76     private final SharedPreferences mPrefs;
     77 
     78     // Should always be false except when we use this class for test
     79     @UsedForTesting boolean isTest = false;
     80 
     81     private static final ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>>
     82             sLangDictCache = CollectionUtils.newConcurrentHashMap();
     83 
     84     public static synchronized UserHistoryDictionary getInstance(
     85             final Context context, final String locale, final SharedPreferences sp) {
     86         if (sLangDictCache.containsKey(locale)) {
     87             final SoftReference<UserHistoryDictionary> ref = sLangDictCache.get(locale);
     88             final UserHistoryDictionary dict = ref == null ? null : ref.get();
     89             if (dict != null) {
     90                 if (PROFILE_SAVE_RESTORE) {
     91                     Log.w(TAG, "Use cached UserHistoryDictionary for " + locale);
     92                 }
     93                 return dict;
     94             }
     95         }
     96         final UserHistoryDictionary dict =
     97                 new UserHistoryDictionary(context, locale, sp);
     98         sLangDictCache.put(locale, new SoftReference<UserHistoryDictionary>(dict));
     99         return dict;
    100     }
    101 
    102     private UserHistoryDictionary(final Context context, final String locale,
    103             final SharedPreferences sp) {
    104         super(context, Dictionary.TYPE_USER_HISTORY);
    105         mLocale = locale;
    106         mPrefs = sp;
    107         if (mLocale != null && mLocale.length() > 1) {
    108             loadDictionary();
    109         }
    110     }
    111 
    112     @Override
    113     public void close() {
    114         flushPendingWrites();
    115         // Don't close the database as locale changes will require it to be reopened anyway
    116         // Also, the database is written to somewhat frequently, so it needs to be kept alive
    117         // throughout the life of the process.
    118         // mOpenHelper.close();
    119         // Ignore close because we cache UserHistoryDictionary for each language. See getInstance()
    120         // above.
    121         // super.close();
    122     }
    123 
    124     @Override
    125     protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer composer,
    126             final String prevWord, final ProximityInfo proximityInfo) {
    127         // Inhibit suggestions (not predictions) for user history for now. Removing this method
    128         // is enough to use it through the standard ExpandableDictionary way.
    129         return null;
    130     }
    131 
    132     /**
    133      * Return whether the passed charsequence is in the dictionary.
    134      */
    135     @Override
    136     public synchronized boolean isValidWord(final String word) {
    137         // TODO: figure out what is the correct thing to do here.
    138         return false;
    139     }
    140 
    141     /**
    142      * Pair will be added to the user history dictionary.
    143      *
    144      * The first word may be null. That means we don't know the context, in other words,
    145      * it's only a unigram. The first word may also be an empty string : this means start
    146      * context, as in beginning of a sentence for example.
    147      * The second word may not be null (a NullPointerException would be thrown).
    148      */
    149     public int addToUserHistory(final String word1, final String word2, final boolean isValid) {
    150         if (word2.length() >= Constants.Dictionary.MAX_WORD_LENGTH ||
    151                 (word1 != null && word1.length() >= Constants.Dictionary.MAX_WORD_LENGTH)) {
    152             return -1;
    153         }
    154         if (mBigramListLock.tryLock()) {
    155             try {
    156                 super.addWord(
    157                         word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
    158                 mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED);
    159                 // Do not insert a word as a bigram of itself
    160                 if (word2.equals(word1)) {
    161                     return 0;
    162                 }
    163                 final int freq;
    164                 if (null == word1) {
    165                     freq = FREQUENCY_FOR_TYPED;
    166                 } else {
    167                     freq = super.setBigramAndGetFrequency(
    168                             word1, word2, new ForgettingCurveParams(isValid));
    169                 }
    170                 mBigramList.addBigram(word1, word2);
    171                 return freq;
    172             } finally {
    173                 mBigramListLock.unlock();
    174             }
    175         }
    176         return -1;
    177     }
    178 
    179     public boolean cancelAddingUserHistory(final String word1, final String word2) {
    180         if (mBigramListLock.tryLock()) {
    181             try {
    182                 if (mBigramList.removeBigram(word1, word2)) {
    183                     return super.removeBigram(word1, word2);
    184                 }
    185             } finally {
    186                 mBigramListLock.unlock();
    187             }
    188         }
    189         return false;
    190     }
    191 
    192     /**
    193      * Schedules a background thread to write any pending words to the database.
    194      */
    195     private void flushPendingWrites() {
    196         // Create a background thread to write the pending entries
    197         new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute();
    198     }
    199 
    200     @Override
    201     public void loadDictionaryAsync() {
    202         // This must be run on non-main thread
    203         mBigramListLock.lock();
    204         try {
    205             loadDictionaryAsyncLocked();
    206         } finally {
    207             mBigramListLock.unlock();
    208         }
    209     }
    210 
    211     private int profTotal;
    212 
    213     private void loadDictionaryAsyncLocked() {
    214         if (DBG_STRESS_TEST) {
    215             try {
    216                 Log.w(TAG, "Start stress in loading: " + mLocale);
    217                 Thread.sleep(15000);
    218                 Log.w(TAG, "End stress in loading");
    219             } catch (InterruptedException e) {
    220             }
    221         }
    222         final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale);
    223         final boolean initializing = last == 0;
    224         final long now = System.currentTimeMillis();
    225         profTotal = 0;
    226         final String fileName = NAME + "." + mLocale + ".dict";
    227         final ExpandableDictionary dictionary = this;
    228         final OnAddWordListener listener = new OnAddWordListener() {
    229             @Override
    230             public void setUnigram(final String word, final String shortcutTarget,
    231                     final int frequency) {
    232                 profTotal++;
    233                 if (DBG_SAVE_RESTORE) {
    234                     Log.d(TAG, "load unigram: " + word + "," + frequency);
    235                 }
    236                 dictionary.addWord(word, shortcutTarget, frequency);
    237                 mBigramList.addBigram(null, word, (byte)frequency);
    238             }
    239 
    240             @Override
    241             public void setBigram(final String word1, final String word2, final int frequency) {
    242                 if (word1.length() < Constants.Dictionary.MAX_WORD_LENGTH
    243                         && word2.length() < Constants.Dictionary.MAX_WORD_LENGTH) {
    244                     profTotal++;
    245                     if (DBG_SAVE_RESTORE) {
    246                         Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency);
    247                     }
    248                     dictionary.setBigramAndGetFrequency(
    249                             word1, word2, initializing ? new ForgettingCurveParams(true)
    250                             : new ForgettingCurveParams(frequency, now, last));
    251                 }
    252                 mBigramList.addBigram(word1, word2, (byte)frequency);
    253             }
    254         };
    255 
    256         // Load the dictionary from binary file
    257         FileInputStream inStream = null;
    258         try {
    259             final File file = new File(getContext().getFilesDir(), fileName);
    260             final byte[] buffer = new byte[(int)file.length()];
    261             inStream = new FileInputStream(file);
    262             inStream.read(buffer);
    263             UserHistoryDictIOUtils.readDictionaryBinary(
    264                     new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener);
    265         } catch (FileNotFoundException e) {
    266             // This is an expected condition: we don't have a user history dictionary for this
    267             // language yet. It will be created sometime later.
    268         } catch (IOException e) {
    269             Log.e(TAG, "IOException on opening a bytebuffer", e);
    270         } finally {
    271             if (inStream != null) {
    272                 try {
    273                     inStream.close();
    274                 } catch (IOException e) {
    275                     // do nothing
    276                 }
    277             }
    278             if (PROFILE_SAVE_RESTORE) {
    279                 final long diff = System.currentTimeMillis() - now;
    280                 Log.d(TAG, "PROF: Load UserHistoryDictionary: "
    281                         + mLocale + ", " + diff + "ms. load " + profTotal + "entries.");
    282             }
    283         }
    284     }
    285 
    286     /**
    287      * Async task to write pending words to the binarydicts.
    288      */
    289     private static final class UpdateBinaryTask extends AsyncTask<Void, Void, Void>
    290             implements BigramDictionaryInterface {
    291         private final UserHistoryDictionaryBigramList mBigramList;
    292         private final boolean mAddLevel0Bigrams;
    293         private final String mLocale;
    294         private final UserHistoryDictionary mUserHistoryDictionary;
    295         private final SharedPreferences mPrefs;
    296         private final Context mContext;
    297 
    298         public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites,
    299                 final String locale, final UserHistoryDictionary dict,
    300                 final SharedPreferences prefs, final Context context) {
    301             mBigramList = pendingWrites;
    302             mLocale = locale;
    303             mUserHistoryDictionary = dict;
    304             mPrefs = prefs;
    305             mContext = context;
    306             mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS;
    307         }
    308 
    309         @Override
    310         protected Void doInBackground(final Void... v) {
    311             if (mUserHistoryDictionary.isTest) {
    312                 // If isTest == true, wait until the lock is released.
    313                 mUserHistoryDictionary.mBigramListLock.lock();
    314                 try {
    315                     doWriteTaskLocked();
    316                 } finally {
    317                     mUserHistoryDictionary.mBigramListLock.unlock();
    318                 }
    319             } else if (mUserHistoryDictionary.mBigramListLock.tryLock()) {
    320                 doWriteTaskLocked();
    321             }
    322             return null;
    323         }
    324 
    325         private void doWriteTaskLocked() {
    326             if (DBG_STRESS_TEST) {
    327                 try {
    328                     Log.w(TAG, "Start stress in closing: " + mLocale);
    329                     Thread.sleep(15000);
    330                     Log.w(TAG, "End stress in closing");
    331                 } catch (InterruptedException e) {
    332                     Log.e(TAG, "In stress test", e);
    333                 }
    334             }
    335 
    336             final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0;
    337             final String fileName = NAME + "." + mLocale + ".dict";
    338             final File file = new File(mContext.getFilesDir(), fileName);
    339             FileOutputStream out = null;
    340 
    341             try {
    342                 out = new FileOutputStream(file);
    343                 UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3);
    344                 out.flush();
    345                 out.close();
    346             } catch (IOException e) {
    347                 Log.e(TAG, "IO Exception while writing file", e);
    348             } finally {
    349                 if (out != null) {
    350                     try {
    351                         out.close();
    352                     } catch (IOException e) {
    353                         // ignore
    354                     }
    355                 }
    356             }
    357 
    358             // Save the timestamp after we finish writing the binary dictionary.
    359             Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale);
    360             if (PROFILE_SAVE_RESTORE) {
    361                 final long diff = System.currentTimeMillis() - now;
    362                 Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms.");
    363             }
    364         }
    365 
    366         @Override
    367         public int getFrequency(final String word1, final String word2) {
    368             final int freq;
    369             if (word1 == null) { // unigram
    370                 freq = FREQUENCY_FOR_TYPED;
    371                 final byte prevFc = mBigramList.getBigrams(word1).get(word2);
    372             } else { // bigram
    373                 final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
    374                 if (nw != null) {
    375                     final ForgettingCurveParams fcp = nw.getFcParams();
    376                     final byte prevFc = mBigramList.getBigrams(word1).get(word2);
    377                     final byte fc = fcp.getFc();
    378                     final boolean isValid = fcp.isValid();
    379                     if (prevFc > 0 && prevFc == fc) {
    380                         freq = fc & 0xFF;
    381                     } else if (UserHistoryForgettingCurveUtils.
    382                             needsToSave(fc, isValid, mAddLevel0Bigrams)) {
    383                         freq = fc & 0xFF;
    384                     } else {
    385                         // Delete this entry
    386                         freq = -1;
    387                     }
    388                 } else {
    389                     // Delete this entry
    390                     freq = -1;
    391                 }
    392             }
    393             return freq;
    394         }
    395     }
    396 
    397     @UsedForTesting
    398     void forceAddWordForTest(final String word1, final String word2, final boolean isValid) {
    399         mBigramListLock.lock();
    400         try {
    401             addToUserHistory(word1, word2, isValid);
    402         } finally {
    403             mBigramListLock.unlock();
    404         }
    405     }
    406 }
    407