1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 import android.content.SharedPreferences; 21 import android.os.AsyncTask; 22 import android.util.Log; 23 24 import com.android.inputmethod.annotations.UsedForTesting; 25 import com.android.inputmethod.keyboard.ProximityInfo; 26 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 27 import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface; 28 import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener; 29 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; 30 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 31 32 import java.io.File; 33 import java.io.FileInputStream; 34 import java.io.FileNotFoundException; 35 import java.io.FileOutputStream; 36 import java.io.IOException; 37 import java.lang.ref.SoftReference; 38 import java.util.ArrayList; 39 import java.util.concurrent.ConcurrentHashMap; 40 import java.util.concurrent.locks.ReentrantLock; 41 42 /** 43 * Locally gathers stats about the words user types and various other signals like auto-correction 44 * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. 45 */ 46 public final class UserHistoryDictionary extends ExpandableDictionary { 47 private static final String TAG = UserHistoryDictionary.class.getSimpleName(); 48 private static final String NAME = UserHistoryDictionary.class.getSimpleName(); 49 public static final boolean DBG_SAVE_RESTORE = false; 50 public static final boolean DBG_STRESS_TEST = false; 51 public static final boolean DBG_ALWAYS_WRITE = false; 52 public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; 53 54 private static final FormatOptions VERSION3 = new FormatOptions(3, 55 true /* supportsDynamicUpdate */); 56 57 /** Any pair being typed or picked */ 58 private static final int FREQUENCY_FOR_TYPED = 2; 59 60 /** Maximum number of pairs. Pruning will start when databases goes above this number. */ 61 public static final int MAX_HISTORY_BIGRAMS = 10000; 62 63 /** 64 * When it hits maximum bigram pair, it will delete until you are left with 65 * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. 66 * Do not keep this number small to avoid deleting too often. 67 */ 68 public static final int DELETE_HISTORY_BIGRAMS = 1000; 69 70 /** Locale for which this user history dictionary is storing words */ 71 private final String mLocale; 72 73 private final UserHistoryDictionaryBigramList mBigramList = 74 new UserHistoryDictionaryBigramList(); 75 private final ReentrantLock mBigramListLock = new ReentrantLock(); 76 private final SharedPreferences mPrefs; 77 78 // Should always be false except when we use this class for test 79 @UsedForTesting boolean isTest = false; 80 81 private static final ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>> 82 sLangDictCache = CollectionUtils.newConcurrentHashMap(); 83 84 public static synchronized UserHistoryDictionary getInstance( 85 final Context context, final String locale, final SharedPreferences sp) { 86 if (sLangDictCache.containsKey(locale)) { 87 final SoftReference<UserHistoryDictionary> ref = sLangDictCache.get(locale); 88 final UserHistoryDictionary dict = ref == null ? null : ref.get(); 89 if (dict != null) { 90 if (PROFILE_SAVE_RESTORE) { 91 Log.w(TAG, "Use cached UserHistoryDictionary for " + locale); 92 } 93 return dict; 94 } 95 } 96 final UserHistoryDictionary dict = 97 new UserHistoryDictionary(context, locale, sp); 98 sLangDictCache.put(locale, new SoftReference<UserHistoryDictionary>(dict)); 99 return dict; 100 } 101 102 private UserHistoryDictionary(final Context context, final String locale, 103 final SharedPreferences sp) { 104 super(context, Dictionary.TYPE_USER_HISTORY); 105 mLocale = locale; 106 mPrefs = sp; 107 if (mLocale != null && mLocale.length() > 1) { 108 loadDictionary(); 109 } 110 } 111 112 @Override 113 public void close() { 114 flushPendingWrites(); 115 // Don't close the database as locale changes will require it to be reopened anyway 116 // Also, the database is written to somewhat frequently, so it needs to be kept alive 117 // throughout the life of the process. 118 // mOpenHelper.close(); 119 // Ignore close because we cache UserHistoryDictionary for each language. See getInstance() 120 // above. 121 // super.close(); 122 } 123 124 @Override 125 protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer composer, 126 final String prevWord, final ProximityInfo proximityInfo) { 127 // Inhibit suggestions (not predictions) for user history for now. Removing this method 128 // is enough to use it through the standard ExpandableDictionary way. 129 return null; 130 } 131 132 /** 133 * Return whether the passed charsequence is in the dictionary. 134 */ 135 @Override 136 public synchronized boolean isValidWord(final String word) { 137 // TODO: figure out what is the correct thing to do here. 138 return false; 139 } 140 141 /** 142 * Pair will be added to the user history dictionary. 143 * 144 * The first word may be null. That means we don't know the context, in other words, 145 * it's only a unigram. The first word may also be an empty string : this means start 146 * context, as in beginning of a sentence for example. 147 * The second word may not be null (a NullPointerException would be thrown). 148 */ 149 public int addToUserHistory(final String word1, final String word2, final boolean isValid) { 150 if (word2.length() >= Constants.Dictionary.MAX_WORD_LENGTH || 151 (word1 != null && word1.length() >= Constants.Dictionary.MAX_WORD_LENGTH)) { 152 return -1; 153 } 154 if (mBigramListLock.tryLock()) { 155 try { 156 super.addWord( 157 word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); 158 mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED); 159 // Do not insert a word as a bigram of itself 160 if (word2.equals(word1)) { 161 return 0; 162 } 163 final int freq; 164 if (null == word1) { 165 freq = FREQUENCY_FOR_TYPED; 166 } else { 167 freq = super.setBigramAndGetFrequency( 168 word1, word2, new ForgettingCurveParams(isValid)); 169 } 170 mBigramList.addBigram(word1, word2); 171 return freq; 172 } finally { 173 mBigramListLock.unlock(); 174 } 175 } 176 return -1; 177 } 178 179 public boolean cancelAddingUserHistory(final String word1, final String word2) { 180 if (mBigramListLock.tryLock()) { 181 try { 182 if (mBigramList.removeBigram(word1, word2)) { 183 return super.removeBigram(word1, word2); 184 } 185 } finally { 186 mBigramListLock.unlock(); 187 } 188 } 189 return false; 190 } 191 192 /** 193 * Schedules a background thread to write any pending words to the database. 194 */ 195 private void flushPendingWrites() { 196 // Create a background thread to write the pending entries 197 new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute(); 198 } 199 200 @Override 201 public void loadDictionaryAsync() { 202 // This must be run on non-main thread 203 mBigramListLock.lock(); 204 try { 205 loadDictionaryAsyncLocked(); 206 } finally { 207 mBigramListLock.unlock(); 208 } 209 } 210 211 private int profTotal; 212 213 private void loadDictionaryAsyncLocked() { 214 if (DBG_STRESS_TEST) { 215 try { 216 Log.w(TAG, "Start stress in loading: " + mLocale); 217 Thread.sleep(15000); 218 Log.w(TAG, "End stress in loading"); 219 } catch (InterruptedException e) { 220 } 221 } 222 final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale); 223 final boolean initializing = last == 0; 224 final long now = System.currentTimeMillis(); 225 profTotal = 0; 226 final String fileName = NAME + "." + mLocale + ".dict"; 227 final ExpandableDictionary dictionary = this; 228 final OnAddWordListener listener = new OnAddWordListener() { 229 @Override 230 public void setUnigram(final String word, final String shortcutTarget, 231 final int frequency) { 232 profTotal++; 233 if (DBG_SAVE_RESTORE) { 234 Log.d(TAG, "load unigram: " + word + "," + frequency); 235 } 236 dictionary.addWord(word, shortcutTarget, frequency); 237 mBigramList.addBigram(null, word, (byte)frequency); 238 } 239 240 @Override 241 public void setBigram(final String word1, final String word2, final int frequency) { 242 if (word1.length() < Constants.Dictionary.MAX_WORD_LENGTH 243 && word2.length() < Constants.Dictionary.MAX_WORD_LENGTH) { 244 profTotal++; 245 if (DBG_SAVE_RESTORE) { 246 Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency); 247 } 248 dictionary.setBigramAndGetFrequency( 249 word1, word2, initializing ? new ForgettingCurveParams(true) 250 : new ForgettingCurveParams(frequency, now, last)); 251 } 252 mBigramList.addBigram(word1, word2, (byte)frequency); 253 } 254 }; 255 256 // Load the dictionary from binary file 257 FileInputStream inStream = null; 258 try { 259 final File file = new File(getContext().getFilesDir(), fileName); 260 final byte[] buffer = new byte[(int)file.length()]; 261 inStream = new FileInputStream(file); 262 inStream.read(buffer); 263 UserHistoryDictIOUtils.readDictionaryBinary( 264 new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener); 265 } catch (FileNotFoundException e) { 266 // This is an expected condition: we don't have a user history dictionary for this 267 // language yet. It will be created sometime later. 268 } catch (IOException e) { 269 Log.e(TAG, "IOException on opening a bytebuffer", e); 270 } finally { 271 if (inStream != null) { 272 try { 273 inStream.close(); 274 } catch (IOException e) { 275 // do nothing 276 } 277 } 278 if (PROFILE_SAVE_RESTORE) { 279 final long diff = System.currentTimeMillis() - now; 280 Log.d(TAG, "PROF: Load UserHistoryDictionary: " 281 + mLocale + ", " + diff + "ms. load " + profTotal + "entries."); 282 } 283 } 284 } 285 286 /** 287 * Async task to write pending words to the binarydicts. 288 */ 289 private static final class UpdateBinaryTask extends AsyncTask<Void, Void, Void> 290 implements BigramDictionaryInterface { 291 private final UserHistoryDictionaryBigramList mBigramList; 292 private final boolean mAddLevel0Bigrams; 293 private final String mLocale; 294 private final UserHistoryDictionary mUserHistoryDictionary; 295 private final SharedPreferences mPrefs; 296 private final Context mContext; 297 298 public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites, 299 final String locale, final UserHistoryDictionary dict, 300 final SharedPreferences prefs, final Context context) { 301 mBigramList = pendingWrites; 302 mLocale = locale; 303 mUserHistoryDictionary = dict; 304 mPrefs = prefs; 305 mContext = context; 306 mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS; 307 } 308 309 @Override 310 protected Void doInBackground(final Void... v) { 311 if (mUserHistoryDictionary.isTest) { 312 // If isTest == true, wait until the lock is released. 313 mUserHistoryDictionary.mBigramListLock.lock(); 314 try { 315 doWriteTaskLocked(); 316 } finally { 317 mUserHistoryDictionary.mBigramListLock.unlock(); 318 } 319 } else if (mUserHistoryDictionary.mBigramListLock.tryLock()) { 320 doWriteTaskLocked(); 321 } 322 return null; 323 } 324 325 private void doWriteTaskLocked() { 326 if (DBG_STRESS_TEST) { 327 try { 328 Log.w(TAG, "Start stress in closing: " + mLocale); 329 Thread.sleep(15000); 330 Log.w(TAG, "End stress in closing"); 331 } catch (InterruptedException e) { 332 Log.e(TAG, "In stress test", e); 333 } 334 } 335 336 final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0; 337 final String fileName = NAME + "." + mLocale + ".dict"; 338 final File file = new File(mContext.getFilesDir(), fileName); 339 FileOutputStream out = null; 340 341 try { 342 out = new FileOutputStream(file); 343 UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3); 344 out.flush(); 345 out.close(); 346 } catch (IOException e) { 347 Log.e(TAG, "IO Exception while writing file", e); 348 } finally { 349 if (out != null) { 350 try { 351 out.close(); 352 } catch (IOException e) { 353 // ignore 354 } 355 } 356 } 357 358 // Save the timestamp after we finish writing the binary dictionary. 359 Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale); 360 if (PROFILE_SAVE_RESTORE) { 361 final long diff = System.currentTimeMillis() - now; 362 Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms."); 363 } 364 } 365 366 @Override 367 public int getFrequency(final String word1, final String word2) { 368 final int freq; 369 if (word1 == null) { // unigram 370 freq = FREQUENCY_FOR_TYPED; 371 final byte prevFc = mBigramList.getBigrams(word1).get(word2); 372 } else { // bigram 373 final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); 374 if (nw != null) { 375 final ForgettingCurveParams fcp = nw.getFcParams(); 376 final byte prevFc = mBigramList.getBigrams(word1).get(word2); 377 final byte fc = fcp.getFc(); 378 final boolean isValid = fcp.isValid(); 379 if (prevFc > 0 && prevFc == fc) { 380 freq = fc & 0xFF; 381 } else if (UserHistoryForgettingCurveUtils. 382 needsToSave(fc, isValid, mAddLevel0Bigrams)) { 383 freq = fc & 0xFF; 384 } else { 385 // Delete this entry 386 freq = -1; 387 } 388 } else { 389 // Delete this entry 390 freq = -1; 391 } 392 } 393 return freq; 394 } 395 } 396 397 @UsedForTesting 398 void forceAddWordForTest(final String word1, final String word2, final boolean isValid) { 399 mBigramListLock.lock(); 400 try { 401 addToUserHistory(word1, word2, isValid); 402 } finally { 403 mBigramListLock.unlock(); 404 } 405 } 406 } 407