1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.ContentValues; 20 import android.content.Context; 21 import android.content.SharedPreferences; 22 import android.database.Cursor; 23 import android.database.sqlite.SQLiteDatabase; 24 import android.database.sqlite.SQLiteOpenHelper; 25 import android.database.sqlite.SQLiteQueryBuilder; 26 import android.os.AsyncTask; 27 import android.provider.BaseColumns; 28 import android.util.Log; 29 30 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; 31 32 import java.lang.ref.SoftReference; 33 import java.util.HashMap; 34 import java.util.concurrent.ConcurrentHashMap; 35 import java.util.concurrent.locks.ReentrantLock; 36 37 /** 38 * Locally gathers stats about the words user types and various other signals like auto-correction 39 * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. 40 */ 41 public class UserHistoryDictionary extends ExpandableDictionary { 42 private static final String TAG = "UserHistoryDictionary"; 43 public static final boolean DBG_SAVE_RESTORE = false; 44 public static final boolean DBG_STRESS_TEST = false; 45 public static final boolean DBG_ALWAYS_WRITE = false; 46 public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; 47 48 /** Any pair being typed or picked */ 49 private static final int FREQUENCY_FOR_TYPED = 2; 50 51 /** Maximum number of pairs. Pruning will start when databases goes above this number. */ 52 private static int sMaxHistoryBigrams = 10000; 53 54 /** 55 * When it hits maximum bigram pair, it will delete until you are left with 56 * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. 57 * Do not keep this number small to avoid deleting too often. 58 */ 59 private static int sDeleteHistoryBigrams = 1000; 60 61 /** 62 * Database version should increase if the database structure changes 63 */ 64 private static final int DATABASE_VERSION = 1; 65 66 private static final String DATABASE_NAME = "userbigram_dict.db"; 67 68 /** Name of the words table in the database */ 69 private static final String MAIN_TABLE_NAME = "main"; 70 // TODO: Consume less space by using a unique id for locale instead of the whole 71 // 2-5 character string. 72 private static final String MAIN_COLUMN_ID = BaseColumns._ID; 73 private static final String MAIN_COLUMN_WORD1 = "word1"; 74 private static final String MAIN_COLUMN_WORD2 = "word2"; 75 private static final String MAIN_COLUMN_LOCALE = "locale"; 76 77 /** Name of the frequency table in the database */ 78 private static final String FREQ_TABLE_NAME = "frequency"; 79 private static final String FREQ_COLUMN_ID = BaseColumns._ID; 80 private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; 81 private static final String COLUMN_FORGETTING_CURVE_VALUE = "freq"; 82 83 /** Locale for which this user history dictionary is storing words */ 84 private final String mLocale; 85 86 private final UserHistoryDictionaryBigramList mBigramList = 87 new UserHistoryDictionaryBigramList(); 88 private final ReentrantLock mBigramListLock = new ReentrantLock(); 89 private final SharedPreferences mPrefs; 90 91 private final static HashMap<String, String> sDictProjectionMap; 92 private final static ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>> 93 sLangDictCache = new ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>>(); 94 95 static { 96 sDictProjectionMap = new HashMap<String, String>(); 97 sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); 98 sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); 99 sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); 100 sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); 101 102 sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); 103 sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); 104 sDictProjectionMap.put(COLUMN_FORGETTING_CURVE_VALUE, COLUMN_FORGETTING_CURVE_VALUE); 105 } 106 107 private static DatabaseHelper sOpenHelper = null; 108 109 public void setDatabaseMax(int maxHistoryBigram) { 110 sMaxHistoryBigrams = maxHistoryBigram; 111 } 112 113 public void setDatabaseDelete(int deleteHistoryBigram) { 114 sDeleteHistoryBigrams = deleteHistoryBigram; 115 } 116 117 public synchronized static UserHistoryDictionary getInstance( 118 final Context context, final String locale, 119 final int dictTypeId, final SharedPreferences sp) { 120 if (sLangDictCache.containsKey(locale)) { 121 final SoftReference<UserHistoryDictionary> ref = sLangDictCache.get(locale); 122 final UserHistoryDictionary dict = ref == null ? null : ref.get(); 123 if (dict != null) { 124 if (PROFILE_SAVE_RESTORE) { 125 Log.w(TAG, "Use cached UserHistoryDictionary for " + locale); 126 } 127 return dict; 128 } 129 } 130 final UserHistoryDictionary dict = 131 new UserHistoryDictionary(context, locale, dictTypeId, sp); 132 sLangDictCache.put(locale, new SoftReference<UserHistoryDictionary>(dict)); 133 return dict; 134 } 135 136 private UserHistoryDictionary(final Context context, final String locale, final int dicTypeId, 137 SharedPreferences sp) { 138 super(context, dicTypeId); 139 mLocale = locale; 140 mPrefs = sp; 141 if (sOpenHelper == null) { 142 sOpenHelper = new DatabaseHelper(getContext()); 143 } 144 if (mLocale != null && mLocale.length() > 1) { 145 loadDictionary(); 146 } 147 } 148 149 @Override 150 public void close() { 151 flushPendingWrites(); 152 // Don't close the database as locale changes will require it to be reopened anyway 153 // Also, the database is written to somewhat frequently, so it needs to be kept alive 154 // throughout the life of the process. 155 // mOpenHelper.close(); 156 // Ignore close because we cache UserHistoryDictionary for each language. See getInstance() 157 // above. 158 // super.close(); 159 } 160 161 /** 162 * Return whether the passed charsequence is in the dictionary. 163 */ 164 @Override 165 public synchronized boolean isValidWord(final CharSequence word) { 166 // TODO: figure out what is the correct thing to do here. 167 return false; 168 } 169 170 /** 171 * Pair will be added to the user history dictionary. 172 * 173 * The first word may be null. That means we don't know the context, in other words, 174 * it's only a unigram. The first word may also be an empty string : this means start 175 * context, as in beginning of a sentence for example. 176 * The second word may not be null (a NullPointerException would be thrown). 177 */ 178 public int addToUserHistory(final String word1, String word2, boolean isValid) { 179 if (mBigramListLock.tryLock()) { 180 try { 181 super.addWord( 182 word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); 183 // Do not insert a word as a bigram of itself 184 if (word2.equals(word1)) { 185 return 0; 186 } 187 final int freq; 188 if (null == word1) { 189 freq = FREQUENCY_FOR_TYPED; 190 } else { 191 freq = super.setBigramAndGetFrequency( 192 word1, word2, new ForgettingCurveParams(isValid)); 193 } 194 mBigramList.addBigram(word1, word2); 195 return freq; 196 } finally { 197 mBigramListLock.unlock(); 198 } 199 } 200 return -1; 201 } 202 203 public boolean cancelAddingUserHistory(String word1, String word2) { 204 if (mBigramListLock.tryLock()) { 205 try { 206 if (mBigramList.removeBigram(word1, word2)) { 207 return super.removeBigram(word1, word2); 208 } 209 } finally { 210 mBigramListLock.unlock(); 211 } 212 } 213 return false; 214 } 215 216 /** 217 * Schedules a background thread to write any pending words to the database. 218 */ 219 private void flushPendingWrites() { 220 if (mBigramListLock.isLocked()) { 221 return; 222 } 223 // Create a background thread to write the pending entries 224 new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this, mPrefs).execute(); 225 } 226 227 @Override 228 public void loadDictionaryAsync() { 229 // This must be run on non-main thread 230 mBigramListLock.lock(); 231 try { 232 loadDictionaryAsyncLocked(); 233 } finally { 234 mBigramListLock.unlock(); 235 } 236 } 237 238 private void loadDictionaryAsyncLocked() { 239 if (DBG_STRESS_TEST) { 240 try { 241 Log.w(TAG, "Start stress in loading: " + mLocale); 242 Thread.sleep(15000); 243 Log.w(TAG, "End stress in loading"); 244 } catch (InterruptedException e) { 245 } 246 } 247 final long last = SettingsValues.getLastUserHistoryWriteTime(mPrefs, mLocale); 248 final boolean initializing = last == 0; 249 final long now = System.currentTimeMillis(); 250 // Load the words that correspond to the current input locale 251 final Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); 252 if (null == cursor) return; 253 try { 254 // TODO: Call SQLiteDataBase.beginTransaction / SQLiteDataBase.endTransaction 255 if (cursor.moveToFirst()) { 256 final int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); 257 final int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); 258 final int fcIndex = cursor.getColumnIndex(COLUMN_FORGETTING_CURVE_VALUE); 259 while (!cursor.isAfterLast()) { 260 final String word1 = cursor.getString(word1Index); 261 final String word2 = cursor.getString(word2Index); 262 final int fc = cursor.getInt(fcIndex); 263 if (DBG_SAVE_RESTORE) { 264 Log.d(TAG, "--- Load user history: " + word1 + ", " + word2 + "," 265 + mLocale + "," + this); 266 } 267 // Safeguard against adding really long words. Stack may overflow due 268 // to recursive lookup 269 if (null == word1) { 270 super.addWord(word2, null /* shortcut */, fc); 271 } else if (word1.length() < BinaryDictionary.MAX_WORD_LENGTH 272 && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) { 273 super.setBigramAndGetFrequency( 274 word1, word2, initializing ? new ForgettingCurveParams(true) 275 : new ForgettingCurveParams(fc, now, last)); 276 } 277 mBigramList.addBigram(word1, word2, (byte)fc); 278 cursor.moveToNext(); 279 } 280 } 281 } finally { 282 cursor.close(); 283 if (PROFILE_SAVE_RESTORE) { 284 final long diff = System.currentTimeMillis() - now; 285 Log.w(TAG, "PROF: Load User HistoryDictionary: " 286 + mLocale + ", " + diff + "ms."); 287 } 288 } 289 } 290 291 /** 292 * Query the database 293 */ 294 private static Cursor query(String selection, String[] selectionArgs) { 295 SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); 296 297 // main INNER JOIN frequency ON (main._id=freq.pair_id) 298 qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" 299 + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." 300 + FREQ_COLUMN_PAIR_ID +")"); 301 302 qb.setProjectionMap(sDictProjectionMap); 303 304 // Get the database and run the query 305 try { 306 SQLiteDatabase db = sOpenHelper.getReadableDatabase(); 307 Cursor c = qb.query(db, 308 new String[] { 309 MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, COLUMN_FORGETTING_CURVE_VALUE }, 310 selection, selectionArgs, null, null, null); 311 return c; 312 } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { 313 // Can't open the database : presumably we can't access storage. That may happen 314 // when the device is wedged; do a best effort to still start the keyboard. 315 return null; 316 } 317 } 318 319 /** 320 * This class helps open, create, and upgrade the database file. 321 */ 322 private static class DatabaseHelper extends SQLiteOpenHelper { 323 324 DatabaseHelper(Context context) { 325 super(context, DATABASE_NAME, null, DATABASE_VERSION); 326 } 327 328 @Override 329 public void onCreate(SQLiteDatabase db) { 330 db.execSQL("PRAGMA foreign_keys = ON;"); 331 db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" 332 + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," 333 + MAIN_COLUMN_WORD1 + " TEXT," 334 + MAIN_COLUMN_WORD2 + " TEXT," 335 + MAIN_COLUMN_LOCALE + " TEXT" 336 + ");"); 337 db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" 338 + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," 339 + FREQ_COLUMN_PAIR_ID + " INTEGER," 340 + COLUMN_FORGETTING_CURVE_VALUE + " INTEGER," 341 + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME 342 + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" 343 + ");"); 344 } 345 346 @Override 347 public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { 348 Log.w(TAG, "Upgrading database from version " + oldVersion + " to " 349 + newVersion + ", which will destroy all old data"); 350 db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); 351 db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); 352 onCreate(db); 353 } 354 } 355 356 /** 357 * Async task to write pending words to the database so that it stays in sync with 358 * the in-memory trie. 359 */ 360 private static class UpdateDbTask extends AsyncTask<Void, Void, Void> { 361 private final UserHistoryDictionaryBigramList mBigramList; 362 private final DatabaseHelper mDbHelper; 363 private final String mLocale; 364 private final UserHistoryDictionary mUserHistoryDictionary; 365 private final SharedPreferences mPrefs; 366 367 public UpdateDbTask( 368 DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites, 369 String locale, UserHistoryDictionary dict, SharedPreferences prefs) { 370 mBigramList = pendingWrites; 371 mLocale = locale; 372 mDbHelper = openHelper; 373 mUserHistoryDictionary = dict; 374 mPrefs = prefs; 375 } 376 377 /** Prune any old data if the database is getting too big. */ 378 private static void checkPruneData(SQLiteDatabase db) { 379 db.execSQL("PRAGMA foreign_keys = ON;"); 380 Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, 381 null, null, null, null, null); 382 try { 383 int totalRowCount = c.getCount(); 384 // prune out old data if we have too much data 385 if (totalRowCount > sMaxHistoryBigrams) { 386 int numDeleteRows = (totalRowCount - sMaxHistoryBigrams) 387 + sDeleteHistoryBigrams; 388 int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); 389 c.moveToFirst(); 390 int count = 0; 391 while (count < numDeleteRows && !c.isAfterLast()) { 392 String pairId = c.getString(pairIdColumnId); 393 // Deleting from MAIN table will delete the frequencies 394 // due to FOREIGN KEY .. ON DELETE CASCADE 395 db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", 396 new String[] { pairId }); 397 c.moveToNext(); 398 count++; 399 } 400 } 401 } finally { 402 c.close(); 403 } 404 } 405 406 @Override 407 protected Void doInBackground(Void... v) { 408 SQLiteDatabase db = null; 409 if (mUserHistoryDictionary.mBigramListLock.tryLock()) { 410 try { 411 try { 412 db = mDbHelper.getWritableDatabase(); 413 } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { 414 // If we can't open the db, don't do anything. Exit through the next test 415 // for non-nullity of the db variable. 416 } 417 if (null == db) { 418 // Not much we can do. Just exit. 419 return null; 420 } 421 db.beginTransaction(); 422 return doLoadTaskLocked(db); 423 } finally { 424 if (db != null) { 425 db.endTransaction(); 426 } 427 mUserHistoryDictionary.mBigramListLock.unlock(); 428 } 429 } 430 return null; 431 } 432 433 private Void doLoadTaskLocked(SQLiteDatabase db) { 434 if (DBG_STRESS_TEST) { 435 try { 436 Log.w(TAG, "Start stress in closing: " + mLocale); 437 Thread.sleep(15000); 438 Log.w(TAG, "End stress in closing"); 439 } catch (InterruptedException e) { 440 } 441 } 442 final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0; 443 int profTotal = 0; 444 int profInsert = 0; 445 int profDelete = 0; 446 db.execSQL("PRAGMA foreign_keys = ON;"); 447 final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams; 448 449 // Write all the entries to the db 450 for (String word1 : mBigramList.keySet()) { 451 final HashMap<String, Byte> word1Bigrams = mBigramList.getBigrams(word1); 452 for (String word2 : word1Bigrams.keySet()) { 453 if (PROFILE_SAVE_RESTORE) { 454 ++profTotal; 455 } 456 // Get new frequency. Do not insert unigrams/bigrams which freq is "-1". 457 final int freq; // -1, or 0~255 458 if (word1 == null) { // unigram 459 freq = FREQUENCY_FOR_TYPED; 460 final byte prevFc = word1Bigrams.get(word2); 461 if (prevFc == FREQUENCY_FOR_TYPED) { 462 // No need to update since we found no changes for this entry. 463 // Just skip to the next entry. 464 if (DBG_SAVE_RESTORE) { 465 Log.d(TAG, "Skip update user history: " + word1 + "," + word2 466 + "," + prevFc); 467 } 468 if (!DBG_ALWAYS_WRITE) { 469 continue; 470 } 471 } 472 } else { // bigram 473 final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); 474 if (nw != null) { 475 final ForgettingCurveParams fcp = nw.getFcParams(); 476 final byte prevFc = word1Bigrams.get(word2); 477 final byte fc = (byte)fcp.getFc(); 478 final boolean isValid = fcp.isValid(); 479 if (prevFc > 0 && prevFc == fc) { 480 // No need to update since we found no changes for this entry. 481 // Just skip to the next entry. 482 if (DBG_SAVE_RESTORE) { 483 Log.d(TAG, "Skip update user history: " + word1 + "," 484 + word2 + "," + prevFc); 485 } 486 if (!DBG_ALWAYS_WRITE) { 487 continue; 488 } else { 489 freq = fc; 490 } 491 } else if (UserHistoryForgettingCurveUtils. 492 needsToSave(fc, isValid, addLevel0Bigram)) { 493 freq = fc; 494 } else { 495 freq = -1; 496 } 497 } else { 498 freq = -1; 499 } 500 } 501 // TODO: this process of making a text search for each pair each time 502 // is terribly inefficient. Optimize this. 503 // Find pair id 504 Cursor c = null; 505 try { 506 if (null != word1) { 507 c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, 508 MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " 509 + MAIN_COLUMN_LOCALE + "=?", 510 new String[] { word1, word2, mLocale }, null, null, 511 null); 512 } else { 513 c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, 514 MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 515 + "=? AND " + MAIN_COLUMN_LOCALE + "=?", 516 new String[] { word2, mLocale }, null, null, null); 517 } 518 519 final int pairId; 520 if (c.moveToFirst()) { 521 if (PROFILE_SAVE_RESTORE) { 522 ++profDelete; 523 } 524 // Delete existing pair 525 pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); 526 db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", 527 new String[] { Integer.toString(pairId) }); 528 } else { 529 // Create new pair 530 Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, 531 getContentValues(word1, word2, mLocale)); 532 pairId = pairIdLong.intValue(); 533 } 534 if (freq > 0) { 535 if (PROFILE_SAVE_RESTORE) { 536 ++profInsert; 537 } 538 if (DBG_SAVE_RESTORE) { 539 Log.d(TAG, "--- Save user history: " + word1 + ", " + word2 540 + mLocale + "," + this); 541 } 542 // Insert new frequency 543 db.insert(FREQ_TABLE_NAME, null, 544 getFrequencyContentValues(pairId, freq)); 545 // Update an existing bigram entry in mBigramList too in order to 546 // synchronize the SQL DB and mBigramList. 547 mBigramList.updateBigram(word1, word2, (byte)freq); 548 } 549 } finally { 550 if (c != null) { 551 c.close(); 552 } 553 } 554 } 555 } 556 557 checkPruneData(db); 558 // Save the timestamp after we finish writing the SQL DB. 559 SettingsValues.setLastUserHistoryWriteTime(mPrefs, mLocale); 560 if (PROFILE_SAVE_RESTORE) { 561 final long diff = System.currentTimeMillis() - now; 562 Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", "+ diff 563 + "ms. Total: " + profTotal + ". Insert: " + profInsert + ". Delete: " 564 + profDelete); 565 } 566 db.setTransactionSuccessful(); 567 return null; 568 } 569 570 private static ContentValues getContentValues(String word1, String word2, String locale) { 571 ContentValues values = new ContentValues(3); 572 values.put(MAIN_COLUMN_WORD1, word1); 573 values.put(MAIN_COLUMN_WORD2, word2); 574 values.put(MAIN_COLUMN_LOCALE, locale); 575 return values; 576 } 577 578 private static ContentValues getFrequencyContentValues(int pairId, int frequency) { 579 ContentValues values = new ContentValues(2); 580 values.put(FREQ_COLUMN_PAIR_ID, pairId); 581 values.put(COLUMN_FORGETTING_CURVE_VALUE, frequency); 582 return values; 583 } 584 } 585 586 } 587