1 /* 2 * Copyright (C) 2010 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import java.util.HashMap; 20 import java.util.HashSet; 21 import java.util.Iterator; 22 23 import android.content.ContentValues; 24 import android.content.Context; 25 import android.database.Cursor; 26 import android.database.sqlite.SQLiteDatabase; 27 import android.database.sqlite.SQLiteOpenHelper; 28 import android.database.sqlite.SQLiteQueryBuilder; 29 import android.os.AsyncTask; 30 import android.provider.BaseColumns; 31 import android.util.Log; 32 33 /** 34 * Stores all the pairs user types in databases. Prune the database if the size 35 * gets too big. Unlike AutoDictionary, it even stores the pairs that are already 36 * in the dictionary. 37 */ 38 public class UserBigramDictionary extends ExpandableDictionary { 39 private static final String TAG = "UserBigramDictionary"; 40 41 /** Any pair being typed or picked */ 42 private static final int FREQUENCY_FOR_TYPED = 2; 43 44 /** Maximum frequency for all pairs */ 45 private static final int FREQUENCY_MAX = 127; 46 47 /** 48 * If this pair is typed 6 times, it would be suggested. 49 * Should be smaller than ContactsDictionary.FREQUENCY_FOR_CONTACTS_BIGRAM 50 */ 51 protected static final int SUGGEST_THRESHOLD = 6 * FREQUENCY_FOR_TYPED; 52 53 /** Maximum number of pairs. Pruning will start when databases goes above this number. */ 54 private static int sMaxUserBigrams = 10000; 55 56 /** 57 * When it hits maximum bigram pair, it will delete until you are left with 58 * only (sMaxUserBigrams - sDeleteUserBigrams) pairs. 59 * Do not keep this number small to avoid deleting too often. 60 */ 61 private static int sDeleteUserBigrams = 1000; 62 63 /** 64 * Database version should increase if the database structure changes 65 */ 66 private static final int DATABASE_VERSION = 1; 67 68 private static final String DATABASE_NAME = "userbigram_dict.db"; 69 70 /** Name of the words table in the database */ 71 private static final String MAIN_TABLE_NAME = "main"; 72 // TODO: Consume less space by using a unique id for locale instead of the whole 73 // 2-5 character string. (Same TODO from AutoDictionary) 74 private static final String MAIN_COLUMN_ID = BaseColumns._ID; 75 private static final String MAIN_COLUMN_WORD1 = "word1"; 76 private static final String MAIN_COLUMN_WORD2 = "word2"; 77 private static final String MAIN_COLUMN_LOCALE = "locale"; 78 79 /** Name of the frequency table in the database */ 80 private static final String FREQ_TABLE_NAME = "frequency"; 81 private static final String FREQ_COLUMN_ID = BaseColumns._ID; 82 private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; 83 private static final String FREQ_COLUMN_FREQUENCY = "freq"; 84 85 private final LatinIME mIme; 86 87 /** Locale for which this auto dictionary is storing words */ 88 private String mLocale; 89 90 private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>(); 91 private final Object mPendingWritesLock = new Object(); 92 private static volatile boolean sUpdatingDB = false; 93 94 private final static HashMap<String, String> sDictProjectionMap; 95 96 static { 97 sDictProjectionMap = new HashMap<String, String>(); 98 sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); 99 sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); 100 sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); 101 sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); 102 103 sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); 104 sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); 105 sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY); 106 } 107 108 private static DatabaseHelper sOpenHelper = null; 109 110 private static class Bigram { 111 String word1; 112 String word2; 113 int frequency; 114 115 Bigram(String word1, String word2, int frequency) { 116 this.word1 = word1; 117 this.word2 = word2; 118 this.frequency = frequency; 119 } 120 121 @Override 122 public boolean equals(Object bigram) { 123 Bigram bigram2 = (Bigram) bigram; 124 return (word1.equals(bigram2.word1) && word2.equals(bigram2.word2)); 125 } 126 127 @Override 128 public int hashCode() { 129 return (word1 + " " + word2).hashCode(); 130 } 131 } 132 133 public void setDatabaseMax(int maxUserBigram) { 134 sMaxUserBigrams = maxUserBigram; 135 } 136 137 public void setDatabaseDelete(int deleteUserBigram) { 138 sDeleteUserBigrams = deleteUserBigram; 139 } 140 141 public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { 142 super(context, dicTypeId); 143 mIme = ime; 144 mLocale = locale; 145 if (sOpenHelper == null) { 146 sOpenHelper = new DatabaseHelper(getContext()); 147 } 148 if (mLocale != null && mLocale.length() > 1) { 149 loadDictionary(); 150 } 151 } 152 153 @Override 154 public void close() { 155 flushPendingWrites(); 156 // Don't close the database as locale changes will require it to be reopened anyway 157 // Also, the database is written to somewhat frequently, so it needs to be kept alive 158 // throughout the life of the process. 159 // mOpenHelper.close(); 160 super.close(); 161 } 162 163 /** 164 * Pair will be added to the userbigram database. 165 */ 166 public int addBigrams(String word1, String word2) { 167 // remove caps 168 if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) { 169 word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1); 170 } 171 172 int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED); 173 if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX; 174 synchronized (mPendingWritesLock) { 175 if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) { 176 mPendingWrites.add(new Bigram(word1, word2, freq)); 177 } else { 178 Bigram bi = new Bigram(word1, word2, freq); 179 mPendingWrites.remove(bi); 180 mPendingWrites.add(bi); 181 } 182 } 183 184 return freq; 185 } 186 187 /** 188 * Schedules a background thread to write any pending words to the database. 189 */ 190 public void flushPendingWrites() { 191 synchronized (mPendingWritesLock) { 192 // Nothing pending? Return 193 if (mPendingWrites.isEmpty()) return; 194 // Create a background thread to write the pending entries 195 new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute(); 196 // Create a new map for writing new entries into while the old one is written to db 197 mPendingWrites = new HashSet<Bigram>(); 198 } 199 } 200 201 /** Used for testing purpose **/ 202 void waitUntilUpdateDBDone() { 203 synchronized (mPendingWritesLock) { 204 while (sUpdatingDB) { 205 try { 206 Thread.sleep(100); 207 } catch (InterruptedException e) { 208 } 209 } 210 return; 211 } 212 } 213 214 @Override 215 public void loadDictionaryAsync() { 216 // Load the words that correspond to the current input locale 217 Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); 218 try { 219 if (cursor.moveToFirst()) { 220 int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); 221 int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); 222 int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY); 223 while (!cursor.isAfterLast()) { 224 String word1 = cursor.getString(word1Index); 225 String word2 = cursor.getString(word2Index); 226 int frequency = cursor.getInt(frequencyIndex); 227 // Safeguard against adding really long words. Stack may overflow due 228 // to recursive lookup 229 if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) { 230 super.setBigram(word1, word2, frequency); 231 } 232 cursor.moveToNext(); 233 } 234 } 235 } finally { 236 cursor.close(); 237 } 238 } 239 240 /** 241 * Query the database 242 */ 243 private Cursor query(String selection, String[] selectionArgs) { 244 SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); 245 246 // main INNER JOIN frequency ON (main._id=freq.pair_id) 247 qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" 248 + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." 249 + FREQ_COLUMN_PAIR_ID +")"); 250 251 qb.setProjectionMap(sDictProjectionMap); 252 253 // Get the database and run the query 254 SQLiteDatabase db = sOpenHelper.getReadableDatabase(); 255 Cursor c = qb.query(db, 256 new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY }, 257 selection, selectionArgs, null, null, null); 258 return c; 259 } 260 261 /** 262 * This class helps open, create, and upgrade the database file. 263 */ 264 private static class DatabaseHelper extends SQLiteOpenHelper { 265 266 DatabaseHelper(Context context) { 267 super(context, DATABASE_NAME, null, DATABASE_VERSION); 268 } 269 270 @Override 271 public void onCreate(SQLiteDatabase db) { 272 db.execSQL("PRAGMA foreign_keys = ON;"); 273 db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" 274 + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," 275 + MAIN_COLUMN_WORD1 + " TEXT," 276 + MAIN_COLUMN_WORD2 + " TEXT," 277 + MAIN_COLUMN_LOCALE + " TEXT" 278 + ");"); 279 db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" 280 + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," 281 + FREQ_COLUMN_PAIR_ID + " INTEGER," 282 + FREQ_COLUMN_FREQUENCY + " INTEGER," 283 + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME 284 + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" 285 + ");"); 286 } 287 288 @Override 289 public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { 290 Log.w(TAG, "Upgrading database from version " + oldVersion + " to " 291 + newVersion + ", which will destroy all old data"); 292 db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); 293 db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); 294 onCreate(db); 295 } 296 } 297 298 /** 299 * Async task to write pending words to the database so that it stays in sync with 300 * the in-memory trie. 301 */ 302 private static class UpdateDbTask extends AsyncTask<Void, Void, Void> { 303 private final HashSet<Bigram> mMap; 304 private final DatabaseHelper mDbHelper; 305 private final String mLocale; 306 307 public UpdateDbTask(Context context, DatabaseHelper openHelper, 308 HashSet<Bigram> pendingWrites, String locale) { 309 mMap = pendingWrites; 310 mLocale = locale; 311 mDbHelper = openHelper; 312 } 313 314 /** Prune any old data if the database is getting too big. */ 315 private void checkPruneData(SQLiteDatabase db) { 316 db.execSQL("PRAGMA foreign_keys = ON;"); 317 Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, 318 null, null, null, null, null); 319 try { 320 int totalRowCount = c.getCount(); 321 // prune out old data if we have too much data 322 if (totalRowCount > sMaxUserBigrams) { 323 int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams; 324 int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); 325 c.moveToFirst(); 326 int count = 0; 327 while (count < numDeleteRows && !c.isAfterLast()) { 328 String pairId = c.getString(pairIdColumnId); 329 // Deleting from MAIN table will delete the frequencies 330 // due to FOREIGN KEY .. ON DELETE CASCADE 331 db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", 332 new String[] { pairId }); 333 c.moveToNext(); 334 count++; 335 } 336 } 337 } finally { 338 c.close(); 339 } 340 } 341 342 @Override 343 protected void onPreExecute() { 344 sUpdatingDB = true; 345 } 346 347 @Override 348 protected Void doInBackground(Void... v) { 349 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 350 db.execSQL("PRAGMA foreign_keys = ON;"); 351 // Write all the entries to the db 352 Iterator<Bigram> iterator = mMap.iterator(); 353 while (iterator.hasNext()) { 354 Bigram bi = iterator.next(); 355 356 // find pair id 357 Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, 358 MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " 359 + MAIN_COLUMN_LOCALE + "=?", 360 new String[] { bi.word1, bi.word2, mLocale }, null, null, null); 361 362 int pairId; 363 if (c.moveToFirst()) { 364 // existing pair 365 pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); 366 db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", 367 new String[] { Integer.toString(pairId) }); 368 } else { 369 // new pair 370 Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, 371 getContentValues(bi.word1, bi.word2, mLocale)); 372 pairId = pairIdLong.intValue(); 373 } 374 c.close(); 375 376 // insert new frequency 377 db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.frequency)); 378 } 379 checkPruneData(db); 380 sUpdatingDB = false; 381 382 return null; 383 } 384 385 private ContentValues getContentValues(String word1, String word2, String locale) { 386 ContentValues values = new ContentValues(3); 387 values.put(MAIN_COLUMN_WORD1, word1); 388 values.put(MAIN_COLUMN_WORD2, word2); 389 values.put(MAIN_COLUMN_LOCALE, locale); 390 return values; 391 } 392 393 private ContentValues getFrequencyContentValues(int pairId, int frequency) { 394 ContentValues values = new ContentValues(2); 395 values.put(FREQ_COLUMN_PAIR_ID, pairId); 396 values.put(FREQ_COLUMN_FREQUENCY, frequency); 397 return values; 398 } 399 } 400 401 } 402