Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2010 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import java.util.HashMap;
     20 import java.util.HashSet;
     21 import java.util.Iterator;
     22 
     23 import android.content.ContentValues;
     24 import android.content.Context;
     25 import android.database.Cursor;
     26 import android.database.sqlite.SQLiteDatabase;
     27 import android.database.sqlite.SQLiteOpenHelper;
     28 import android.database.sqlite.SQLiteQueryBuilder;
     29 import android.os.AsyncTask;
     30 import android.provider.BaseColumns;
     31 import android.util.Log;
     32 
     33 /**
     34  * Stores all the pairs user types in databases. Prune the database if the size
     35  * gets too big. Unlike AutoDictionary, it even stores the pairs that are already
     36  * in the dictionary.
     37  */
     38 public class UserBigramDictionary extends ExpandableDictionary {
     39     private static final String TAG = "UserBigramDictionary";
     40 
     41     /** Any pair being typed or picked */
     42     private static final int FREQUENCY_FOR_TYPED = 2;
     43 
     44     /** Maximum frequency for all pairs */
     45     private static final int FREQUENCY_MAX = 127;
     46 
     47     /**
     48      * If this pair is typed 6 times, it would be suggested.
     49      * Should be smaller than ContactsDictionary.FREQUENCY_FOR_CONTACTS_BIGRAM
     50      */
     51     protected static final int SUGGEST_THRESHOLD = 6 * FREQUENCY_FOR_TYPED;
     52 
     53     /** Maximum number of pairs. Pruning will start when databases goes above this number. */
     54     private static int sMaxUserBigrams = 10000;
     55 
     56     /**
     57      * When it hits maximum bigram pair, it will delete until you are left with
     58      * only (sMaxUserBigrams - sDeleteUserBigrams) pairs.
     59      * Do not keep this number small to avoid deleting too often.
     60      */
     61     private static int sDeleteUserBigrams = 1000;
     62 
     63     /**
     64      * Database version should increase if the database structure changes
     65      */
     66     private static final int DATABASE_VERSION = 1;
     67 
     68     private static final String DATABASE_NAME = "userbigram_dict.db";
     69 
     70     /** Name of the words table in the database */
     71     private static final String MAIN_TABLE_NAME = "main";
     72     // TODO: Consume less space by using a unique id for locale instead of the whole
     73     // 2-5 character string. (Same TODO from AutoDictionary)
     74     private static final String MAIN_COLUMN_ID = BaseColumns._ID;
     75     private static final String MAIN_COLUMN_WORD1 = "word1";
     76     private static final String MAIN_COLUMN_WORD2 = "word2";
     77     private static final String MAIN_COLUMN_LOCALE = "locale";
     78 
     79     /** Name of the frequency table in the database */
     80     private static final String FREQ_TABLE_NAME = "frequency";
     81     private static final String FREQ_COLUMN_ID = BaseColumns._ID;
     82     private static final String FREQ_COLUMN_PAIR_ID = "pair_id";
     83     private static final String FREQ_COLUMN_FREQUENCY = "freq";
     84 
     85     private final LatinIME mIme;
     86 
     87     /** Locale for which this auto dictionary is storing words */
     88     private String mLocale;
     89 
     90     private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
     91     private final Object mPendingWritesLock = new Object();
     92     private static volatile boolean sUpdatingDB = false;
     93 
     94     private final static HashMap<String, String> sDictProjectionMap;
     95 
     96     static {
     97         sDictProjectionMap = new HashMap<String, String>();
     98         sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID);
     99         sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1);
    100         sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2);
    101         sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE);
    102 
    103         sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID);
    104         sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID);
    105         sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY);
    106     }
    107 
    108     private static DatabaseHelper sOpenHelper = null;
    109 
    110     private static class Bigram {
    111         String word1;
    112         String word2;
    113         int frequency;
    114 
    115         Bigram(String word1, String word2, int frequency) {
    116             this.word1 = word1;
    117             this.word2 = word2;
    118             this.frequency = frequency;
    119         }
    120 
    121         @Override
    122         public boolean equals(Object bigram) {
    123             Bigram bigram2 = (Bigram) bigram;
    124             return (word1.equals(bigram2.word1) && word2.equals(bigram2.word2));
    125         }
    126 
    127         @Override
    128         public int hashCode() {
    129             return (word1 + " " + word2).hashCode();
    130         }
    131     }
    132 
    133     public void setDatabaseMax(int maxUserBigram) {
    134         sMaxUserBigrams = maxUserBigram;
    135     }
    136 
    137     public void setDatabaseDelete(int deleteUserBigram) {
    138         sDeleteUserBigrams = deleteUserBigram;
    139     }
    140 
    141     public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) {
    142         super(context, dicTypeId);
    143         mIme = ime;
    144         mLocale = locale;
    145         if (sOpenHelper == null) {
    146             sOpenHelper = new DatabaseHelper(getContext());
    147         }
    148         if (mLocale != null && mLocale.length() > 1) {
    149             loadDictionary();
    150         }
    151     }
    152 
    153     @Override
    154     public void close() {
    155         flushPendingWrites();
    156         // Don't close the database as locale changes will require it to be reopened anyway
    157         // Also, the database is written to somewhat frequently, so it needs to be kept alive
    158         // throughout the life of the process.
    159         // mOpenHelper.close();
    160         super.close();
    161     }
    162 
    163     /**
    164      * Pair will be added to the userbigram database.
    165      */
    166     public int addBigrams(String word1, String word2) {
    167         // remove caps
    168         if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) {
    169             word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1);
    170         }
    171 
    172         int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED);
    173         if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX;
    174         synchronized (mPendingWritesLock) {
    175             if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) {
    176                 mPendingWrites.add(new Bigram(word1, word2, freq));
    177             } else {
    178                 Bigram bi = new Bigram(word1, word2, freq);
    179                 mPendingWrites.remove(bi);
    180                 mPendingWrites.add(bi);
    181             }
    182         }
    183 
    184         return freq;
    185     }
    186 
    187     /**
    188      * Schedules a background thread to write any pending words to the database.
    189      */
    190     public void flushPendingWrites() {
    191         synchronized (mPendingWritesLock) {
    192             // Nothing pending? Return
    193             if (mPendingWrites.isEmpty()) return;
    194             // Create a background thread to write the pending entries
    195             new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute();
    196             // Create a new map for writing new entries into while the old one is written to db
    197             mPendingWrites = new HashSet<Bigram>();
    198         }
    199     }
    200 
    201     /** Used for testing purpose **/
    202     void waitUntilUpdateDBDone() {
    203         synchronized (mPendingWritesLock) {
    204             while (sUpdatingDB) {
    205                 try {
    206                     Thread.sleep(100);
    207                 } catch (InterruptedException e) {
    208                 }
    209             }
    210             return;
    211         }
    212     }
    213 
    214     @Override
    215     public void loadDictionaryAsync() {
    216         // Load the words that correspond to the current input locale
    217         Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale });
    218         try {
    219             if (cursor.moveToFirst()) {
    220                 int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1);
    221                 int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2);
    222                 int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY);
    223                 while (!cursor.isAfterLast()) {
    224                     String word1 = cursor.getString(word1Index);
    225                     String word2 = cursor.getString(word2Index);
    226                     int frequency = cursor.getInt(frequencyIndex);
    227                     // Safeguard against adding really long words. Stack may overflow due
    228                     // to recursive lookup
    229                     if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) {
    230                         super.setBigram(word1, word2, frequency);
    231                     }
    232                     cursor.moveToNext();
    233                 }
    234             }
    235         } finally {
    236             cursor.close();
    237         }
    238     }
    239 
    240     /**
    241      * Query the database
    242      */
    243     private Cursor query(String selection, String[] selectionArgs) {
    244         SQLiteQueryBuilder qb = new SQLiteQueryBuilder();
    245 
    246         // main INNER JOIN frequency ON (main._id=freq.pair_id)
    247         qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON ("
    248                 + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "."
    249                 + FREQ_COLUMN_PAIR_ID +")");
    250 
    251         qb.setProjectionMap(sDictProjectionMap);
    252 
    253         // Get the database and run the query
    254         SQLiteDatabase db = sOpenHelper.getReadableDatabase();
    255         Cursor c = qb.query(db,
    256                 new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY },
    257                 selection, selectionArgs, null, null, null);
    258         return c;
    259     }
    260 
    261     /**
    262      * This class helps open, create, and upgrade the database file.
    263      */
    264     private static class DatabaseHelper extends SQLiteOpenHelper {
    265 
    266         DatabaseHelper(Context context) {
    267             super(context, DATABASE_NAME, null, DATABASE_VERSION);
    268         }
    269 
    270         @Override
    271         public void onCreate(SQLiteDatabase db) {
    272             db.execSQL("PRAGMA foreign_keys = ON;");
    273             db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " ("
    274                     + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY,"
    275                     + MAIN_COLUMN_WORD1 + " TEXT,"
    276                     + MAIN_COLUMN_WORD2 + " TEXT,"
    277                     + MAIN_COLUMN_LOCALE + " TEXT"
    278                     + ");");
    279             db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " ("
    280                     + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY,"
    281                     + FREQ_COLUMN_PAIR_ID + " INTEGER,"
    282                     + FREQ_COLUMN_FREQUENCY + " INTEGER,"
    283                     + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME
    284                     + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE"
    285                     + ");");
    286         }
    287 
    288         @Override
    289         public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) {
    290             Log.w(TAG, "Upgrading database from version " + oldVersion + " to "
    291                     + newVersion + ", which will destroy all old data");
    292             db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME);
    293             db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME);
    294             onCreate(db);
    295         }
    296     }
    297 
    298     /**
    299      * Async task to write pending words to the database so that it stays in sync with
    300      * the in-memory trie.
    301      */
    302     private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
    303         private final HashSet<Bigram> mMap;
    304         private final DatabaseHelper mDbHelper;
    305         private final String mLocale;
    306 
    307         public UpdateDbTask(Context context, DatabaseHelper openHelper,
    308                 HashSet<Bigram> pendingWrites, String locale) {
    309             mMap = pendingWrites;
    310             mLocale = locale;
    311             mDbHelper = openHelper;
    312         }
    313 
    314         /** Prune any old data if the database is getting too big. */
    315         private void checkPruneData(SQLiteDatabase db) {
    316             db.execSQL("PRAGMA foreign_keys = ON;");
    317             Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID },
    318                     null, null, null, null, null);
    319             try {
    320                 int totalRowCount = c.getCount();
    321                 // prune out old data if we have too much data
    322                 if (totalRowCount > sMaxUserBigrams) {
    323                     int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams;
    324                     int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID);
    325                     c.moveToFirst();
    326                     int count = 0;
    327                     while (count < numDeleteRows && !c.isAfterLast()) {
    328                         String pairId = c.getString(pairIdColumnId);
    329                         // Deleting from MAIN table will delete the frequencies
    330                         // due to FOREIGN KEY .. ON DELETE CASCADE
    331                         db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?",
    332                             new String[] { pairId });
    333                         c.moveToNext();
    334                         count++;
    335                     }
    336                 }
    337             } finally {
    338                 c.close();
    339             }
    340         }
    341 
    342         @Override
    343         protected void onPreExecute() {
    344             sUpdatingDB = true;
    345         }
    346 
    347         @Override
    348         protected Void doInBackground(Void... v) {
    349             SQLiteDatabase db = mDbHelper.getWritableDatabase();
    350             db.execSQL("PRAGMA foreign_keys = ON;");
    351             // Write all the entries to the db
    352             Iterator<Bigram> iterator = mMap.iterator();
    353             while (iterator.hasNext()) {
    354                 Bigram bi = iterator.next();
    355 
    356                 // find pair id
    357                 Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
    358                         MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
    359                         + MAIN_COLUMN_LOCALE + "=?",
    360                         new String[] { bi.word1, bi.word2, mLocale }, null, null, null);
    361 
    362                 int pairId;
    363                 if (c.moveToFirst()) {
    364                     // existing pair
    365                     pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
    366                     db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
    367                             new String[] { Integer.toString(pairId) });
    368                 } else {
    369                     // new pair
    370                     Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
    371                             getContentValues(bi.word1, bi.word2, mLocale));
    372                     pairId = pairIdLong.intValue();
    373                 }
    374                 c.close();
    375 
    376                 // insert new frequency
    377                 db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.frequency));
    378             }
    379             checkPruneData(db);
    380             sUpdatingDB = false;
    381 
    382             return null;
    383         }
    384 
    385         private ContentValues getContentValues(String word1, String word2, String locale) {
    386             ContentValues values = new ContentValues(3);
    387             values.put(MAIN_COLUMN_WORD1, word1);
    388             values.put(MAIN_COLUMN_WORD2, word2);
    389             values.put(MAIN_COLUMN_LOCALE, locale);
    390             return values;
    391         }
    392 
    393         private ContentValues getFrequencyContentValues(int pairId, int frequency) {
    394            ContentValues values = new ContentValues(2);
    395            values.put(FREQ_COLUMN_PAIR_ID, pairId);
    396            values.put(FREQ_COLUMN_FREQUENCY, frequency);
    397            return values;
    398         }
    399     }
    400 
    401 }
    402