Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.utils;
     18 
     19 import android.util.Log;
     20 
     21 import com.android.inputmethod.annotations.UsedForTesting;
     22 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
     23 import com.android.inputmethod.latin.makedict.DictDecoder;
     24 import com.android.inputmethod.latin.makedict.DictEncoder;
     25 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     26 import com.android.inputmethod.latin.makedict.FusionDictionary;
     27 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
     28 import com.android.inputmethod.latin.makedict.PendingAttribute;
     29 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     30 import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
     31 
     32 import java.io.IOException;
     33 import java.util.ArrayList;
     34 import java.util.HashMap;
     35 import java.util.Map.Entry;
     36 import java.util.TreeMap;
     37 import java.util.concurrent.TimeUnit;
     38 
     39 /**
     40  * Reads and writes Binary files for a UserHistoryDictionary.
     41  *
     42  * All the methods in this class are static.
     43  */
     44 public final class UserHistoryDictIOUtils {
     45     private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
     46     private static final boolean DEBUG = false;
     47     private static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
     48     private static final String USES_FORGETTING_CURVE_VALUE = "1";
     49     private static final String LAST_UPDATED_TIME_KEY = "date";
     50 
     51     public interface OnAddWordListener {
     52         /**
     53          * Callback to be notified when a word is added to the dictionary.
     54          * @param word The added word.
     55          * @param shortcutTarget A shortcut target for this word, or null if none.
     56          * @param frequency The frequency for this word.
     57          * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
     58          *   Unspecified if shortcutTarget is null - do not rely on its value.
     59          */
     60         public void setUnigram(final String word, final String shortcutTarget, final int frequency,
     61                 final int shortcutFreq);
     62         public void setBigram(final String word1, final String word2, final int frequency);
     63     }
     64 
     65     @UsedForTesting
     66     public interface BigramDictionaryInterface {
     67         public int getFrequency(final String word1, final String word2);
     68     }
     69 
     70     /**
     71      * Writes dictionary to file.
     72      */
     73     public static void writeDictionary(final DictEncoder dictEncoder,
     74             final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
     75             final FormatOptions formatOptions) {
     76         final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
     77         fusionDict.addOptionAttribute(USES_FORGETTING_CURVE_KEY, USES_FORGETTING_CURVE_VALUE);
     78         fusionDict.addOptionAttribute(LAST_UPDATED_TIME_KEY,
     79                 String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
     80         try {
     81             dictEncoder.writeDictionary(fusionDict, formatOptions);
     82             Log.d(TAG, "end writing");
     83         } catch (IOException e) {
     84             Log.e(TAG, "IO exception while writing file", e);
     85         } catch (UnsupportedFormatException e) {
     86             Log.e(TAG, "Unsupported format", e);
     87         }
     88     }
     89 
     90     /**
     91      * Constructs a new FusionDictionary from BigramDictionaryInterface.
     92      */
     93     @UsedForTesting
     94     static FusionDictionary constructFusionDictionary(
     95             final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
     96         final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
     97                 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
     98                         false));
     99         int profTotal = 0;
    100         for (final String word1 : bigrams.keySet()) {
    101             final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
    102             for (final String word2 : word1Bigrams.keySet()) {
    103                 final int freq = dict.getFrequency(word1, word2);
    104                 if (freq == -1) {
    105                     // don't add this bigram.
    106                     continue;
    107                 }
    108                 if (DEBUG) {
    109                     if (word1 == null) {
    110                         Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
    111                     } else {
    112                         Log.d(TAG, "add bigram: " + word1
    113                                 + "," + word2 + "," + Integer.toString(freq));
    114                     }
    115                     profTotal++;
    116                 }
    117                 if (word1 == null) { // unigram
    118                     fusionDict.add(word2, freq, null, false /* isNotAWord */);
    119                 } else { // bigram
    120                     if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
    121                         fusionDict.add(word1, 2, null, false /* isNotAWord */);
    122                     }
    123                     fusionDict.setBigram(word1, word2, freq);
    124                 }
    125                 bigrams.updateBigram(word1, word2, (byte)freq);
    126             }
    127         }
    128         if (DEBUG) {
    129             Log.d(TAG, "add " + profTotal + "words");
    130         }
    131         return fusionDict;
    132     }
    133 
    134     /**
    135      * Reads dictionary from file.
    136      */
    137     public static void readDictionaryBinary(final DictDecoder dictDecoder,
    138             final OnAddWordListener dict) {
    139         final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
    140         final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
    141         final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
    142         try {
    143             dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
    144         } catch (IOException e) {
    145             Log.e(TAG, "IO exception while reading file", e);
    146         } catch (UnsupportedFormatException e) {
    147             Log.e(TAG, "Unsupported format", e);
    148         } catch (ArrayIndexOutOfBoundsException e) {
    149             Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
    150         }
    151         addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    152     }
    153 
    154     /**
    155      * Adds all unigrams and bigrams in maps to OnAddWordListener.
    156      */
    157     @UsedForTesting
    158     static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
    159             final TreeMap<Integer, Integer> frequencies,
    160             final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
    161             final OnAddWordListener to) {
    162         for (Entry<Integer, String> entry : unigrams.entrySet()) {
    163             final String word1 = entry.getValue();
    164             final int unigramFrequency = frequencies.get(entry.getKey());
    165             to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
    166             final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
    167             if (attrList != null) {
    168                 for (final PendingAttribute attr : attrList) {
    169                     final String word2 = unigrams.get(attr.mAddress);
    170                     if (word1 == null || word2 == null) {
    171                         Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
    172                         continue;
    173                     }
    174                     to.setBigram(word1, word2,
    175                             BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
    176                                     attr.mFrequency));
    177                 }
    178             }
    179         }
    180 
    181     }
    182 }
    183