/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.utils;

import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

/**
 * Reads and writes Binary files for a UserHistoryDictionary.
 *
 * All the methods in this class are static.
 */
public final class UserHistoryDictIOUtils {
    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
    private static final boolean DEBUG = false;
    // Option attributes stamped on every dictionary written by writeDictionary().
    private static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
    private static final String USES_FORGETTING_CURVE_VALUE = "1";
    private static final String LAST_UPDATED_TIME_KEY = "date";

    /**
     * Receives the unigrams and bigrams produced by {@link #addWordsFromWordMap}.
     */
    public interface OnAddWordListener {
        /**
         * Callback to be notified when a word is added to the dictionary.
         * @param word The added word.
         * @param shortcutTarget A shortcut target for this word, or null if none.
         * @param frequency The frequency for this word.
         * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
         *   Unspecified if shortcutTarget is null - do not rely on its value.
         */
        public void setUnigram(final String word, final String shortcutTarget, final int frequency,
                final int shortcutFreq);

        /**
         * Callback to be notified when a bigram is added to the dictionary.
         * @param word1 The first word of the pair.
         * @param word2 The second word of the pair.
         * @param frequency The frequency for this bigram.
         */
        public void setBigram(final String word1, final String word2, final int frequency);
    }

    /**
     * Source of frequencies used when building the dictionary to write.
     */
    @UsedForTesting
    public interface BigramDictionaryInterface {
        /**
         * Returns the frequency for the given pair. {@link #constructFusionDictionary} passes a
         * null word1 for unigram entries and skips any pair for which this returns -1.
         */
        public int getFrequency(final String word1, final String word2);
    }

    /**
     * Writes dictionary to file.
     *
     * The dictionary is built with {@link #constructFusionDictionary}, then tagged with the
     * forgetting-curve option and the current time in seconds before being handed to the
     * encoder. Encoder failures are logged and not propagated to the caller.
     *
     * @param dictEncoder the encoder that performs the actual write.
     * @param dict the source of frequencies for each entry.
     * @param bigrams the list of entries to write.
     * @param formatOptions the format options passed through to the encoder.
     */
    public static void writeDictionary(final DictEncoder dictEncoder,
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
            final FormatOptions formatOptions) {
        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
        fusionDict.addOptionAttribute(USES_FORGETTING_CURVE_KEY, USES_FORGETTING_CURVE_VALUE);
        fusionDict.addOptionAttribute(LAST_UPDATED_TIME_KEY,
                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
        try {
            dictEncoder.writeDictionary(fusionDict, formatOptions);
            Log.d(TAG, "end writing");
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }
    }

    /**
     * Constructs a new FusionDictionary from BigramDictionaryInterface.
     *
     * Every (word1, word2) pair listed in {@code bigrams} is looked up in {@code dict}; pairs
     * whose frequency is -1 are skipped. A null word1 denotes a unigram entry for word2. For
     * each pair that is added, {@code bigrams} is also updated with the frequency cast to a
     * byte.
     *
     * @param dict the source of frequencies for each entry.
     * @param bigrams the list of entries to add; updated as a side effect.
     * @return the newly built dictionary.
     */
    @UsedForTesting
    static FusionDictionary constructFusionDictionary(
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
        final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
                        false));
        int profTotal = 0;
        for (final String word1 : bigrams.keySet()) {
            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
            for (final String word2 : word1Bigrams.keySet()) {
                final int freq = dict.getFrequency(word1, word2);
                if (freq == -1) {
                    // don't add this bigram.
                    continue;
                }
                if (DEBUG) {
                    if (word1 == null) {
                        Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
                    } else {
                        Log.d(TAG, "add bigram: " + word1
                                + "," + word2 + "," + Integer.toString(freq));
                    }
                    profTotal++;
                }
                if (word1 == null) { // unigram
                    fusionDict.add(word2, freq, null, false /* isNotAWord */);
                } else { // bigram
                    // word1 is added with frequency 2 when it is not in the tree yet, before
                    // the bigram is attached to it.
                    if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
                        fusionDict.add(word1, 2, null, false /* isNotAWord */);
                    }
                    fusionDict.setBigram(word1, word2, freq);
                }
                bigrams.updateBigram(word1, word2, (byte)freq);
            }
        }
        if (DEBUG) {
            Log.d(TAG, "add " + profTotal + "words");
        }
        return fusionDict;
    }

    /**
     * Reads dictionary from file.
     *
     * Decoder failures are logged and not propagated; whatever was decoded into the maps
     * before the failure is still forwarded to {@code dict}.
     *
     * @param dictDecoder the decoder to read unigrams and bigrams from.
     * @param dict the listener that receives every decoded unigram and bigram.
     */
    public static void readDictionaryBinary(final DictDecoder dictDecoder,
            final OnAddWordListener dict) {
        final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
        final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
        final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
        try {
            dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while reading file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        } catch (ArrayIndexOutOfBoundsException e) {
            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
        }
        addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    }

    /**
     * Adds all unigrams and bigrams in maps to OnAddWordListener.
     *
     * The three maps share the same integer keys; a bigram's {@code mAddress} is the key of
     * its second word in {@code unigrams}. An entry that has no frequency is logged and
     * skipped, since the maps can be incomplete when decoding failed part-way. A bigram whose
     * first or second word is null is logged and skipped as well.
     *
     * @param unigrams the words, by key.
     * @param frequencies the unigram frequency of each word, by key.
     * @param bigrams the bigram attributes of each first word, by key.
     * @param to the listener that receives the entries.
     */
    @UsedForTesting
    static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
            final OnAddWordListener to) {
        for (final Entry<Integer, String> entry : unigrams.entrySet()) {
            final Integer key = entry.getKey();
            final String word1 = entry.getValue();
            // Look the frequency up as a boxed value first: unboxing a missing entry directly
            // would throw a NullPointerException and abort the whole load.
            final Integer boxedFrequency = frequencies.get(key);
            if (boxedFrequency == null) {
                Log.e(TAG, "Missing frequency for unigram: " + word1);
                continue;
            }
            final int unigramFrequency = boxedFrequency;
            to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
            final ArrayList<PendingAttribute> attrList = bigrams.get(key);
            if (attrList != null) {
                for (final PendingAttribute attr : attrList) {
                    final String word2 = unigrams.get(attr.mAddress);
                    if (word1 == null || word2 == null) {
                        Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
                        continue;
                    }
                    to.setBigram(word1, word2,
                            BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
                                    attr.mFrequency));
                }
            }
        }
    }
}